/*
 * DataDistribution.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <set>
#include <sstream>

#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/RunTransaction.actor.h"
#include "fdbrpc/Replication.h"
#include "fdbserver/DataDistribution.actor.h"
#include "fdbserver/FDBExecHelper.actor.h"
#include "fdbserver/IKeyValueStore.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/MoveKeys.actor.h"
#include "fdbserver/QuietDatabase.h"
#include "fdbserver/ServerDBInfo.h"
#include "fdbserver/TLogInterface.h"
#include "fdbserver/WaitFailure.h"
#include "flow/ActorCollection.h"
#include "flow/Arena.h"
#include "flow/BooleanParam.h"
#include "flow/Trace.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/serialize.h"

class TCTeamInfo;
struct TCMachineInfo;
class TCMachineTeamInfo;

FDB_BOOLEAN_PARAM(IsPrimary);

ACTOR Future<Void> checkAndRemoveInvalidLocalityAddr(DDTeamCollection* self);
ACTOR Future<Void> removeWrongStoreType(DDTeamCollection* self);
ACTOR Future<Void> waitForAllDataRemoved(Database cx, UID serverID, Version addedVersion, DDTeamCollection* teams);
bool _exclusionSafetyCheck(vector<UID>& excludeServerIDs, DDTeamCollection* teamCollection);

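// The data distributor's bookkeeping for a single storage server: its last known interface and process class,
// the teams it belongs to, the data currently in flight to it, and the signals (interface change, removal,
// TSS pair removal) that its tracker listens on.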
struct TCServerInfo : public ReferenceCounted<TCServerInfo> {
    UID id;
    Version addedVersion; // Read version when this server was added
    DDTeamCollection* collection;
    StorageServerInterface lastKnownInterface;
    ProcessClass lastKnownClass;
    vector<Reference<TCTeamInfo>> teams;
    Reference<TCMachineInfo> machine;
    Future<Void> tracker;
    int64_t dataInFlightToServer;
    ErrorOr<GetStorageMetricsReply> serverMetrics;
    Promise<std::pair<StorageServerInterface, ProcessClass>> interfaceChanged;
    Future<std::pair<StorageServerInterface, ProcessClass>> onInterfaceChanged;
    Promise<Void> removed;
    Future<Void> onRemoved;
    Future<Void> onTSSPairRemoved;
    Promise<Void> killTss;
    Promise<Void> wakeUpTracker;
    bool inDesiredDC;
    LocalityEntry localityEntry;
    Promise<Void> updated;
    AsyncVar<bool> wrongStoreTypeToRemove;
    AsyncVar<bool> ssVersionTooFarBehind;
    // A storage server's StoreType does not change.
    // To change the storeType for an ip:port, we destroy the old server and create a new one.
    KeyValueStoreType storeType; // Storage engine type

    TCServerInfo(StorageServerInterface ssi,
                 DDTeamCollection* collection,
                 ProcessClass processClass,
                 bool inDesiredDC,
                 Reference<LocalitySet> storageServerSet,
                 Version addedVersion = 0)
      : id(ssi.id()), addedVersion(addedVersion), collection(collection), lastKnownInterface(ssi),
        lastKnownClass(processClass), dataInFlightToServer(0), onInterfaceChanged(interfaceChanged.getFuture()),
        onRemoved(removed.getFuture()), onTSSPairRemoved(Never()), inDesiredDC(inDesiredDC),
        storeType(KeyValueStoreType::END) {

        if (!ssi.isTss()) {
            localityEntry = ((LocalityMap<UID>*)storageServerSet.getPtr())->add(ssi.locality, &id);
        }
    }

    bool isCorrectStoreType(KeyValueStoreType configStoreType) {
        // A new storage server's store type may not be set immediately.
        // If a storage server does not report its storeType, it will be tracked by the failure monitor and removed.
        return (storeType == configStoreType || storeType == KeyValueStoreType::END);
    }

    ~TCServerInfo();
};

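// All storage servers that run on the same physical machine. Machines are identified by the zone id of the
// servers' locality, so each TCMachineInfo corresponds to one zone id.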
struct TCMachineInfo : public ReferenceCounted<TCMachineInfo> {
    std::vector<Reference<TCServerInfo>> serversOnMachine; // SOMEDAY: change from vector to set
    Standalone<StringRef> machineID;
    std::vector<Reference<TCMachineTeamInfo>> machineTeams; // SOMEDAY: split good and bad machine teams.
    LocalityEntry localityEntry;

    explicit TCMachineInfo(Reference<TCServerInfo> server, const LocalityEntry& entry) : localityEntry(entry) {
        ASSERT(serversOnMachine.empty());
        serversOnMachine.push_back(server);

        LocalityData& locality = server->lastKnownInterface.locality;
        ASSERT(locality.zoneId().present());
        machineID = locality.zoneId().get();
    }

    std::string getServersIDStr() const {
        std::stringstream ss;
        if (serversOnMachine.empty())
            return "[unset]";

        for (const auto& server : serversOnMachine) {
            ss << server->id.toString() << " ";
        }

        return std::move(ss).str();
    }
};

ACTOR Future<Void> updateServerMetrics(Reference<TCServerInfo> server);

// TeamCollection's machine team information
class TCMachineTeamInfo : public ReferenceCounted<TCMachineTeamInfo> {
public:
    vector<Reference<TCMachineInfo>> machines;
    vector<Standalone<StringRef>> machineIDs;
    vector<Reference<TCTeamInfo>> serverTeams;
    UID id;

    explicit TCMachineTeamInfo(vector<Reference<TCMachineInfo>> const& machines)
      : machines(machines), id(deterministicRandom()->randomUniqueID()) {
        machineIDs.reserve(machines.size());
        for (int i = 0; i < machines.size(); i++) {
            machineIDs.push_back(machines[i]->machineID);
        }
        sort(machineIDs.begin(), machineIDs.end());
    }

    int size() const {
        ASSERT(machines.size() == machineIDs.size());
        return machineIDs.size();
    }

    std::string getMachineIDsStr() const {
        std::stringstream ss;

        if (machineIDs.empty())
            return "[unset]";

        for (const auto& id : machineIDs) {
            ss << id.contents().toString() << " ";
        }

        return std::move(ss).str();
    }

    bool operator==(TCMachineTeamInfo& rhs) const { return this->machineIDs == rhs.machineIDs; }
};

// TeamCollection's server team info.
class TCTeamInfo final : public ReferenceCounted<TCTeamInfo>, public IDataDistributionTeam {
    vector<Reference<TCServerInfo>> servers;
    vector<UID> serverIDs;
    bool healthy;
    bool wrongConfiguration; // True if any of the servers in the team have the wrong configuration
    int priority;
    UID id;

public:
    Reference<TCMachineTeamInfo> machineTeam;
    Future<Void> tracker;

    explicit TCTeamInfo(vector<Reference<TCServerInfo>> const& servers)
      : servers(servers), healthy(true), wrongConfiguration(false), priority(SERVER_KNOBS->PRIORITY_TEAM_HEALTHY),
        id(deterministicRandom()->randomUniqueID()) {
        if (servers.empty()) {
            TraceEvent(SevInfo, "ConstructTCTeamFromEmptyServers").log();
        }
        serverIDs.reserve(servers.size());
        for (int i = 0; i < servers.size(); i++) {
            serverIDs.push_back(servers[i]->id);
        }
    }

    std::string getTeamID() const override { return id.shortString(); }

    vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
        vector<StorageServerInterface> v;
        v.reserve(servers.size());
        for (const auto& server : servers) {
            v.push_back(server->lastKnownInterface);
        }
        return v;
    }

    int size() const override {
        ASSERT(servers.size() == serverIDs.size());
        return servers.size();
    }

    vector<UID> const& getServerIDs() const override { return serverIDs; }

    const vector<Reference<TCServerInfo>>& getServers() const { return servers; }

    std::string getServerIDsStr() const {
        std::stringstream ss;

        if (serverIDs.empty())
            return "[unset]";

        for (const auto& id : serverIDs) {
            ss << id.toString() << " ";
        }

        return std::move(ss).str();
    }

    void addDataInFlightToTeam(int64_t delta) override {
        for (int i = 0; i < servers.size(); i++)
            servers[i]->dataInFlightToServer += delta;
    }

    int64_t getDataInFlightToTeam() const override {
        int64_t dataInFlight = 0;
        for (int i = 0; i < servers.size(); i++)
            dataInFlight += servers[i]->dataInFlightToServer;
        return dataInFlight;
    }

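    // Logical load of the team: the average reported load of its servers (optionally plus a penalized share of the
    // data already in flight to the team), scaled up as the team's minimum available-space ratio drops below
    // AVAILABLE_SPACE_RATIO_CUTOFF. For example, a team whose fullest member has only half the cutoff ratio free is
    // weighted 2x (4x for teams of more than two servers), so team selection strongly avoids nearly-full teams.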
    int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0) const override {
        int64_t physicalBytes = getLoadAverage();
        double minAvailableSpaceRatio = getMinAvailableSpaceRatio(includeInFlight);
        int64_t inFlightBytes = includeInFlight ? getDataInFlightToTeam() / servers.size() : 0;
        double availableSpaceMultiplier =
            SERVER_KNOBS->AVAILABLE_SPACE_RATIO_CUTOFF /
            (std::max(std::min(SERVER_KNOBS->AVAILABLE_SPACE_RATIO_CUTOFF, minAvailableSpaceRatio), 0.000001));
        if (servers.size() > 2) {
            // make sure in triple replication the penalty is high enough that you will always avoid a team with a
            // member at 20% free space
            availableSpaceMultiplier = availableSpaceMultiplier * availableSpaceMultiplier;
        }

        if (minAvailableSpaceRatio < SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO) {
            TraceEvent(SevWarn, "DiskNearCapacity")
                .suppressFor(1.0)
                .detail("AvailableSpaceRatio", minAvailableSpaceRatio);
        }

        return (physicalBytes + (inflightPenalty * inFlightBytes)) * availableSpaceMultiplier;
    }

    int64_t getMinAvailableSpace(bool includeInFlight = true) const override {
        int64_t minAvailableSpace = std::numeric_limits<int64_t>::max();
        for (const auto& server : servers) {
            if (server->serverMetrics.present()) {
                auto& replyValue = server->serverMetrics.get();

                ASSERT(replyValue.available.bytes >= 0);
                ASSERT(replyValue.capacity.bytes >= 0);

                int64_t bytesAvailable = replyValue.available.bytes;
                if (includeInFlight) {
                    bytesAvailable -= server->dataInFlightToServer;
                }

                minAvailableSpace = std::min(bytesAvailable, minAvailableSpace);
            }
        }

        return minAvailableSpace; // Could be negative
    }

    double getMinAvailableSpaceRatio(bool includeInFlight = true) const override {
        double minRatio = 1.0;
        for (const auto& server : servers) {
            if (server->serverMetrics.present()) {
                auto& replyValue = server->serverMetrics.get();

                ASSERT(replyValue.available.bytes >= 0);
                ASSERT(replyValue.capacity.bytes >= 0);

                int64_t bytesAvailable = replyValue.available.bytes;
                if (includeInFlight) {
                    bytesAvailable = std::max((int64_t)0, bytesAvailable - server->dataInFlightToServer);
                }

                if (replyValue.capacity.bytes == 0)
                    minRatio = 0;
                else
                    minRatio = std::min(minRatio, ((double)bytesAvailable) / replyValue.capacity.bytes);
            }
        }
        return minRatio;
    }

    bool hasHealthyAvailableSpace(double minRatio) const override {
        return getMinAvailableSpaceRatio() >= minRatio && getMinAvailableSpace() > SERVER_KNOBS->MIN_AVAILABLE_SPACE;
    }

    Future<Void> updateStorageMetrics() override { return doUpdateStorageMetrics(this); }

    bool isOptimal() const override {
        for (const auto& server : servers) {
            if (server->lastKnownClass.machineClassFitness(ProcessClass::Storage) > ProcessClass::UnsetFit) {
                return false;
            }
        }
        return true;
    }

    bool isWrongConfiguration() const override { return wrongConfiguration; }
    void setWrongConfiguration(bool wrongConfiguration) override { this->wrongConfiguration = wrongConfiguration; }
    bool isHealthy() const override { return healthy; }
    void setHealthy(bool h) override { healthy = h; }
    int getPriority() const override { return priority; }
    void setPriority(int p) override { priority = p; }

    void addref() override { ReferenceCounted<TCTeamInfo>::addref(); }
    void delref() override { ReferenceCounted<TCTeamInfo>::delref(); }

    void addServers(const vector<UID>& servers) override {
        serverIDs.reserve(servers.size());
        for (int i = 0; i < servers.size(); i++) {
            serverIDs.push_back(servers[i]);
        }
    }

private:
    // Calculate an "average" of the metrics replies that we received. Penalize teams from which we did not receive
    // all replies.
    int64_t getLoadAverage() const {
        int64_t bytesSum = 0;
        int added = 0;
        for (int i = 0; i < servers.size(); i++)
            if (servers[i]->serverMetrics.present()) {
                added++;
                bytesSum += servers[i]->serverMetrics.get().load.bytes;
            }

        if (added < servers.size())
            bytesSum *= 2;

        return added == 0 ? 0 : bytesSum / added;
    }

    // Refresh the storage metrics of every server in the team.
    ACTOR Future<Void> doUpdateStorageMetrics(TCTeamInfo* self) {
        std::vector<Future<Void>> updates;
        updates.reserve(self->servers.size());
        for (int i = 0; i < self->servers.size(); i++)
            updates.push_back(updateServerMetrics(self->servers[i]));
        wait(waitForAll(updates));
        return Void();
    }
};

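// Health and exclusion flags for a single storage server, kept per server in DDTeamCollection's server_status map
// (an AsyncMap, so status changes can be waited on).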
struct ServerStatus {
    bool isWiggling;
    bool isFailed;
    bool isUndesired;
    bool isWrongConfiguration;
    bool initialized; // AsyncMap erases default constructed objects
    LocalityData locality;
    ServerStatus()
      : isWiggling(false), isFailed(true), isUndesired(false), isWrongConfiguration(false), initialized(false) {}
    ServerStatus(bool isFailed, bool isUndesired, bool isWiggling, LocalityData const& locality)
      : isWiggling(isWiggling), isFailed(isFailed), isUndesired(isUndesired), isWrongConfiguration(false),
        initialized(true), locality(locality) {}

    bool isUnhealthy() const { return isFailed || isUndesired; }
    const char* toString() const {
        return isFailed ? "Failed" : isUndesired ? "Undesired" : isWiggling ? "Wiggling" : "Healthy";
    }

    bool operator==(ServerStatus const& r) const {
        return isFailed == r.isFailed && isUndesired == r.isUndesired && isWiggling == r.isWiggling &&
               isWrongConfiguration == r.isWrongConfiguration && locality == r.locality &&
               initialized == r.initialized;
    }
    bool operator!=(ServerStatus const& r) const { return !(*this == r); }

    // If a process has reappeared without the storage server that was on it (isFailed == true), we don't need to
    // exclude it. We also don't need to exclude processes that are in the wrong configuration (since those servers
    // will be removed).
    bool excludeOnRecruit() { return !isFailed && !isWrongConfiguration; }
};

typedef AsyncMap<UID, ServerStatus> ServerStatusMap;

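// Reads the initial data-distribution state in two phases: first the healthyZone value, the DD mode, and the full
// storage server list (in one retry loop), then the keyServers mapping, split across multiple transactions when it
// is too large to read in one. Source and destination teams seen along the way are deduplicated through team_cache.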
// Read keyservers, return unique set of teams
ACTOR Future<Reference<InitialDataDistribution>> getInitialDataDistribution(Database cx,
                                                                            UID distributorId,
                                                                            MoveKeysLock moveKeysLock,
                                                                            std::vector<Optional<Key>> remoteDcIds,
                                                                            const DDEnabledState* ddEnabledState) {
    state Reference<InitialDataDistribution> result = makeReference<InitialDataDistribution>();
    state Key beginKey = allKeys.begin;

    state bool succeeded;

    state Transaction tr(cx);

    state std::map<UID, Optional<Key>> server_dc;
    state std::map<vector<UID>, std::pair<vector<UID>, vector<UID>>> team_cache;
    state std::vector<std::pair<StorageServerInterface, ProcessClass>> tss_servers;

    // Get the server list in its own try/catch block since it modifies result. We don't want a subsequent failure
    // causing entries to be duplicated
    loop {
        server_dc.clear();
        succeeded = false;
        try {

            // Read the healthyZone value, which is later used to determine whether failure-triggered DD is on or off
            tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
            tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);
            Optional<Value> val = wait(tr.get(healthyZoneKey));
            if (val.present()) {
                auto p = decodeHealthyZoneValue(val.get());
                if (p.second > tr.getReadVersion().get() || p.first == ignoreSSFailuresZoneString) {
                    result->initHealthyZoneValue = Optional<Key>(p.first);
                } else {
                    result->initHealthyZoneValue = Optional<Key>();
                }
            } else {
                result->initHealthyZoneValue = Optional<Key>();
            }

            result->mode = 1;
            tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
            Optional<Value> mode = wait(tr.get(dataDistributionModeKey));
            if (mode.present()) {
                BinaryReader rd(mode.get(), Unversioned());
                rd >> result->mode;
            }
            if (!result->mode || !ddEnabledState->isDDEnabled()) {
                // DD can be disabled persistently (result->mode = 0) or transiently (isDDEnabled() = 0)
                TraceEvent(SevDebug, "GetInitialDataDistribution_DisabledDD").log();
                return result;
            }

            state Future<vector<ProcessData>> workers = getWorkers(&tr);
            state Future<RangeResult> serverList = tr.getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY);
            wait(success(workers) && success(serverList));
            ASSERT(!serverList.get().more && serverList.get().size() < CLIENT_KNOBS->TOO_MANY);

            std::map<Optional<Standalone<StringRef>>, ProcessData> id_data;
            for (int i = 0; i < workers.get().size(); i++)
                id_data[workers.get()[i].locality.processId()] = workers.get()[i];

            succeeded = true;

            for (int i = 0; i < serverList.get().size(); i++) {
                auto ssi = decodeServerListValue(serverList.get()[i].value);
                if (!ssi.isTss()) {
                    result->allServers.emplace_back(ssi, id_data[ssi.locality.processId()].processClass);
                    server_dc[ssi.id()] = ssi.locality.dcId();
                } else {
                    tss_servers.emplace_back(ssi, id_data[ssi.locality.processId()].processClass);
                }
            }

            break;
        } catch (Error& e) {
            wait(tr.onError(e));

            ASSERT(!succeeded); // We shouldn't be retrying if we have already started modifying result in this loop
            TraceEvent("GetInitialTeamsRetry", distributorId).log();
        }
    }

    // If keyServers is too large to read in a single transaction, then we will have to break this process up into
    // multiple transactions. In that case, each iteration should begin where the previous left off
    while (beginKey < allKeys.end) {
        TEST(beginKey > allKeys.begin); // Multi-transactional getInitialDataDistribution
        loop {
            succeeded = false;
            try {
                tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
                wait(checkMoveKeysLockReadOnly(&tr, moveKeysLock, ddEnabledState));
                state RangeResult UIDtoTagMap = wait(tr.getRange(serverTagKeys, CLIENT_KNOBS->TOO_MANY));
                ASSERT(!UIDtoTagMap.more && UIDtoTagMap.size() < CLIENT_KNOBS->TOO_MANY);
                RangeResult keyServers = wait(krmGetRanges(&tr,
                                                           keyServersPrefix,
                                                           KeyRangeRef(beginKey, allKeys.end),
                                                           SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT,
                                                           SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT_BYTES));
                succeeded = true;

                vector<UID> src, dest, last;

                // for each range
                for (int i = 0; i < keyServers.size() - 1; i++) {
                    DDShardInfo info(keyServers[i].key);
                    decodeKeyServersValue(UIDtoTagMap, keyServers[i].value, src, dest);
                    if (remoteDcIds.size()) {
                        auto srcIter = team_cache.find(src);
                        if (srcIter == team_cache.end()) {
                            for (auto& id : src) {
                                auto& dc = server_dc[id];
                                if (std::find(remoteDcIds.begin(), remoteDcIds.end(), dc) != remoteDcIds.end()) {
                                    info.remoteSrc.push_back(id);
                                } else {
                                    info.primarySrc.push_back(id);
                                }
                            }
                            result->primaryTeams.insert(info.primarySrc);
                            result->remoteTeams.insert(info.remoteSrc);
                            team_cache[src] = std::make_pair(info.primarySrc, info.remoteSrc);
                        } else {
                            info.primarySrc = srcIter->second.first;
                            info.remoteSrc = srcIter->second.second;
                        }
                        if (dest.size()) {
                            info.hasDest = true;
                            auto destIter = team_cache.find(dest);
                            if (destIter == team_cache.end()) {
                                for (auto& id : dest) {
                                    auto& dc = server_dc[id];
                                    if (std::find(remoteDcIds.begin(), remoteDcIds.end(), dc) != remoteDcIds.end()) {
                                        info.remoteDest.push_back(id);
                                    } else {
                                        info.primaryDest.push_back(id);
                                    }
                                }
                                result->primaryTeams.insert(info.primaryDest);
                                result->remoteTeams.insert(info.remoteDest);
                                team_cache[dest] = std::make_pair(info.primaryDest, info.remoteDest);
                            } else {
                                info.primaryDest = destIter->second.first;
                                info.remoteDest = destIter->second.second;
                            }
                        }
                    } else {
                        info.primarySrc = src;
                        auto srcIter = team_cache.find(src);
                        if (srcIter == team_cache.end()) {
                            result->primaryTeams.insert(src);
                            team_cache[src] = std::pair<vector<UID>, vector<UID>>();
                        }
                        if (dest.size()) {
                            info.hasDest = true;
                            info.primaryDest = dest;
                            auto destIter = team_cache.find(dest);
                            if (destIter == team_cache.end()) {
                                result->primaryTeams.insert(dest);
                                team_cache[dest] = std::pair<vector<UID>, vector<UID>>();
                            }
                        }
                    }
                    result->shards.push_back(info);
                }

                ASSERT(keyServers.size() > 0);
                beginKey = keyServers.end()[-1].key;
                break;
            } catch (Error& e) {
                TraceEvent("GetInitialTeamsKeyServersRetry", distributorId).error(e);

                wait(tr.onError(e));
                ASSERT(!succeeded); // We shouldn't be retrying if we have already started modifying result in this
                                    // loop
            }
        }

        tr.reset();
    }

    // a dummy shard at the end with no keys or servers makes life easier for trackInitialShards()
    result->shards.push_back(DDShardInfo(allKeys.end));

    // add tss to server list AFTER teams are built
    for (auto& it : tss_servers) {
        result->allServers.push_back(it);
    }

    return result;
}

ACTOR Future<Void> storageServerTracker(struct DDTeamCollection* self,
                                        Database cx,
                                        TCServerInfo* server,
                                        Promise<Void> errorOut,
                                        Version addedVersion,
                                        const DDEnabledState* ddEnabledState,
                                        bool isTss);

Future<Void> teamTracker(struct DDTeamCollection* const& self,
                         Reference<TCTeamInfo> const& team,
                         bool const& badTeam,
                         bool const& redundantTeam);

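// A DDTeamCollection is the data distributor's view of one region (primary or remote): the storage servers and
// machines known there, the server and machine teams built over them, and the bookkeeping used to recruit servers,
// rebuild teams, and track exclusions and health.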
struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
    // clang-format off
    enum { REQUESTING_WORKER = 0, GETTING_WORKER = 1, GETTING_STORAGE = 2 };
    enum class Status { NONE = 0, WIGGLING = 1, EXCLUDED = 2, FAILED = 3 };

    // addActor: add to actorCollection so that when an actor has an error, the ActorCollection can catch the error.
    // addActor is used to create the actorCollection when the dataDistributionTeamCollection is created
    PromiseStream<Future<Void>> addActor;
    Database cx;
    UID distributorId;
    DatabaseConfiguration configuration;

    bool doBuildTeams;
    bool lastBuildTeamsFailed;
    Future<Void> teamBuilder;
    AsyncTrigger restartTeamBuilder;

    MoveKeysLock lock;
    PromiseStream<RelocateShard> output;
    vector<UID> allServers;
    ServerStatusMap server_status;
    int64_t unhealthyServers;
    std::map<int, int> priority_teams;
    std::map<UID, Reference<TCServerInfo>> server_info;
    std::map<Key, std::vector<Reference<TCServerInfo>>> pid2server_info; // some process may serve as multiple storage servers
    std::vector<AddressExclusion> wiggle_addresses; // collection of wiggling servers' addresses
    std::map<UID, Reference<TCServerInfo>> tss_info_by_pair;
    std::map<UID, Reference<TCServerInfo>> server_and_tss_info; // TODO could replace this with an efficient way to do a read-only concatenation of 2 data structures?
    std::map<Key, int> lagging_zones; // zone to number of storage servers lagging
    AsyncVar<bool> disableFailingLaggingServers;
    Optional<Key> wigglingPid; // Process id of the current wiggling storage server
    Reference<AsyncVar<bool>> pauseWiggle;

    // machine_info has all machines' info; key must be unique across processes on the same machine
    std::map<Standalone<StringRef>, Reference<TCMachineInfo>> machine_info;
    std::vector<Reference<TCMachineTeamInfo>> machineTeams; // all machine teams
    LocalityMap<UID> machineLocalityMap; // locality info of machines

    vector<Reference<TCTeamInfo>> teams;
    vector<Reference<TCTeamInfo>> badTeams;
    Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure;
    PromiseStream<UID> removedServers;
    PromiseStream<UID> removedTSS;
    std::set<UID> recruitingIds; // The IDs of the SS/TSS which are being recruited
    std::set<NetworkAddress> recruitingLocalities;
    Future<Void> initialFailureReactionDelay;
    Future<Void> initializationDoneActor;
    Promise<Void> serverTrackerErrorOut;
    AsyncVar<int> recruitingStream;
    Debouncer restartRecruiting;
    int healthyTeamCount;
    Reference<AsyncVar<bool>> zeroHealthyTeams;

    int optimalTeamCount;
    AsyncVar<bool> zeroOptimalTeams;

    int bestTeamKeepStuckCount = 0;

    bool isTssRecruiting; // If tss recruiting is waiting on a pair, don't consider DD recruiting for the purposes of QuietDB

    // WIGGLING if an address is under storage wiggling.
    // EXCLUDED if an address is in the excluded list in the database.
    // FAILED if an address is permanently failed.
    // NONE by default. Updated asynchronously (eventually)
    AsyncMap<AddressExclusion, Status> excludedServers;

    std::set<AddressExclusion> invalidLocalityAddr; // These addresses have invalid locality for the configured storagePolicy

    std::vector<Optional<Key>> includedDCs;
    Optional<std::vector<Optional<Key>>> otherTrackedDCs;
    bool primary;
    Reference<AsyncVar<bool>> processingUnhealthy;
    Future<Void> readyToStart;
    Future<Void> checkTeamDelay;
    Promise<Void> addSubsetComplete;
    Future<Void> badTeamRemover;
    Future<Void> checkInvalidLocalities;

    Future<Void> wrongStoreTypeRemover;

    Reference<LocalitySet> storageServerSet;

    std::vector<DDTeamCollection*> teamCollections;
    AsyncVar<Optional<Key>> healthyZone;
    Future<bool> clearHealthyZoneFuture;
    double medianAvailableSpace;
    double lastMedianAvailableSpaceUpdate;
    // clang-format on

    int lowestUtilizationTeam;
    int highestUtilizationTeam;

    AsyncTrigger printDetailedTeamsInfo;
    PromiseStream<GetMetricsRequest> getShardMetrics;
    PromiseStream<Promise<int>> getUnhealthyRelocationCount;
    Promise<UID> removeFailedServer;

    void resetLocalitySet() {
        storageServerSet = Reference<LocalitySet>(new LocalityMap<UID>());
        LocalityMap<UID>* storageServerMap = (LocalityMap<UID>*)storageServerSet.getPtr();

        for (auto& it : server_info) {
            it.second->localityEntry = storageServerMap->add(it.second->lastKnownInterface.locality, &it.second->id);
        }
    }

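    // True if the first `amount` servers of `team` (all of them by default) satisfy the configured storage policy on
    // their own, i.e. the policy engine does not need to pick any servers beyond the forced entries.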
    bool satisfiesPolicy(const std::vector<Reference<TCServerInfo>>& team, int amount = -1) const {
        std::vector<LocalityEntry> forcedEntries, resultEntries;
        if (amount == -1) {
            amount = team.size();
        }

        forcedEntries.reserve(amount);
        for (int i = 0; i < amount; i++) {
            forcedEntries.push_back(team[i]->localityEntry);
        }

        bool result = storageServerSet->selectReplicas(configuration.storagePolicy, forcedEntries, resultEntries);
        return result && resultEntries.size() == 0;
    }

    DDTeamCollection(Database const& cx,
                     UID distributorId,
                     MoveKeysLock const& lock,
                     PromiseStream<RelocateShard> const& output,
                     Reference<ShardsAffectedByTeamFailure> const& shardsAffectedByTeamFailure,
                     DatabaseConfiguration configuration,
                     std::vector<Optional<Key>> includedDCs,
                     Optional<std::vector<Optional<Key>>> otherTrackedDCs,
                     Future<Void> readyToStart,
                     Reference<AsyncVar<bool>> zeroHealthyTeams,
                     IsPrimary primary,
                     Reference<AsyncVar<bool>> processingUnhealthy,
                     PromiseStream<GetMetricsRequest> getShardMetrics,
                     Promise<UID> removeFailedServer,
                     PromiseStream<Promise<int>> getUnhealthyRelocationCount)
      : cx(cx), distributorId(distributorId), configuration(configuration), doBuildTeams(true),
        lastBuildTeamsFailed(false), teamBuilder(Void()), lock(lock), output(output), unhealthyServers(0),
        shardsAffectedByTeamFailure(shardsAffectedByTeamFailure),
        initialFailureReactionDelay(
            delayed(readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskPriority::DataDistribution)),
        initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
        recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY), healthyTeamCount(0),
        zeroHealthyTeams(zeroHealthyTeams), optimalTeamCount(0), zeroOptimalTeams(true), isTssRecruiting(false),
        includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs), primary(primary),
        processingUnhealthy(processingUnhealthy), readyToStart(readyToStart),
        checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskPriority::DataDistribution)), badTeamRemover(Void()),
        checkInvalidLocalities(Void()), wrongStoreTypeRemover(Void()), storageServerSet(new LocalityMap<UID>()),
        clearHealthyZoneFuture(true), medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO),
        lastMedianAvailableSpaceUpdate(0), lowestUtilizationTeam(0), highestUtilizationTeam(0),
        getShardMetrics(getShardMetrics), getUnhealthyRelocationCount(getUnhealthyRelocationCount),
        removeFailedServer(removeFailedServer) {
        if (!primary || configuration.usableRegions == 1) {
            TraceEvent("DDTrackerStarting", distributorId).detail("State", "Inactive").trackLatest("DDTrackerStarting");
        }
    }

    ~DDTeamCollection() {
        TraceEvent("DDTeamCollectionDestructed", distributorId).detail("Primary", primary);

        // Cancel the teamBuilder to avoid creating new teams after teams are cancelled.
        teamBuilder.cancel();
        // TraceEvent("DDTeamCollectionDestructed", distributorId)
        //     .detail("Primary", primary)
        //     .detail("TeamBuilderDestroyed", server_info.size());

        // Other teamCollections also hold a pointer to this teamCollection;
        // TeamTracker may access the destructed DDTeamCollection if we do not reset the pointer
        for (int i = 0; i < teamCollections.size(); i++) {
            if (teamCollections[i] != nullptr && teamCollections[i] != this) {
                for (int j = 0; j < teamCollections[i]->teamCollections.size(); ++j) {
                    if (teamCollections[i]->teamCollections[j] == this) {
                        teamCollections[i]->teamCollections[j] = nullptr;
                    }
                }
            }
        }

        // Team trackers hold pointers to DDTeamCollections in both the primary and remote regions.
        // The following kills a reference cycle between the teamTracker actor and the TCTeamInfo that both holds and
        // is held by the actor. It also ensures that the trackers are done fiddling with healthyTeamCount before we
        // free this.
        for (auto& team : teams) {
            team->tracker.cancel();
        }
        // The commented TraceEvent log is useful in detecting what is running during the destruction
        // TraceEvent("DDTeamCollectionDestructed", distributorId)
        //     .detail("Primary", primary)
        //     .detail("TeamTrackerDestroyed", teams.size());
        for (auto& badTeam : badTeams) {
            badTeam->tracker.cancel();
        }
        // TraceEvent("DDTeamCollectionDestructed", distributorId)
        //     .detail("Primary", primary)
        //     .detail("BadTeamTrackerDestroyed", badTeams.size());

        // The following makes sure that, even if a reference to a team is held in the DD Queue, the tracker will be
        // stopped before the server_status map, to which it has a pointer, is destroyed.
        for (auto& [_, info] : server_and_tss_info) {
            info->tracker.cancel();
            info->collection = nullptr;
        }

        // TraceEvent("DDTeamCollectionDestructed", distributorId)
        //     .detail("Primary", primary)
        //     .detail("ServerTrackerDestroyed", server_info.size());
    }

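    // Track the number of zones that contain lagging storage servers. Once more than max(1, storageTeamSize - 1)
    // zones are lagging, disableFailingLaggingServers is set so that lagging servers are no longer failed.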
    void addLaggingStorageServer(Key zoneId) {
        lagging_zones[zoneId]++;
        if (lagging_zones.size() > std::max(1, configuration.storageTeamSize - 1) &&
            !disableFailingLaggingServers.get())
            disableFailingLaggingServers.set(true);
    }

    void removeLaggingStorageServer(Key zoneId) {
        auto iter = lagging_zones.find(zoneId);
        ASSERT(iter != lagging_zones.end());
        iter->second--;
        ASSERT(iter->second >= 0);
        if (iter->second == 0)
            lagging_zones.erase(iter);
        if (lagging_zones.size() <= std::max(1, configuration.storageTeamSize - 1) &&
            disableFailingLaggingServers.get())
            disableFailingLaggingServers.set(false);
    }

    ACTOR static Future<Void> logOnCompletion(Future<Void> signal, DDTeamCollection* self) {
        wait(signal);
        wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskPriority::DataDistribution));

        if (!self->primary || self->configuration.usableRegions == 1) {
            TraceEvent("DDTrackerStarting", self->distributorId)
                .detail("State", "Active")
                .trackLatest("DDTrackerStarting");
        }

        return Void();
    }

    ACTOR static Future<Void> interruptableBuildTeams(DDTeamCollection* self) {
        if (!self->addSubsetComplete.isSet()) {
            wait(addSubsetOfEmergencyTeams(self));
            self->addSubsetComplete.send(Void());
        }

        loop {
            choose {
                when(wait(self->buildTeams(self))) { return Void(); }
                when(wait(self->restartTeamBuilder.onTrigger())) {}
            }
        }
    }

    ACTOR static Future<Void> checkBuildTeams(DDTeamCollection* self) {
        wait(self->checkTeamDelay);
        while (!self->teamBuilder.isReady())
            wait(self->teamBuilder);

        if (self->doBuildTeams && self->readyToStart.isReady()) {
            self->doBuildTeams = false;
            self->teamBuilder = self->interruptableBuildTeams(self);
            wait(self->teamBuilder);
        }

        return Void();
    }

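    // Pick a destination team for a relocation request. With wantsTrueBest the search scans every team, starting
    // from the last best index, for the healthy team with the lowest (or highest, per preferLowerUtilization) load;
    // otherwise it samples up to BEST_TEAM_OPTION_COUNT distinct random healthy teams and takes the best of those.
    // If no healthy option exists and there are currently zero healthy teams, an unhealthy team covering the
    // complete sources may be returned instead.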
// SOMEDAY: Make bestTeam better about deciding to leave a shard where it is (e.g. in PRIORITY_TEAM_HEALTHY case)
// use keys, src, dest, metrics, priority, system load, etc.. to decide...
2021-03-11 02:06:03 +08:00
ACTOR static Future < Void > getTeam ( DDTeamCollection * self , GetTeamRequest req ) {
2017-05-26 04:48:44 +08:00
try {
2021-03-11 02:06:03 +08:00
wait ( self - > checkBuildTeams ( self ) ) ;
if ( now ( ) - self - > lastMedianAvailableSpaceUpdate > SERVER_KNOBS - > AVAILABLE_SPACE_UPDATE_DELAY ) {
2020-02-22 07:14:32 +08:00
self - > lastMedianAvailableSpaceUpdate = now ( ) ;
std : : vector < double > teamAvailableSpace ;
teamAvailableSpace . reserve ( self - > teams . size ( ) ) ;
2020-12-27 13:46:20 +08:00
for ( const auto & team : self - > teams ) {
if ( team - > isHealthy ( ) ) {
teamAvailableSpace . push_back ( team - > getMinAvailableSpaceRatio ( ) ) ;
2020-02-22 07:14:32 +08:00
}
}
2021-03-11 02:06:03 +08:00
size_t pivot = teamAvailableSpace . size ( ) / 2 ;
2020-02-22 07:41:01 +08:00
if ( teamAvailableSpace . size ( ) > 1 ) {
2021-03-11 02:06:03 +08:00
std : : nth_element (
teamAvailableSpace . begin ( ) , teamAvailableSpace . begin ( ) + pivot , teamAvailableSpace . end ( ) ) ;
self - > medianAvailableSpace =
std : : max ( SERVER_KNOBS - > MIN_AVAILABLE_SPACE_RATIO ,
std : : min ( SERVER_KNOBS - > TARGET_AVAILABLE_SPACE_RATIO , teamAvailableSpace [ pivot ] ) ) ;
2020-02-22 07:41:01 +08:00
} else {
self - > medianAvailableSpace = SERVER_KNOBS - > MIN_AVAILABLE_SPACE_RATIO ;
2020-02-22 07:14:32 +08:00
}
2020-10-16 01:01:56 +08:00
if ( self - > medianAvailableSpace < SERVER_KNOBS - > TARGET_AVAILABLE_SPACE_RATIO ) {
2020-10-24 06:29:08 +08:00
TraceEvent ( SevWarn , " DDTeamMedianAvailableSpaceTooSmall " , self - > distributorId )
2020-10-16 06:25:00 +08:00
. detail ( " MedianAvailableSpaceRatio " , self - > medianAvailableSpace )
2020-10-24 06:29:08 +08:00
. detail ( " TargetAvailableSpaceRatio " , SERVER_KNOBS - > TARGET_AVAILABLE_SPACE_RATIO )
. detail ( " Primary " , self - > primary ) ;
2020-10-24 05:01:53 +08:00
self - > printDetailedTeamsInfo . trigger ( ) ;
2020-10-16 01:01:56 +08:00
}
2020-02-22 07:14:32 +08:00
}
2017-05-26 04:48:44 +08:00
2020-06-30 01:02:27 +08:00
bool foundSrc = false ;
2021-03-11 02:06:03 +08:00
for ( int i = 0 ; i < req . src . size ( ) ; i + + ) {
if ( self - > server_info . count ( req . src [ i ] ) ) {
2020-06-30 01:02:27 +08:00
foundSrc = true ;
break ;
}
}
2017-05-26 04:48:44 +08:00
// Select the best team
// Currently the metric is minimum used disk space (adjusted for data in flight)
// Only healthy teams may be selected. The team has to be healthy at the moment we update
// shardsAffectedByTeamFailure or we could be dropping a shard on the floor (since team
// tracking is "edge triggered")
// SOMEDAY: Account for capacity, load (when shardMetrics load is high)
2018-12-07 03:26:30 +08:00
// self->teams.size() can be 0 under the ConfigureTest.txt test when we change configurations
2018-11-02 04:32:13 +08:00
// The situation happens rarely. We may want to eliminate this situation someday
2021-03-11 02:06:03 +08:00
if ( ! self - > teams . size ( ) ) {
req . reply . send ( std : : make_pair ( Optional < Reference < IDataDistributionTeam > > ( ) , foundSrc ) ) ;
2018-11-02 04:32:13 +08:00
return Void ( ) ;
}
2018-01-31 09:00:51 +08:00
2017-05-26 04:48:44 +08:00
int64_t bestLoadBytes = 0 ;
Optional < Reference < IDataDistributionTeam > > bestOption ;
2020-01-10 08:59:37 +08:00
std : : vector < Reference < IDataDistributionTeam > > randomTeams ;
2020-01-11 06:58:38 +08:00
const std : : set < UID > completeSources ( req . completeSources . begin ( ) , req . completeSources . end ( ) ) ;
2017-05-26 04:48:44 +08:00
2020-02-21 06:52:08 +08:00
// Note: this block does not apply any filters from the request
2021-03-11 02:06:03 +08:00
if ( ! req . wantsNewServers ) {
for ( int i = 0 ; i < req . completeSources . size ( ) ; i + + ) {
if ( ! self - > server_info . count ( req . completeSources [ i ] ) ) {
2020-01-11 06:46:40 +08:00
continue ;
}
2021-03-11 02:06:03 +08:00
auto & teamList = self - > server_info [ req . completeSources [ i ] ] - > teams ;
for ( int j = 0 ; j < teamList . size ( ) ; j + + ) {
2020-01-11 06:46:40 +08:00
bool found = true ;
auto serverIDs = teamList [ j ] - > getServerIDs ( ) ;
2021-03-11 02:06:03 +08:00
for ( int k = 0 ; k < teamList [ j ] - > size ( ) ; k + + ) {
if ( ! completeSources . count ( serverIDs [ k ] ) ) {
2020-01-11 06:46:40 +08:00
found = false ;
break ;
2017-05-26 04:48:44 +08:00
}
}
2021-03-11 02:06:03 +08:00
if ( found & & teamList [ j ] - > isHealthy ( ) ) {
2020-06-30 01:02:27 +08:00
bestOption = teamList [ j ] ;
2021-03-11 02:06:03 +08:00
req . reply . send ( std : : make_pair ( bestOption , foundSrc ) ) ;
2020-01-11 06:46:40 +08:00
return Void ( ) ;
}
2017-05-26 04:48:44 +08:00
}
}
}
2021-03-11 02:06:03 +08:00
if ( req . wantsTrueBest ) {
ASSERT ( ! bestOption . present ( ) ) ;
auto & startIndex =
req . preferLowerUtilization ? self - > lowestUtilizationTeam : self - > highestUtilizationTeam ;
if ( startIndex > = self - > teams . size ( ) ) {
2020-03-14 06:21:33 +08:00
startIndex = 0 ;
}
int bestIndex = startIndex ;
2021-03-11 02:06:03 +08:00
for ( int i = 0 ; i < self - > teams . size ( ) ; i + + ) {
2020-03-14 06:21:33 +08:00
int currentIndex = ( startIndex + i ) % self - > teams . size ( ) ;
if ( self - > teams [ currentIndex ] - > isHealthy ( ) & &
2021-03-11 02:06:03 +08:00
( ! req . preferLowerUtilization | |
self - > teams [ currentIndex ] - > hasHealthyAvailableSpace ( self - > medianAvailableSpace ) ) ) {
2020-03-14 06:21:33 +08:00
int64_t loadBytes = self - > teams [ currentIndex ] - > getLoadBytes ( true , req . inflightPenalty ) ;
2021-03-11 02:06:03 +08:00
if ( ( ! bestOption . present ( ) | | ( req . preferLowerUtilization & & loadBytes < bestLoadBytes ) | |
( ! req . preferLowerUtilization & & loadBytes > bestLoadBytes ) ) & &
( ! req . teamMustHaveShards | |
self - > shardsAffectedByTeamFailure - > hasShards ( ShardsAffectedByTeamFailure : : Team (
self - > teams [ currentIndex ] - > getServerIDs ( ) , self - > primary ) ) ) ) {
2017-05-26 04:48:44 +08:00
bestLoadBytes = loadBytes ;
2020-03-14 06:21:33 +08:00
bestOption = self - > teams [ currentIndex ] ;
bestIndex = currentIndex ;
2017-05-26 04:48:44 +08:00
}
}
}
                startIndex = bestIndex;
            } else {
                int nTries = 0;
                while (randomTeams.size() < SERVER_KNOBS->BEST_TEAM_OPTION_COUNT &&
                       nTries < SERVER_KNOBS->BEST_TEAM_MAX_TEAM_TRIES) {
                    // If unhealthy teams are the majority, we may not find an ok dest in this while loop
                    Reference<IDataDistributionTeam> dest = deterministicRandom()->randomChoice(self->teams);

                    bool ok = dest->isHealthy() && (!req.preferLowerUtilization ||
                                                    dest->hasHealthyAvailableSpace(self->medianAvailableSpace));

                    for (int i = 0; ok && i < randomTeams.size(); i++) {
                        if (randomTeams[i]->getServerIDs() == dest->getServerIDs()) {
                            ok = false;
                            break;
                        }
                    }

                    ok = ok && (!req.teamMustHaveShards ||
                                self->shardsAffectedByTeamFailure->hasShards(
                                    ShardsAffectedByTeamFailure::Team(dest->getServerIDs(), self->primary)));

                    if (ok)
                        randomTeams.push_back(dest);
                    else
                        nTries++;
                }

                // Log the BestTeamStuck reason when we have healthy teams but none of them has healthy free space
                if (randomTeams.empty() && !self->zeroHealthyTeams->get()) {
                    self->bestTeamKeepStuckCount++;
                    if (g_network->isSimulated()) {
                        TraceEvent(SevWarn, "GetTeamReturnEmpty").detail("HealthyTeams", self->healthyTeamCount);
                    }
                } else {
                    self->bestTeamKeepStuckCount = 0;
                }

                for (int i = 0; i < randomTeams.size(); i++) {
                    int64_t loadBytes = randomTeams[i]->getLoadBytes(true, req.inflightPenalty);
                    if (!bestOption.present() || (req.preferLowerUtilization && loadBytes < bestLoadBytes) ||
                        (!req.preferLowerUtilization && loadBytes > bestLoadBytes)) {
                        bestLoadBytes = loadBytes;
                        bestOption = randomTeams[i];
                    }
                }
            }

            // Note: req.completeSources can be empty and all servers (and server teams) can be unhealthy.
            // We will get stuck here! This only happens when a DC fails. No need to consider it right now.
            // Note: this block does not apply any filters from the request
            if (!bestOption.present() && self->zeroHealthyTeams->get()) {
                // Attempt to find the unhealthy source server team and return it
                for (int i = 0; i < req.completeSources.size(); i++) {
                    if (!self->server_info.count(req.completeSources[i])) {
                        continue;
                    }
                    auto& teamList = self->server_info[req.completeSources[i]]->teams;
                    for (int j = 0; j < teamList.size(); j++) {
                        bool found = true;
                        auto serverIDs = teamList[j]->getServerIDs();
                        for (int k = 0; k < teamList[j]->size(); k++) {
                            if (!completeSources.count(serverIDs[k])) {
                                found = false;
                                break;
                            }
                        }
                        if (found) {
                            bestOption = teamList[j];
                            req.reply.send(std::make_pair(bestOption, foundSrc));
                            return Void();
                        }
                    }
                }
            }
            // if (!bestOption.present()) {
            //     TraceEvent("GetTeamRequest").detail("Request", req.getDesc());
            //     self->traceAllInfo(true);
            // }

            req.reply.send(std::make_pair(bestOption, foundSrc));

            return Void();
        } catch (Error& e) {
            if (e.code() != error_code_actor_cancelled)
                req.reply.sendError(e);
            throw;
        }
    }
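
    // Returns the total number of bytes of data currently in flight to the storage servers tracked by this
    // collection; intended for debug/trace output.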
    int64_t getDebugTotalDataInFlight() const {
        int64_t total = 0;
        for (auto itr = server_info.begin(); itr != server_info.end(); ++itr)
            total += itr->second->dataInFlightToServer;
        return total;
    }
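
    // For each bad team, collect its healthy servers in the desired DC and, if they can form a policy-compliant
    // team of the configured size that does not already exist, add that subset as a new team. This limits data
    // movement when only some members of a team are unhealthy.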
    ACTOR static Future<Void> addSubsetOfEmergencyTeams(DDTeamCollection* self) {
        state int idx = 0;
        state std::vector<Reference<TCServerInfo>> servers;
        state std::vector<UID> serverIds;
        state Reference<LocalitySet> tempSet = Reference<LocalitySet>(new LocalityMap<UID>());
        state LocalityMap<UID>* tempMap = (LocalityMap<UID>*)tempSet.getPtr();

        for (; idx < self->badTeams.size(); idx++) {
            servers.clear();
            for (const auto& server : self->badTeams[idx]->getServers()) {
                if (server->inDesiredDC && !self->server_status.get(server->id).isUnhealthy()) {
                    servers.push_back(server);
                }
            }

            // For the bad team that is too big (too many servers), we will try to find a subset of servers in the
            // team to construct a new healthy team, so that moving data to the new healthy team will not
            // cause too much data movement overhead
            // FIXME: This code logic can be simplified.
            if (servers.size() >= self->configuration.storageTeamSize) {
                bool foundTeam = false;
                for (int j = 0; j < servers.size() - self->configuration.storageTeamSize + 1 && !foundTeam; j++) {
                    auto& serverTeams = servers[j]->teams;
                    for (int k = 0; k < serverTeams.size(); k++) {
                        auto& testTeam = serverTeams[k]->getServerIDs();
                        bool allInTeam = true; // All servers in testTeam belong to the healthy servers
                        for (int l = 0; l < testTeam.size(); l++) {
                            bool foundServer = false;
                            for (auto it : servers) {
                                if (it->id == testTeam[l]) {
                                    foundServer = true;
                                    break;
                                }
                            }
                            if (!foundServer) {
                                allInTeam = false;
                                break;
                            }
                        }
                        if (allInTeam) {
                            foundTeam = true;
                            break;
                        }
                    }
                }
                if (!foundTeam) {
                    if (self->satisfiesPolicy(servers)) {
                        if (servers.size() == self->configuration.storageTeamSize ||
                            self->satisfiesPolicy(servers, self->configuration.storageTeamSize)) {
                            servers.resize(self->configuration.storageTeamSize);
                            self->addTeam(servers, true);
                            // self->traceTeamCollectionInfo(); // Trace at the end of the function
                        } else {
                            tempSet->clear();
                            for (auto it : servers) {
                                tempMap->add(it->lastKnownInterface.locality, &it->id);
                            }
                            std::vector<LocalityEntry> resultEntries, forcedEntries;
                            bool result = tempSet->selectReplicas(
                                self->configuration.storagePolicy, forcedEntries, resultEntries);
                            ASSERT(result && resultEntries.size() == self->configuration.storageTeamSize);

                            serverIds.clear();
                            for (auto& it : resultEntries) {
                                serverIds.push_back(*tempMap->getObject(it));
                            }
                            std::sort(serverIds.begin(), serverIds.end());
                            self->addTeam(serverIds.begin(), serverIds.end(), true);
                        }
                    } else {
                        serverIds.clear();
                        for (auto it : servers) {
                            serverIds.push_back(it->id);
                        }
                        TraceEvent(SevWarnAlways, "CannotAddSubset", self->distributorId)
                            .detail("Servers", describe(serverIds));
                    }
                }
            }
            wait(yield());
        }

        // Trace and record the current number of teams for correctness test
        self->traceTeamCollectionInfo();

        return Void();
    }
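
    // Seed the team collection from the initial data distribution read from the database: register every server
    // this collection is responsible for and recreate the server teams recorded for the primary or remote region.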
    ACTOR static Future<Void> init(DDTeamCollection* self,
                                   Reference<InitialDataDistribution> initTeams,
                                   const DDEnabledState* ddEnabledState) {
        self->healthyZone.set(initTeams->initHealthyZoneValue);
        // SOMEDAY: If some servers have teams and not others (or some servers have more data than others) and there
        // is an address/locality collision, should we preferentially mark the least used server as undesirable?
        for (auto i = initTeams->allServers.begin(); i != initTeams->allServers.end(); ++i) {
            if (self->shouldHandleServer(i->first)) {
                if (!self->isValidLocality(self->configuration.storagePolicy, i->first.locality)) {
                    TraceEvent(SevWarnAlways, "MissingLocality")
                        .detail("Server", i->first.uniqueID)
                        .detail("Locality", i->first.locality.toString());
                    auto addr = i->first.stableAddress();
                    self->invalidLocalityAddr.insert(AddressExclusion(addr.ip, addr.port));
                    if (self->checkInvalidLocalities.isReady()) {
                        self->checkInvalidLocalities = checkAndRemoveInvalidLocalityAddr(self);
                        self->addActor.send(self->checkInvalidLocalities);
                    }
                }
                self->addServer(i->first, i->second, self->serverTrackerErrorOut, 0, ddEnabledState);
            }
        }

        state std::set<std::vector<UID>>::iterator teamIter =
            self->primary ? initTeams->primaryTeams.begin() : initTeams->remoteTeams.begin();
        state std::set<std::vector<UID>>::iterator teamIterEnd =
            self->primary ? initTeams->primaryTeams.end() : initTeams->remoteTeams.end();
        for (; teamIter != teamIterEnd; ++teamIter) {
            self->addTeam(teamIter->begin(), teamIter->end(), true);
            wait(yield());
        }

        return Void();
    }
    // Check if a server or machine has a valid locality based on the configured replication policy
    bool isValidLocality(Reference<IReplicationPolicy> storagePolicy, const LocalityData& locality) const {
        // Future: Once we add a simulation test that misconfigures a cluster, such as not setting some locality
        // entries, DD_VALIDATE_LOCALITY should always be true. Otherwise, the simulation test may fail.
        if (!SERVER_KNOBS->DD_VALIDATE_LOCALITY) {
            // Disable the check on whether the locality is valid
            return true;
        }

        std::set<std::string> replicationPolicyKeys = storagePolicy->attributeKeys();
        for (auto& policy : replicationPolicyKeys) {
            if (!locality.isPresent(policy)) {
                return false;
            }
        }

        return true;
    }
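
    // Emit a DataDistributionTeamQuality trace event summarizing how evenly teams are spread across servers and
    // machines: min/max teams per server and per machine, plus a normalized variance of teams per server.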
    void evaluateTeamQuality() const {
        int teamCount = teams.size(), serverCount = allServers.size();
        double teamsPerServer = (double)teamCount * configuration.storageTeamSize / serverCount;

        ASSERT(serverCount == server_info.size());

        int minTeams = std::numeric_limits<int>::max();
        int maxTeams = std::numeric_limits<int>::min();
        double varTeams = 0;

        std::map<Optional<Standalone<StringRef>>, int> machineTeams;
        for (const auto& [id, info] : server_info) {
            if (!server_status.get(id).isUnhealthy()) {
                int stc = info->teams.size();
                minTeams = std::min(minTeams, stc);
                maxTeams = std::max(maxTeams, stc);
                varTeams += (stc - teamsPerServer) * (stc - teamsPerServer);
                // Use zoneId as server's machine id
                machineTeams[info->lastKnownInterface.locality.zoneId()] += stc;
            }
        }
        varTeams /= teamsPerServer * teamsPerServer;

        int minMachineTeams = std::numeric_limits<int>::max();
        int maxMachineTeams = std::numeric_limits<int>::min();
        for (auto m = machineTeams.begin(); m != machineTeams.end(); ++m) {
            minMachineTeams = std::min(minMachineTeams, m->second);
            maxMachineTeams = std::max(maxMachineTeams, m->second);
        }

        TraceEvent(minTeams > 0 ? SevInfo : SevWarn, "DataDistributionTeamQuality", distributorId)
            .detail("Servers", serverCount)
            .detail("Teams", teamCount)
            .detail("TeamsPerServer", teamsPerServer)
            .detail("Variance", varTeams / serverCount)
            .detail("ServerMinTeams", minTeams)
            .detail("ServerMaxTeams", maxTeams)
            .detail("MachineMinTeams", minMachineTeams)
            .detail("MachineMaxTeams", maxMachineTeams);
    }
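
    // Return the largest number of servers that the candidate team shares with any existing team of team[0].
    // Assumes both the candidate team and the stored teams are sorted by server UID, so each comparison is a
    // linear two-pointer intersection; a result equal to team.size() means an identical team already exists.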
    int overlappingMembers(const vector<UID>& team) const {
        if (team.empty()) {
            return 0;
        }

        int maxMatchingServers = 0;
        const UID& serverID = team[0];
        const auto it = server_info.find(serverID);
        ASSERT(it != server_info.end());
        const auto& usedTeams = it->second->teams;
        for (const auto& usedTeam : usedTeams) {
            auto used = usedTeam->getServerIDs();
            int teamIdx = 0;
            int usedIdx = 0;
            int matchingServers = 0;
            while (teamIdx < team.size() && usedIdx < used.size()) {
                if (team[teamIdx] == used[usedIdx]) {
                    matchingServers++;
                    teamIdx++;
                    usedIdx++;
                } else if (team[teamIdx] < used[usedIdx]) {
                    teamIdx++;
                } else {
                    usedIdx++;
                }
            }
            ASSERT(matchingServers > 0);
            maxMatchingServers = std::max(maxMatchingServers, matchingServers);
            if (maxMatchingServers == team.size()) {
                return maxMatchingServers;
            }
        }

        return maxMatchingServers;
    }
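
    // Machine-team analogue of overlappingMembers(): return the largest number of machine IDs that the sorted
    // candidate machine team shares with any existing machine team of its first member.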
    int overlappingMachineMembers(vector<Standalone<StringRef>> const& team) const {
        if (team.empty()) {
            return 0;
        }

        int maxMatchingServers = 0;
        auto const& machineID = team[0];
        for (auto const& usedTeam : machine_info.at(machineID)->machineTeams) {
            auto used = usedTeam->machineIDs;
            int teamIdx = 0;
            int usedIdx = 0;
            int matchingServers = 0;
            while (teamIdx < team.size() && usedIdx < used.size()) {
                if (team[teamIdx] == used[usedIdx]) {
                    matchingServers++;
                    teamIdx++;
                    usedIdx++;
                } else if (team[teamIdx] < used[usedIdx]) {
                    teamIdx++;
                } else {
                    usedIdx++;
                }
            }
            ASSERT(matchingServers > 0);
            maxMatchingServers = std::max(maxMatchingServers, matchingServers);
            if (maxMatchingServers == team.size()) {
                return maxMatchingServers;
            }
        }

        return maxMatchingServers;
    }
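
    // Look up an existing machine team with exactly the given machine IDs (callers pass them sorted) by scanning
    // the machine teams of the first member; return an invalid reference if no such team exists.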
2018-08-30 05:40:39 +08:00
2021-08-13 09:52:57 +08:00
Reference < TCMachineTeamInfo > findMachineTeam ( vector < Standalone < StringRef > > const & machineIDs ) const {
2018-12-13 10:29:03 +08:00
if ( machineIDs . empty ( ) ) {
return Reference < TCMachineTeamInfo > ( ) ;
}
Standalone < StringRef > machineID = machineIDs [ 0 ] ;
2021-08-13 09:52:57 +08:00
for ( auto & machineTeam : machine_info . at ( machineID ) - > machineTeams ) {
2018-12-13 10:29:03 +08:00
if ( machineTeam - > machineIDs = = machineIDs ) {
return machineTeam ;
2018-08-30 05:40:39 +08:00
}
}
2018-10-16 07:25:17 +08:00
return Reference < TCMachineTeamInfo > ( ) ;
2018-08-30 05:40:39 +08:00
}
    // Assume begin to end is sorted by std::sort
    // Assume InputIt is an iterator to UID
    // Note: We must allow creating empty teams because an empty team is created when a remote DB is initialized.
    // The empty team is used as the starting point to move data to the remote DB
    // begin : the start of the team member IDs
    // end : the end of the team member IDs
    // isInitialTeam : false when the team is added by addTeamsBestOf(); true otherwise, e.g.,
    // when the team is added at init() when we recreate teams by looking up the DB
    template <class InputIt>
    void addTeam(InputIt begin, InputIt end, bool isInitialTeam) {
        vector<Reference<TCServerInfo>> newTeamServers;
        for (auto i = begin; i != end; ++i) {
            if (server_info.find(*i) != server_info.end()) {
                newTeamServers.push_back(server_info[*i]);
            }
        }

        addTeam(newTeamServers, isInitialTeam);
    }
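
    // Add a team built from the given server references. Teams that are the wrong size, violate the replication
    // policy, or are flagged redundant are tracked as bad teams; good teams are registered with their servers and
    // linked to a (possibly newly created) machine team.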
    void addTeam(const vector<Reference<TCServerInfo>>& newTeamServers,
                 bool isInitialTeam,
                 bool redundantTeam = false) {
        auto teamInfo = makeReference<TCTeamInfo>(newTeamServers);

        // Move satisfiesPolicy to the end for a performance benefit
        bool badTeam = redundantTeam || teamInfo->size() != configuration.storageTeamSize ||
                       !satisfiesPolicy(teamInfo->getServers());

        teamInfo->tracker = teamTracker(this, teamInfo, badTeam, redundantTeam);
        // ASSERT( teamInfo->serverIDs.size() > 0 ); //team can be empty at DB initialization
        if (badTeam) {
            badTeams.push_back(teamInfo);
            return;
        }

        // For a good team, we add it to teams and create a machine team for it when necessary
        teams.push_back(teamInfo);
        for (int i = 0; i < newTeamServers.size(); ++i) {
            newTeamServers[i]->teams.push_back(teamInfo);
        }

        // Find or create a machine team for the server team
        // Add the reference of the machineTeam (with machineIDs) into the process team
        vector<Standalone<StringRef>> machineIDs;
        for (auto server = newTeamServers.begin(); server != newTeamServers.end(); ++server) {
            ASSERT_WE_THINK((*server)->machine.isValid());
            machineIDs.push_back((*server)->machine->machineID);
        }
        sort(machineIDs.begin(), machineIDs.end());
        Reference<TCMachineTeamInfo> machineTeamInfo = findMachineTeam(machineIDs);

        // A team is not an initial team if it is added by addTeamsBestOf(), which always creates a team with the
        // correct size. A non-initial team must have its machine team created and its size must be correct.
        ASSERT(isInitialTeam || machineTeamInfo.isValid());

        // Create a machine team if it does not exist
        // Note an initial team may be added at init() even though the team size is not storageTeamSize
        if (!machineTeamInfo.isValid() && !machineIDs.empty()) {
            machineTeamInfo = addMachineTeam(machineIDs.begin(), machineIDs.end());
        }

        if (!machineTeamInfo.isValid()) {
            TraceEvent(SevWarn, "AddTeamWarning")
                .detail("NotFoundMachineTeam", "OKIfTeamIsEmpty")
                .detail("TeamInfo", teamInfo->getDesc());
        }
        teamInfo->machineTeam = machineTeamInfo;
        machineTeamInfo->serverTeams.push_back(teamInfo);
        if (g_network->isSimulated()) {
            // Update server team information for the consistency check in simulation
            traceTeamCollectionInfo();
        }
    }

    void addTeam(std::set<UID> const& team, bool isInitialTeam) { addTeam(team.begin(), team.end(), isInitialTeam); }
    // Add a machine team specified by input machines
    Reference<TCMachineTeamInfo> addMachineTeam(vector<Reference<TCMachineInfo>> machines) {
        auto machineTeamInfo = makeReference<TCMachineTeamInfo>(machines);
        machineTeams.push_back(machineTeamInfo);

        // Assign machine teams to machine
        for (auto machine : machines) {
            // A machine's machineTeams vector should not hold duplicate machineTeam members
            ASSERT_WE_THINK(std::count(machine->machineTeams.begin(), machine->machineTeams.end(), machineTeamInfo) ==
                            0);
            machine->machineTeams.push_back(machineTeamInfo);
        }

        return machineTeamInfo;
    }
    // Add a machine team by using the machineIDs from begin to end
    Reference<TCMachineTeamInfo> addMachineTeam(vector<Standalone<StringRef>>::iterator begin,
                                                vector<Standalone<StringRef>>::iterator end) {
        vector<Reference<TCMachineInfo>> machines;

        for (auto i = begin; i != end; ++i) {
            if (machine_info.find(*i) != machine_info.end()) {
                machines.push_back(machine_info[*i]);
            } else {
                TraceEvent(SevWarn, "AddMachineTeamError").detail("MachineIDNotExist", i->contents().toString());
            }
        }

        return addMachineTeam(machines);
    }
    // Group storage servers (processes) based on their machineId in LocalityData
    // All created machines are healthy
    // Return the number of healthy servers we grouped into machines
    int constructMachinesFromServers() {
        int totalServerIndex = 0;
        for (auto i = server_info.begin(); i != server_info.end(); ++i) {
            if (!server_status.get(i->first).isUnhealthy()) {
                checkAndCreateMachine(i->second);
                totalServerIndex++;
            }
        }

        return totalServerIndex;
    }
    void traceConfigInfo() const {
        TraceEvent("DDConfig", distributorId)
            .detail("StorageTeamSize", configuration.storageTeamSize)
            .detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER)
            .detail("MaxTeamsPerServer", SERVER_KNOBS->MAX_TEAMS_PER_SERVER)
            .detail("StoreType", configuration.storageServerStoreType);
    }
    void traceServerInfo() const {
        int i = 0;

        TraceEvent("ServerInfo", distributorId).detail("Size", server_info.size());
        for (auto& server : server_info) {
            TraceEvent("ServerInfo", distributorId)
                .detail("ServerInfoIndex", i++)
                .detail("ServerID", server.first.toString())
                .detail("ServerTeamOwned", server.second->teams.size())
                .detail("MachineID", server.second->machine->machineID.contents().toString())
                .detail("StoreType", server.second->storeType.toString())
                .detail("InDesiredDC", server.second->inDesiredDC);
        }
        for (auto& server : server_info) {
            const UID& uid = server.first;
            TraceEvent("ServerStatus", distributorId)
                .detail("ServerID", uid)
                .detail("Healthy", !server_status.get(uid).isUnhealthy())
                .detail("MachineIsValid", server_info.at(uid)->machine.isValid())
                .detail("MachineTeamSize",
                        server_info.at(uid)->machine.isValid() ? server_info.at(uid)->machine->machineTeams.size()
                                                               : -1);
        }
    }
    void traceServerTeamInfo() const {
        int i = 0;

        TraceEvent("ServerTeamInfo", distributorId).detail("Size", teams.size());
        for (auto& team : teams) {
            TraceEvent("ServerTeamInfo", distributorId)
                .detail("TeamIndex", i++)
                .detail("Healthy", team->isHealthy())
                .detail("TeamSize", team->size())
                .detail("MemberIDs", team->getServerIDsStr())
                .detail("TeamID", team->getTeamID());
        }
    }
    void traceMachineInfo() const {
        int i = 0;

        TraceEvent("MachineInfo").detail("Size", machine_info.size());
        for (auto& machine : machine_info) {
            TraceEvent("MachineInfo", distributorId)
                .detail("MachineInfoIndex", i++)
                .detail("Healthy", isMachineHealthy(machine.second))
                .detail("MachineID", machine.first.contents().toString())
                .detail("MachineTeamOwned", machine.second->machineTeams.size())
                .detail("ServerNumOnMachine", machine.second->serversOnMachine.size())
                .detail("ServersID", machine.second->getServersIDStr());
        }
    }
    void traceMachineTeamInfo() const {
        int i = 0;

        TraceEvent("MachineTeamInfo", distributorId).detail("Size", machineTeams.size());
        for (auto& team : machineTeams) {
            TraceEvent("MachineTeamInfo", distributorId)
                .detail("TeamIndex", i++)
                .detail("MachineIDs", team->getMachineIDsStr())
                .detail("ServerTeams", team->serverTeams.size());
        }
    }
    // A locality string is hashed into an integer, which is used as the KeyIndex
    // To make it easier to see which KeyIndex corresponds to which locality, we print this mapping in a trace event
    void traceLocalityArrayIndexName() const {
        TraceEvent("LocalityRecordKeyName").detail("Size", machineLocalityMap._keymap->_lookuparray.size());
        for (int i = 0; i < machineLocalityMap._keymap->_lookuparray.size(); ++i) {
            TraceEvent("LocalityRecordKeyIndexName")
                .detail("KeyIndex", i)
                .detail("KeyName", machineLocalityMap._keymap->_lookuparray[i]);
        }
    }
    void traceMachineLocalityMap() const {
        int i = 0;

        TraceEvent("MachineLocalityMap", distributorId).detail("Size", machineLocalityMap.size());
        for (auto& uid : machineLocalityMap.getObjects()) {
            Reference<LocalityRecord> record = machineLocalityMap.getRecord(i);
            if (record.isValid()) {
                TraceEvent("MachineLocalityMap", distributorId)
                    .detail("LocalityIndex", i++)
                    .detail("UID", uid->toString())
                    .detail("LocalityRecord", record->toString());
            } else {
                TraceEvent("MachineLocalityMap")
                    .detail("LocalityIndex", i++)
                    .detail("UID", uid->toString())
                    .detail("LocalityRecord", "[NotFound]");
            }
        }
    }
    // To enable verbose debug info, set shouldPrint to true
    void traceAllInfo(bool shouldPrint = false) const {
        if (!shouldPrint)
            return;

        // Record all team collection IDs
        for (int i = 0; i < teamCollections.size(); ++i) {
            if (teamCollections[i] != nullptr) {
                TraceEvent("TraceAllInfo", distributorId)
                    .detail("TeamCollectionIndex", i)
                    .detail("Primary", teamCollections[i]->primary);
            }
        }

        TraceEvent("TraceAllInfo", distributorId).detail("Primary", primary);
        traceConfigInfo();
        traceServerInfo();
        traceServerTeamInfo();
        traceMachineInfo();
        traceMachineTeamInfo();
        traceLocalityArrayIndexName();
        traceMachineLocalityMap();
    }
    // We must rebuild the machine locality map whenever an entry in the map is inserted or removed
    void rebuildMachineLocalityMap() {
        machineLocalityMap.clear();
        int numHealthyMachine = 0;
        for (auto machine = machine_info.begin(); machine != machine_info.end(); ++machine) {
            if (machine->second->serversOnMachine.empty()) {
                TraceEvent(SevWarn, "RebuildMachineLocalityMapError")
                    .detail("Machine", machine->second->machineID.toString())
                    .detail("NumServersOnMachine", 0);
                continue;
            }
            if (!isMachineHealthy(machine->second)) {
                continue;
            }
            Reference<TCServerInfo> representativeServer = machine->second->serversOnMachine[0];
            auto& locality = representativeServer->lastKnownInterface.locality;
            if (!isValidLocality(configuration.storagePolicy, locality)) {
                TraceEvent(SevWarn, "RebuildMachineLocalityMapError")
                    .detail("Machine", machine->second->machineID.toString())
                    .detail("InvalidLocality", locality.toString());
                continue;
            }
            const LocalityEntry& localityEntry = machineLocalityMap.add(locality, &representativeServer->id);
            machine->second->localityEntry = localityEntry;
            ++numHealthyMachine;
        }
    }
    // Create machineTeamsToBuild number of machine teams
    // No operation if machineTeamsToBuild is 0
    // Note: The creation of machine teams should not depend on server teams:
    // No matter how server teams will be created, we will create the same set of machine teams;
    // We should never use the server team number in building machine teams.
    //
    // Five steps to create each machine team, which are documented in the function
    // Reuse the ReplicationPolicy selectReplicas() func to select a machine team
    // Return the number of added machine teams
    int addBestMachineTeams(int machineTeamsToBuild) {
        int addedMachineTeams = 0;

        ASSERT(machineTeamsToBuild >= 0);
        // The number of machines is always no smaller than the storageTeamSize in a correct configuration
        ASSERT(machine_info.size() >= configuration.storageTeamSize);
        // Future: Consider if we should overbuild more machine teams to
        // allow machineTeamRemover() to get a more balanced number of machine teams per machine

        // Step 1: Create the machineLocalityMap which will be used in building machine teams
        rebuildMachineLocalityMap();

        // Add a team in each iteration
        while (addedMachineTeams < machineTeamsToBuild || notEnoughMachineTeamsForAMachine()) {
            // Step 2: Get the least used machines from which we choose machines as a machine team
            std::vector<Reference<TCMachineInfo>> leastUsedMachines; // A less used machine has fewer teams
            int minTeamCount = std::numeric_limits<int>::max();
            for (auto& machine : machine_info) {
                // Skip invalid machines whose representative server is not in server_info
                ASSERT_WE_THINK(server_info.find(machine.second->serversOnMachine[0]->id) != server_info.end());
                // Skip unhealthy machines
                if (!isMachineHealthy(machine.second))
                    continue;
                // Skip machines with incomplete locality
                if (!isValidLocality(configuration.storagePolicy,
                                     machine.second->serversOnMachine[0]->lastKnownInterface.locality)) {
                    continue;
                }

                // Invariant: We only create correct size machine teams.
                // When the configuration (e.g., team size) is changed, the DDTeamCollection will be destroyed and
                // rebuilt so that the invariant will not be violated.
                int teamCount = machine.second->machineTeams.size();
                if (teamCount < minTeamCount) {
                    leastUsedMachines.clear();
                    minTeamCount = teamCount;
                }
                if (teamCount == minTeamCount) {
                    leastUsedMachines.push_back(machine.second);
                }
            }

            std::vector<UID*> team;
            std::vector<LocalityEntry> forcedAttributes;

            // Step 4: Reuse the policy's selectReplicas() to create a team for the representative process.
            std::vector<UID*> bestTeam;
            int bestScore = std::numeric_limits<int>::max();
            int maxAttempts = SERVER_KNOBS->BEST_OF_AMT; // BEST_OF_AMT = 4
            for (int i = 0; i < maxAttempts && i < 100; ++i) {
                // Step 3: Create a representative process for each machine.
                // Construct forcedAttributes from leastUsedMachines.
                // We will use forcedAttributes to call the existing function to form a team
                if (leastUsedMachines.size()) {
                    forcedAttributes.clear();
                    // Randomly choose 1 least used machine
                    Reference<TCMachineInfo> tcMachineInfo = deterministicRandom()->randomChoice(leastUsedMachines);
                    ASSERT(!tcMachineInfo->serversOnMachine.empty());
                    LocalityEntry process = tcMachineInfo->localityEntry;
                    forcedAttributes.push_back(process);
                    TraceEvent("ChosenMachine")
                        .detail("MachineInfo", tcMachineInfo->machineID)
                        .detail("LeaseUsedMachinesSize", leastUsedMachines.size())
                        .detail("ForcedAttributesSize", forcedAttributes.size());
                } else {
                    // When leastUsedMachines is empty, we will never find a team later, so we can simply return.
                    return addedMachineTeams;
                }

                // Choose a team that balances the # of teams per server among the teams
                // that have the least-utilized server
                team.clear();
                ASSERT_WE_THINK(forcedAttributes.size() == 1);
                auto success = machineLocalityMap.selectReplicas(configuration.storagePolicy, forcedAttributes, team);
                // NOTE: selectReplicas() should always return success when storageTeamSize = 1
                ASSERT_WE_THINK(configuration.storageTeamSize > 1 || (configuration.storageTeamSize == 1 && success));
                if (!success) {
                    continue; // Try up to maxAttempts, since next time we may choose different forcedAttributes
                }
                ASSERT(forcedAttributes.size() > 0);
                team.push_back((UID*)machineLocalityMap.getObject(forcedAttributes[0]));

                // selectReplicas() may NEVER return a server not in server_info.
                for (auto& pUID : team) {
                    ASSERT_WE_THINK(server_info.find(*pUID) != server_info.end());
                }

                // selectReplicas() should always return a team with the correct size; otherwise, it has a bug
                ASSERT(team.size() == configuration.storageTeamSize);

                int score = 0;
                vector<Standalone<StringRef>> machineIDs;
                for (auto process = team.begin(); process != team.end(); process++) {
                    Reference<TCServerInfo> server = server_info[**process];
                    score += server->machine->machineTeams.size();
                    Standalone<StringRef> machine_id = server->lastKnownInterface.locality.zoneId().get();
                    machineIDs.push_back(machine_id);
                }

                // Only choose healthy machines into the machine team
                ASSERT_WE_THINK(isMachineTeamHealthy(machineIDs));

                std::sort(machineIDs.begin(), machineIDs.end());
                int overlap = overlappingMachineMembers(machineIDs);
                if (overlap == machineIDs.size()) {
                    maxAttempts += 1;
                    continue;
                }
                score += SERVER_KNOBS->DD_OVERLAP_PENALTY * overlap;

                // SOMEDAY: randomly pick one from the teams with the lowest score
                if (score < bestScore) {
                    // bestTeam is the team whose members belong to the smallest number of teams.
                    bestTeam = team;
                    bestScore = score;
                }
            }

            // bestTeam should now be a new valid team to be added as a machine team
            // Step 5: Restore the machines from the representative process team and get the machine team
            if (bestTeam.size() == configuration.storageTeamSize) {
                // machineIDs is used to quickly check if the machineIDs belong to an existing team
                // machines keeps machine references for a performance benefit by avoiding machine lookups by
                // machineID
                vector<Reference<TCMachineInfo>> machines;
                for (auto process = bestTeam.begin(); process < bestTeam.end(); process++) {
                    Reference<TCMachineInfo> machine = server_info[**process]->machine;
                    machines.push_back(machine);
                }

                addMachineTeam(machines);
                addedMachineTeams++;
            } else {
                traceAllInfo(true);
                TraceEvent(SevWarn, "DataDistributionBuildTeams", distributorId)
                    .detail("Primary", primary)
                    .detail("Reason", "Unable to make desired machine Teams");
                lastBuildTeamsFailed = true;
                break;
            }
        }

        return addedMachineTeams;
    }
    bool isMachineTeamHealthy(vector<Standalone<StringRef>> const& machineIDs) const {
        int healthyNum = 0;

        // A healthy machine team should have the desired number of machines
        if (machineIDs.size() != configuration.storageTeamSize)
            return false;

        for (auto& id : machineIDs) {
            auto& machine = machine_info.at(id);
            if (isMachineHealthy(machine)) {
                healthyNum++;
            }
        }
        return (healthyNum == machineIDs.size());
    }
    bool isMachineTeamHealthy(TCMachineTeamInfo const& machineTeam) const {
        int healthyNum = 0;

        // A healthy machine team should have the desired number of machines
        if (machineTeam.size() != configuration.storageTeamSize)
            return false;

        for (auto& machine : machineTeam.machines) {
            if (isMachineHealthy(machine)) {
                healthyNum++;
            }
        }
        return (healthyNum == machineTeam.machines.size());
    }
    bool isMachineHealthy(Reference<TCMachineInfo> const& machine) const {
        if (!machine.isValid() || machine_info.find(machine->machineID) == machine_info.end() ||
            machine->serversOnMachine.empty()) {
            return false;
        }

        // A healthy machine has at least one healthy server
        for (auto& server : machine->serversOnMachine) {
            if (!server_status.get(server->id).isUnhealthy()) {
                return true;
            }
        }

        return false;
    }
    // Return a healthy server with the least number of correct-size server teams
    Reference<TCServerInfo> findOneLeastUsedServer() const {
        vector<Reference<TCServerInfo>> leastUsedServers;
        int minTeams = std::numeric_limits<int>::max();
        for (auto& server : server_info) {
            // Only pick healthy servers, which are not failed or excluded.
            if (server_status.get(server.first).isUnhealthy())
                continue;
            if (!isValidLocality(configuration.storagePolicy, server.second->lastKnownInterface.locality))
                continue;

            int numTeams = server.second->teams.size();
            if (numTeams < minTeams) {
                minTeams = numTeams;
                leastUsedServers.clear();
            }
            if (minTeams == numTeams) {
                leastUsedServers.push_back(server.second);
            }
        }

        if (leastUsedServers.empty()) {
            // We cannot find a healthy server with a valid locality
            TraceEvent("NoHealthyAndValidLocalityServers")
                .detail("Servers", server_info.size())
                .detail("UnhealthyServers", unhealthyServers);
            return Reference<TCServerInfo>();
        } else {
            return deterministicRandom()->randomChoice(leastUsedServers);
        }
    }
    // Randomly choose one machine team that has chosenServer and has the correct size
    // When the configuration is changed, we may have machine teams with the old storageTeamSize
    Reference<TCMachineTeamInfo> findOneRandomMachineTeam(TCServerInfo const& chosenServer) const {
        if (!chosenServer.machine->machineTeams.empty()) {
            std::vector<Reference<TCMachineTeamInfo>> healthyMachineTeamsForChosenServer;
            for (auto& mt : chosenServer.machine->machineTeams) {
                if (isMachineTeamHealthy(*mt)) {
                    healthyMachineTeamsForChosenServer.push_back(mt);
                }
            }
            if (!healthyMachineTeamsForChosenServer.empty()) {
                return deterministicRandom()->randomChoice(healthyMachineTeamsForChosenServer);
            }
        }

        // We cannot find a healthy machine team for this server
        TraceEvent("NoHealthyMachineTeamForServer")
            .detail("ServerID", chosenServer.id)
            .detail("MachineTeams", chosenServer.machine->machineTeams.size());
        return Reference<TCMachineTeamInfo>();
    }
    // A server team should always come from servers on a machine team
    // Check that this is true
    bool isOnSameMachineTeam(TCTeamInfo const& team) const {
        std::vector<Standalone<StringRef>> machineIDs;
        for (const auto& server : team.getServers()) {
            if (!server->machine.isValid())
                return false;
            machineIDs.push_back(server->machine->machineID);
        }
        std::sort(machineIDs.begin(), machineIDs.end());

        int numExistance = 0;
        for (const auto& server : team.getServers()) {
            for (const auto& candidateMachineTeam : server->machine->machineTeams) {
                std::sort(candidateMachineTeam->machineIDs.begin(), candidateMachineTeam->machineIDs.end());
                if (machineIDs == candidateMachineTeam->machineIDs) {
                    numExistance++;
                    break;
                }
            }
        }
        return (numExistance == team.size());
    }
    // Sanity check the property of teams in unit tests
    // Return true if all server teams belong to machine teams
    bool sanityCheckTeams() const {
        for (auto& team : teams) {
            if (!isOnSameMachineTeam(*team)) {
                return false;
            }
        }

        return true;
    }
    int calculateHealthyServerCount() const {
        int serverCount = 0;
        for (auto i = server_info.begin(); i != server_info.end(); ++i) {
            if (!server_status.get(i->first).isUnhealthy()) {
                ++serverCount;
            }
        }
        return serverCount;
    }
    int calculateHealthyMachineCount() const {
        int totalHealthyMachineCount = 0;
        for (auto& m : machine_info) {
            if (isMachineHealthy(m.second)) {
                ++totalHealthyMachineCount;
            }
        }
        return totalHealthyMachineCount;
    }
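
    // Return the (min, max) number of server teams held by any healthy server; unhealthy servers are ignored.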
    std::pair<int64_t, int64_t> calculateMinMaxServerTeamsOnServer() const {
        int64_t minTeams = std::numeric_limits<int64_t>::max();
        int64_t maxTeams = 0;
        for (auto& server : server_info) {
            if (server_status.get(server.first).isUnhealthy()) {
                continue;
            }
            minTeams = std::min((int64_t)server.second->teams.size(), minTeams);
            maxTeams = std::max((int64_t)server.second->teams.size(), maxTeams);
        }
        return std::make_pair(minTeams, maxTeams);
    }
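
    // Return the (min, max) number of machine teams held by any healthy machine; unhealthy machines are ignored.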
    std::pair<int64_t, int64_t> calculateMinMaxMachineTeamsOnMachine() const {
        int64_t minTeams = std::numeric_limits<int64_t>::max();
        int64_t maxTeams = 0;
        for (auto& machine : machine_info) {
            if (!isMachineHealthy(machine.second)) {
                continue;
            }
            minTeams = std::min<int64_t>((int64_t)machine.second->machineTeams.size(), minTeams);
            maxTeams = std::max<int64_t>((int64_t)machine.second->machineTeams.size(), maxTeams);
        }
        return std::make_pair(minTeams, maxTeams);
    }
    // Sanity check
    bool isServerTeamCountCorrect(Reference<TCMachineTeamInfo> const& mt) const {
        int num = 0;
        bool ret = true;
        for (auto& team : teams) {
            if (team->machineTeam->machineIDs == mt->machineIDs) {
                ++num;
            }
        }
        if (num != mt->serverTeams.size()) {
            ret = false;
            TraceEvent(SevError, "ServerTeamCountOnMachineIncorrect")
                .detail("MachineTeam", mt->getMachineIDsStr())
                .detail("ServerTeamsSize", mt->serverTeams.size())
                .detail("CountedServerTeams", num);
        }
        return ret;
    }
    // Find the machine team with the least number of server teams
    std::pair<Reference<TCMachineTeamInfo>, int> getMachineTeamWithLeastProcessTeams() const {
        Reference<TCMachineTeamInfo> retMT;
        int minNumProcessTeams = std::numeric_limits<int>::max();

        for (auto& mt : machineTeams) {
            if (EXPENSIVE_VALIDATION) {
                ASSERT(isServerTeamCountCorrect(mt));
            }

            if (mt->serverTeams.size() < minNumProcessTeams) {
                minNumProcessTeams = mt->serverTeams.size();
                retMT = mt;
            }
        }

        return std::pair<Reference<TCMachineTeamInfo>, int>(retMT, minNumProcessTeams);
    }
    // Find the machine team whose members are on the most number of machine teams, same logic as serverTeamRemover
    std::pair<Reference<TCMachineTeamInfo>, int> getMachineTeamWithMostMachineTeams() const {
        Reference<TCMachineTeamInfo> retMT;
        int maxNumMachineTeams = 0;
        int targetMachineTeamNumPerMachine =
            (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2;

        for (auto& mt : machineTeams) {
            // The representative team number for the machine team mt is
            // the minimum number of machine teams of a machine in the team mt
            int representNumMachineTeams = std::numeric_limits<int>::max();
            for (auto& m : mt->machines) {
                representNumMachineTeams = std::min<int>(representNumMachineTeams, m->machineTeams.size());
            }
            if (representNumMachineTeams > targetMachineTeamNumPerMachine &&
                representNumMachineTeams > maxNumMachineTeams) {
                maxNumMachineTeams = representNumMachineTeams;
                retMT = mt;
            }
        }
        return std::pair<Reference<TCMachineTeamInfo>, int>(retMT, maxNumMachineTeams);
    }
    // Find the server team whose members are on the most number of server teams
    std::pair<Reference<TCTeamInfo>, int> getServerTeamWithMostProcessTeams() const {
        Reference<TCTeamInfo> retST;
        int maxNumProcessTeams = 0;
        int targetTeamNumPerServer = (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2;

        for (auto& t : teams) {
            // The minimum number of teams of a server in a team is the representative team number for the team t
            int representNumProcessTeams = std::numeric_limits<int>::max();
            for (auto& server : t->getServers()) {
                representNumProcessTeams = std::min<int>(representNumProcessTeams, server->teams.size());
            }
            // We only remove a team whose representNumProcessTeams is larger than targetTeamNumPerServer;
            // otherwise, teamBuilder will build the to-be-removed team again
            if (representNumProcessTeams > targetTeamNumPerServer && representNumProcessTeams > maxNumProcessTeams) {
                maxNumProcessTeams = representNumProcessTeams;
                retST = t;
            }
        }
        return std::pair<Reference<TCTeamInfo>, int>(retST, maxNumProcessTeams);
    }
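
    // Count machine teams of the configured size in which every member machine is healthy.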
    int getHealthyMachineTeamCount() const {
        int healthyTeamCount = 0;
        for (const auto& mt : machineTeams) {
            ASSERT(mt->machines.size() == configuration.storageTeamSize);

            if (isMachineTeamHealthy(*mt)) {
                ++healthyTeamCount;
            }
        }

        return healthyTeamCount;
    }
    // Each machine is expected to have targetMachineTeamNumPerMachine machine teams
    // Return true if there exists a machine that does not have enough teams.
    bool notEnoughMachineTeamsForAMachine() const {
        // If we want to remove the machine team with the most machine teams, we use the same logic as
        // notEnoughTeamsForAServer
        int targetMachineTeamNumPerMachine =
            SERVER_KNOBS->TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS
                ? (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2
                : SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER;
        for (auto& m : machine_info) {
            // If SERVER_KNOBS->TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS is false,
            // the desired machine team number is not the same as the desired server team number
            // in notEnoughTeamsForAServer() below, because the machineTeamRemover() does not
            // remove the machine team with the most number of machine teams.
            if (m.second->machineTeams.size() < targetMachineTeamNumPerMachine && isMachineHealthy(m.second)) {
                return true;
            }
        }

        return false;
    }
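
    // Worked example for the target computed below (illustrative values, not the shipped knob defaults): with
    // DESIRED_TEAMS_PER_SERVER = 5 and storageTeamSize = 3, targetTeamNumPerServer = (5 * (3 + 1)) / 2 = 10,
    // i.e., halfway between the 5 desired teams per server and the 5 * 3 = 15 teams a server would be on if every
    // desired team were counted once per member.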
// Each server is expected to have targetTeamNumPerServer teams.
// Return true if there exists a server that does not have enough teams.
bool notEnoughTeamsForAServer() const {
    // We build more teams than we eventually want so that the serverTeamRemover() actor can remove the teams
    // whose members belong to too many teams. This allows us to reach a more balanced number of teams per server.
    // We want to ensure every server has targetTeamNumPerServer teams.
    // The target (targetTeamNumPerServer) is calculated as
    // (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER + ideal_num_of_teams_per_server) / 2, where
    // ideal_num_of_teams_per_server is (#teams * storageTeamSize) / #servers, which is
    // (#servers * DESIRED_TEAMS_PER_SERVER * storageTeamSize) / #servers.
    int targetTeamNumPerServer = (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2;
    ASSERT(targetTeamNumPerServer > 0);
    for (auto& s : server_info) {
        if (s.second->teams.size() < targetTeamNumPerServer && !server_status.get(s.first).isUnhealthy()) {
            return true;
        }
    }
    return false;
}
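// Worked example of the target above, using hypothetical knob values rather than the defaults shipped with
// FoundationDB: with DESIRED_TEAMS_PER_SERVER = 5 and storageTeamSize = 3, ideal_num_of_teams_per_server is
// 5 * 3 = 15, so targetTeamNumPerServer = (5 * (3 + 1)) / 2 = (5 + 15) / 2 = 10. A healthy server owning
// fewer than 10 teams makes notEnoughTeamsForAServer() return true, which keeps team building going.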
// Create server teams based on machine teams.
// Before the number of machine teams reaches the threshold, build a machine team for each server team.
// Once it reaches the threshold, first try to build a server team with existing machine teams; if that fails,
// build an extra machine team and record the event in the trace.
int addTeamsBestOf(int teamsToBuild, int desiredTeams, int maxTeams) {
    ASSERT(teamsToBuild >= 0);
    ASSERT_WE_THINK(machine_info.size() > 0 || server_info.size() == 0);
    ASSERT_WE_THINK(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER >= 1 && configuration.storageTeamSize >= 1);

    int addedMachineTeams = 0;
    int addedTeams = 0;

    // Exclude machine teams that have members in the wrong configuration.
    // When we change configuration, we may have machine teams with the storageTeamSize of the old configuration.
    int healthyMachineTeamCount = getHealthyMachineTeamCount();
    int totalMachineTeamCount = machineTeams.size();
    int totalHealthyMachineCount = calculateHealthyMachineCount();

    int desiredMachineTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * totalHealthyMachineCount;
    int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
    // machineTeamsToBuild mimics how teamsToBuild is calculated in buildTeams()
    int machineTeamsToBuild = std::max(
        0, std::min(desiredMachineTeams - healthyMachineTeamCount, maxMachineTeams - totalMachineTeamCount));

    TraceEvent("BuildMachineTeams")
        .detail("TotalHealthyMachine", totalHealthyMachineCount)
        .detail("HealthyMachineTeamCount", healthyMachineTeamCount)
        .detail("DesiredMachineTeams", desiredMachineTeams)
        .detail("MaxMachineTeams", maxMachineTeams)
        .detail("MachineTeamsToBuild", machineTeamsToBuild);
    // Pre-build all machine teams until we have the desired number of machine teams
    if (machineTeamsToBuild > 0 || notEnoughMachineTeamsForAMachine()) {
        addedMachineTeams = addBestMachineTeams(machineTeamsToBuild);
    }

    while (addedTeams < teamsToBuild || notEnoughTeamsForAServer()) {
        // Step 1: Build one best server team in this iteration of the loop
        std::vector<UID> bestServerTeam;
        int bestScore = std::numeric_limits<int>::max();
        int maxAttempts = SERVER_KNOBS->BEST_OF_AMT; // BEST_OF_AMT = 4
        bool earlyQuitBuild = false;
        for (int i = 0; i < maxAttempts && i < 100; ++i) {
            // Step 2: Choose 1 least used server and then choose 1 machine team from that server
            Reference<TCServerInfo> chosenServer = findOneLeastUsedServer();
            if (!chosenServer.isValid()) {
                TraceEvent(SevWarn, "NoValidServer").detail("Primary", primary);
                earlyQuitBuild = true;
                break;
            }
            // Note: To avoid creating correlation between the picked machine teams, we simply choose a random
            // machine team instead of choosing the least used machine team.
            // The correlation happens, for example, when we add two new machines: we may always choose the machine
            // team with these two new machines because they are typically less used.
            Reference<TCMachineTeamInfo> chosenMachineTeam = findOneRandomMachineTeam(*chosenServer);

            if (!chosenMachineTeam.isValid()) {
                // We may face the situation that temporarily we have no healthy machine.
                TraceEvent(SevWarn, "MachineTeamNotFound")
                    .detail("Primary", primary)
                    .detail("MachineTeams", machineTeams.size());
                continue; // try randomly to find another least used server
            }

            // From here, chosenMachineTeam must have a healthy server team
            // Step 3: Randomly pick 1 server from each machine in the chosen machine team to form a server team
            vector<UID> serverTeam;
            int chosenServerCount = 0;
            for (auto& machine : chosenMachineTeam->machines) {
                UID serverID;
                if (machine == chosenServer->machine) {
                    serverID = chosenServer->id;
                    ++chosenServerCount;
                } else {
                    std::vector<Reference<TCServerInfo>> healthyProcesses;
                    for (auto it : machine->serversOnMachine) {
                        if (!server_status.get(it->id).isUnhealthy()) {
                            healthyProcesses.push_back(it);
                        }
                    }
                    serverID = deterministicRandom()->randomChoice(healthyProcesses)->id;
                }
                serverTeam.push_back(serverID);
            }

            ASSERT(chosenServerCount == 1); // chosenServer should be used exactly once
            ASSERT(serverTeam.size() == configuration.storageTeamSize);

            std::sort(serverTeam.begin(), serverTeam.end());
            int overlap = overlappingMembers(serverTeam);
            if (overlap == serverTeam.size()) {
                maxAttempts += 1;
                continue;
            }

            // Pick the server team with the smallest score across all attempts.
            // If we used a different metric here, DD could oscillate indefinitely between creating and removing
            // teams.
            // SOMEDAY: Improve the code efficiency by using a reservoir algorithm
            int score = SERVER_KNOBS->DD_OVERLAP_PENALTY * overlap;
            for (auto& server : serverTeam) {
                score += server_info[server]->teams.size();
            }
            TraceEvent(SevDebug, "BuildServerTeams")
                .detail("Score", score)
                .detail("BestScore", bestScore)
                .detail("TeamSize", serverTeam.size())
                .detail("StorageTeamSize", configuration.storageTeamSize);
            if (score < bestScore) {
                bestScore = score;
                bestServerTeam = serverTeam;
            }
        }

        if (earlyQuitBuild) {
            break;
        }
        if (bestServerTeam.size() != configuration.storageTeamSize) {
            // We did not find a team this round and are unlikely to find one in later rounds
            lastBuildTeamsFailed = true;
            break;
        }

        // Step 4: Add the server team
        addTeam(bestServerTeam.begin(), bestServerTeam.end(), false);
        addedTeams++;
    }

    healthyMachineTeamCount = getHealthyMachineTeamCount();

    std::pair<uint64_t, uint64_t> minMaxTeamsOnServer = calculateMinMaxServerTeamsOnServer();
    std::pair<uint64_t, uint64_t> minMaxMachineTeamsOnMachine = calculateMinMaxMachineTeamsOnMachine();

    TraceEvent("TeamCollectionInfo", distributorId)
        .detail("Primary", primary)
        .detail("AddedTeams", addedTeams)
        .detail("TeamsToBuild", teamsToBuild)
        .detail("CurrentServerTeams", teams.size())
        .detail("DesiredTeams", desiredTeams)
        .detail("MaxTeams", maxTeams)
        .detail("StorageTeamSize", configuration.storageTeamSize)
        .detail("CurrentMachineTeams", machineTeams.size())
        .detail("CurrentHealthyMachineTeams", healthyMachineTeamCount)
        .detail("DesiredMachineTeams", desiredMachineTeams)
        .detail("MaxMachineTeams", maxMachineTeams)
        .detail("TotalHealthyMachines", totalHealthyMachineCount)
        .detail("MinTeamsOnServer", minMaxTeamsOnServer.first)
        .detail("MaxTeamsOnServer", minMaxTeamsOnServer.second)
        .detail("MinMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.first)
        .detail("MaxMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.second)
        .detail("DoBuildTeams", doBuildTeams)
        .trackLatest("TeamCollectionInfo");

    return addedTeams;
}
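// Illustration of the server-team score used in the loop above, with hypothetical numbers (not the actual
// knob values): if DD_OVERLAP_PENALTY were 10000, a candidate team that shares one member with an existing
// team and whose three servers currently own 8, 9 and 12 teams would score 10000 * 1 + (8 + 9 + 12) = 10029,
// so a fully non-overlapping candidate on lightly loaded servers (score in the tens) wins the comparison.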
// Check if the number of server teams (and machine teams) is larger than the maximum allowed number
void traceTeamCollectionInfo() const {
    int totalHealthyServerCount = calculateHealthyServerCount();
    int desiredServerTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * totalHealthyServerCount;
    int maxServerTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyServerCount;

    int totalHealthyMachineCount = calculateHealthyMachineCount();
    int desiredMachineTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * totalHealthyMachineCount;
    int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
    int healthyMachineTeamCount = getHealthyMachineTeamCount();

    std::pair<uint64_t, uint64_t> minMaxTeamsOnServer = calculateMinMaxServerTeamsOnServer();
    std::pair<uint64_t, uint64_t> minMaxMachineTeamsOnMachine = calculateMinMaxMachineTeamsOnMachine();

    TraceEvent("TeamCollectionInfo", distributorId)
        .detail("Primary", primary)
        .detail("AddedTeams", 0)
        .detail("TeamsToBuild", 0)
        .detail("CurrentServerTeams", teams.size())
        .detail("DesiredTeams", desiredServerTeams)
        .detail("MaxTeams", maxServerTeams)
        .detail("StorageTeamSize", configuration.storageTeamSize)
        .detail("CurrentMachineTeams", machineTeams.size())
        .detail("CurrentHealthyMachineTeams", healthyMachineTeamCount)
        .detail("DesiredMachineTeams", desiredMachineTeams)
        .detail("MaxMachineTeams", maxMachineTeams)
        .detail("TotalHealthyMachines", totalHealthyMachineCount)
        .detail("MinTeamsOnServer", minMaxTeamsOnServer.first)
        .detail("MaxTeamsOnServer", minMaxTeamsOnServer.second)
        .detail("MinMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.first)
        .detail("MaxMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.second)
        .detail("DoBuildTeams", doBuildTeams)
        .trackLatest("TeamCollectionInfo");

    // Advance time so that we will not have multiple TeamCollectionInfo events at the same time; otherwise the
    // simulation test will randomly pick one TeamCollectionInfo trace, which could be the one from before the
    // teams were built.
    // wait(delay(0.01));

    // Debug purpose
    // if (healthyMachineTeamCount > desiredMachineTeams || machineTeams.size() > maxMachineTeams) {
    //     // When the number of machine teams is over the limit, print out the current team info.
    //     traceAllInfo(true);
    // }
}
// Use the current set of known processes (from server_info) to compute an optimized set of storage server teams.
// The following are guarantees of the process:
// - Each newly-built team will meet the replication policy
// - All newly-built teams will have exactly teamSize machines
//
// buildTeams() only ever adds teams to the list of teams. Teams are only removed from the list when all data has
// been removed.
//
// buildTeams will not count teams larger than teamSize against the desired teams.
ACTOR static Future<Void> buildTeams(DDTeamCollection* self) {
    state int desiredTeams;
    int serverCount = 0;
    int uniqueMachines = 0;
    std::set<Optional<Standalone<StringRef>>> machines;

    for (auto i = self->server_info.begin(); i != self->server_info.end(); ++i) {
        if (!self->server_status.get(i->first).isUnhealthy()) {
            ++serverCount;
            LocalityData& serverLocation = i->second->lastKnownInterface.locality;
            machines.insert(serverLocation.zoneId());
        }
    }
    uniqueMachines = machines.size();
    TraceEvent("BuildTeams", self->distributorId)
        .detail("ServerCount", self->server_info.size())
        .detail("UniqueMachines", uniqueMachines)
        .detail("Primary", self->primary)
        .detail("StorageTeamSize", self->configuration.storageTeamSize);

    // If there are too few machines to even build teams or there are too few represented datacenters, build no new
    // teams
    if (uniqueMachines >= self->configuration.storageTeamSize) {
        desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * serverCount;
        int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * serverCount;

        // Exclude teams that have members in the wrong configuration, since we don't want these teams
        int teamCount = 0;
        int totalTeamCount = 0;
        for (int i = 0; i < self->teams.size(); ++i) {
            if (!self->teams[i]->isWrongConfiguration()) {
                if (self->teams[i]->isHealthy()) {
                    teamCount++;
                }
                totalTeamCount++;
            }
        }
        // teamsToBuild is calculated such that we will not build too many teams in the situation
        // when all (or most of the) teams become unhealthy temporarily and then healthy again
        state int teamsToBuild = std::max(0, std::min(desiredTeams - teamCount, maxTeams - totalTeamCount));

        TraceEvent("BuildTeamsBegin", self->distributorId)
            .detail("TeamsToBuild", teamsToBuild)
            .detail("DesiredTeams", desiredTeams)
            .detail("MaxTeams", maxTeams)
            .detail("BadServerTeams", self->badTeams.size())
            .detail("UniqueMachines", uniqueMachines)
            .detail("TeamSize", self->configuration.storageTeamSize)
            .detail("Servers", serverCount)
            .detail("CurrentTrackedServerTeams", self->teams.size())
            .detail("HealthyTeamCount", teamCount)
            .detail("TotalTeamCount", totalTeamCount)
            .detail("MachineTeamCount", self->machineTeams.size())
            .detail("MachineCount", self->machine_info.size())
            .detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER);

        self->lastBuildTeamsFailed = false;
        if (teamsToBuild > 0 || self->notEnoughTeamsForAServer()) {
            state vector<std::vector<UID>> builtTeams;

            // addTeamsBestOf() will not add more teams than needed.
            // If the team number is more than desired, the extra teams are added in the code path where a team is
            // added as an initial team.
            int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams);

            if (addedTeams <= 0 && self->teams.size() == 0) {
                TraceEvent(SevWarn, "NoTeamAfterBuildTeam", self->distributorId)
                    .detail("ServerTeamNum", self->teams.size())
                    .detail("Debug", "Check information below");
                // Debug: set true for traceAllInfo() to print out more information
                self->traceAllInfo();
            }
        } else {
            int totalHealthyMachineCount = self->calculateHealthyMachineCount();
            int desiredMachineTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * totalHealthyMachineCount;
            int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
            int healthyMachineTeamCount = self->getHealthyMachineTeamCount();

            std::pair<uint64_t, uint64_t> minMaxTeamsOnServer = self->calculateMinMaxServerTeamsOnServer();
            std::pair<uint64_t, uint64_t> minMaxMachineTeamsOnMachine =
                self->calculateMinMaxMachineTeamsOnMachine();

            TraceEvent("TeamCollectionInfo", self->distributorId)
                .detail("Primary", self->primary)
                .detail("AddedTeams", 0)
                .detail("TeamsToBuild", teamsToBuild)
                .detail("CurrentServerTeams", self->teams.size())
                .detail("DesiredTeams", desiredTeams)
                .detail("MaxTeams", maxTeams)
                .detail("StorageTeamSize", self->configuration.storageTeamSize)
                .detail("CurrentMachineTeams", self->machineTeams.size())
                .detail("CurrentHealthyMachineTeams", healthyMachineTeamCount)
                .detail("DesiredMachineTeams", desiredMachineTeams)
                .detail("MaxMachineTeams", maxMachineTeams)
                .detail("TotalHealthyMachines", totalHealthyMachineCount)
                .detail("MinTeamsOnServer", minMaxTeamsOnServer.first)
                .detail("MaxTeamsOnServer", minMaxTeamsOnServer.second)
                .detail("MinMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.first)
                .detail("MaxMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.second)
                .detail("DoBuildTeams", self->doBuildTeams)
                .trackLatest("TeamCollectionInfo");
        }
    } else {
        self->lastBuildTeamsFailed = true;
    }

    self->evaluateTeamQuality();

    // Building teams can cause servers to become undesired, which can make teams unhealthy.
    // Let all of these changes get worked out before responding to the get team request
    wait(delay(0, TaskPriority::DataDistributionLaunch));

    return Void();
}
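// Worked example of the teamsToBuild calculation above, with hypothetical numbers: for 10 healthy servers,
// DESIRED_TEAMS_PER_SERVER = 5 and MAX_TEAMS_PER_SERVER = 10 give desiredTeams = 50 and maxTeams = 100. If 40
// correctly-configured teams are tracked and 25 of them are currently healthy, then
// teamsToBuild = max(0, min(50 - 25, 100 - 40)) = 25; the min() term keeps a temporary dip in healthy teams
// from pushing the total number of tracked teams past maxTeams.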
void noHealthyTeams() const {
    std::set<UID> desiredServerSet;
    std::string desc;
    for (auto i = server_info.begin(); i != server_info.end(); ++i) {
        ASSERT(i->first == i->second->id);
        if (!server_status.get(i->first).isFailed) {
            desiredServerSet.insert(i->first);
            desc += i->first.shortString() + " (" + i->second->lastKnownInterface.toString() + "), ";
        }
    }

    TraceEvent(SevWarn, "NoHealthyTeams", distributorId)
        .detail("CurrentServerTeamCount", teams.size())
        .detail("ServerCount", server_info.size())
        .detail("NonFailedServerCount", desiredServerSet.size());
}
bool shouldHandleServer(const StorageServerInterface& newServer) const {
    return (includedDCs.empty() ||
            std::find(includedDCs.begin(), includedDCs.end(), newServer.locality.dcId()) != includedDCs.end() ||
            (otherTrackedDCs.present() &&
             std::find(otherTrackedDCs.get().begin(), otherTrackedDCs.get().end(), newServer.locality.dcId()) ==
                 otherTrackedDCs.get().end()));
}
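// Reading the predicate above: this collection handles a server if it does not restrict itself to particular
// DCs, if the server's dcId is listed in includedDCs, or if otherTrackedDCs is present and does not contain
// that dcId. For example (hypothetical locality values), with includedDCs = {"dc1"} and
// otherTrackedDCs = {"dc2"}, a server reporting "dc1" is handled here, a server reporting "dc2" is not, and a
// server reporting an unlisted "dc3" is also handled because the other tracked region does not claim it.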
void addServer(StorageServerInterface newServer,
               ProcessClass processClass,
               Promise<Void> errorOut,
               Version addedVersion,
               const DDEnabledState* ddEnabledState) {
    if (!shouldHandleServer(newServer)) {
        return;
    }

    if (!newServer.isTss()) {
        allServers.push_back(newServer.id());
    }

    TraceEvent(newServer.isTss() ? "AddedTSS" : "AddedStorageServer", distributorId)
        .detail("ServerID", newServer.id())
        .detail("ProcessClass", processClass.toString())
        .detail("WaitFailureToken", newServer.waitFailure.getEndpoint().token)
        .detail("Address", newServer.waitFailure.getEndpoint().getPrimaryAddress());

    auto& r = server_and_tss_info[newServer.id()] = makeReference<TCServerInfo>(
        newServer,
        this,
        processClass,
        includedDCs.empty() ||
            std::find(includedDCs.begin(), includedDCs.end(), newServer.locality.dcId()) != includedDCs.end(),
        storageServerSet,
        addedVersion);

    if (newServer.isTss()) {
        tss_info_by_pair[newServer.tssPairID.get()] = r;

        if (server_info.count(newServer.tssPairID.get())) {
            r->onTSSPairRemoved = server_info[newServer.tssPairID.get()]->onRemoved;
        }
    } else {
        server_info[newServer.id()] = r;
        // Establish the relation between server and machine
        checkAndCreateMachine(r);
        // Add the storage server to the pid map
        ASSERT(r->lastKnownInterface.locality.processId().present());
        StringRef pid = r->lastKnownInterface.locality.processId().get();
        pid2server_info[pid].push_back(r);
    }

    r->tracker =
        storageServerTracker(this, cx, r.getPtr(), errorOut, addedVersion, ddEnabledState, newServer.isTss());

    if (!newServer.isTss()) {
        // Link and wake up the TSS pair's tracker so it knows when this server gets removed
        if (tss_info_by_pair.count(newServer.id())) {
            tss_info_by_pair[newServer.id()]->onTSSPairRemoved = r->onRemoved;
            if (tss_info_by_pair[newServer.id()]->wakeUpTracker.canBeSet()) {
                tss_info_by_pair[newServer.id()]->wakeUpTracker.send(Void());
            }
        }
        doBuildTeams = true; // Adding a new server triggers building new teams
        restartTeamBuilder.trigger();
    }
}
bool removeTeam(Reference<TCTeamInfo> team) {
    TraceEvent("RemovedServerTeam", distributorId).detail("Team", team->getDesc());
    bool found = false;
    for (int t = 0; t < teams.size(); t++) {
        if (teams[t] == team) {
            teams[t--] = teams.back();
            teams.pop_back();
            found = true;
            break;
        }
    }

    for (const auto& server : team->getServers()) {
        for (int t = 0; t < server->teams.size(); t++) {
            if (server->teams[t] == team) {
                ASSERT(found);
                server->teams[t--] = server->teams.back();
                server->teams.pop_back();
                break; // The teams on a server should never duplicate
            }
        }
    }

    // Remove the team from its machine team
    bool foundInMachineTeam = false;
    for (int t = 0; t < team->machineTeam->serverTeams.size(); ++t) {
        if (team->machineTeam->serverTeams[t] == team) {
            team->machineTeam->serverTeams[t--] = team->machineTeam->serverTeams.back();
            team->machineTeam->serverTeams.pop_back();
            foundInMachineTeam = true;
            break; // The same team is added to the serverTeams only once
        }
    }

    ASSERT_WE_THINK(foundInMachineTeam);
    team->tracker.cancel();
    if (g_network->isSimulated()) {
        // Update server team information for the consistency check in simulation
        traceTeamCollectionInfo();
    }
    return found;
}
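// Note on the removal idiom used above (and in the other remove* methods): an element of an unordered vector
// is erased by overwriting it with the last element and popping the back, which is O(1) and avoids shifting.
// A minimal stand-alone sketch of the same pattern, for illustration only:
//     std::vector<int> v{ 4, 7, 9 };
//     int t = 1;          // index of the element to erase (the value 7)
//     v[t] = v.back();    // v is now { 4, 9, 9 }
//     v.pop_back();       // v is now { 4, 9 }; loops decrement t so the swapped-in element is revisited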
// Check if the server belongs to a machine; if not, create the machine.
// Establish the two-direction link between server and machine
Reference<TCMachineInfo> checkAndCreateMachine(Reference<TCServerInfo> server) {
    ASSERT(server.isValid() && server_info.find(server->id) != server_info.end());
    auto& locality = server->lastKnownInterface.locality;
    Standalone<StringRef> machine_id = locality.zoneId().get(); // locality zoneId is used as the machine_id
    Reference<TCMachineInfo> machineInfo;
    if (machine_info.find(machine_id) == machine_info.end()) {
        // This is the first storage server process on the machine
        TEST(true); // First storage server process on the machine
        // For each machine, store the first server's localityEntry into machineInfo for later use.
        LocalityEntry localityEntry = machineLocalityMap.add(locality, &server->id);
        machineInfo = makeReference<TCMachineInfo>(server, localityEntry);
        machine_info.insert(std::make_pair(machine_id, machineInfo));
    } else {
        machineInfo = machine_info.find(machine_id)->second;
        machineInfo->serversOnMachine.push_back(server);
    }
    server->machine = machineInfo;
    return machineInfo;
}
// Check if the serverTeam belongs to a machine team; if not, create the machine team
// Note: This function may make the machine team number larger than the desired machine team number
Reference<TCMachineTeamInfo> checkAndCreateMachineTeam(Reference<TCTeamInfo> serverTeam) {
    std::vector<Standalone<StringRef>> machineIDs;
    for (auto& server : serverTeam->getServers()) {
        Reference<TCMachineInfo> machine = server->machine;
        machineIDs.push_back(machine->machineID);
    }
    std::sort(machineIDs.begin(), machineIDs.end());
    Reference<TCMachineTeamInfo> machineTeam = findMachineTeam(machineIDs);
    if (!machineTeam.isValid()) { // Create the machine team if it does not exist
        machineTeam = addMachineTeam(machineIDs.begin(), machineIDs.end());
    }

    machineTeam->serverTeams.push_back(serverTeam);

    return machineTeam;
}
// Remove the removedMachineInfo machine and any related machine team
void removeMachine(Reference<TCMachineInfo> removedMachineInfo) {
    // Find machines that share teams with the removed machine
    std::set<Standalone<StringRef>> machinesWithAjoiningTeams;
    for (auto& machineTeam : removedMachineInfo->machineTeams) {
        machinesWithAjoiningTeams.insert(machineTeam->machineIDs.begin(), machineTeam->machineIDs.end());
    }
    machinesWithAjoiningTeams.erase(removedMachineInfo->machineID);
    // For each machine in a machine team with the removed machine,
    // erase shared machine teams from the list of teams.
    for (auto it = machinesWithAjoiningTeams.begin(); it != machinesWithAjoiningTeams.end(); ++it) {
        auto& machineTeams = machine_info[*it]->machineTeams;
        for (int t = 0; t < machineTeams.size(); t++) {
            auto& machineTeam = machineTeams[t];
            if (std::count(machineTeam->machineIDs.begin(),
                           machineTeam->machineIDs.end(),
                           removedMachineInfo->machineID)) {
                machineTeams[t--] = machineTeams.back();
                machineTeams.pop_back();
            }
        }
    }
    removedMachineInfo->machineTeams.clear();

    // Remove any global machine team that includes removedMachineInfo
    for (int t = 0; t < machineTeams.size(); t++) {
        auto& machineTeam = machineTeams[t];
        if (std::count(
                machineTeam->machineIDs.begin(), machineTeam->machineIDs.end(), removedMachineInfo->machineID)) {
            removeMachineTeam(machineTeam);
            // removeMachineTeam will swap the last team in the machineTeams vector into [t];
            // t-- to avoid skipping that element
            t--;
        }
    }
    // Remove removedMachineInfo from the machine's global info
    machine_info.erase(removedMachineInfo->machineID);
    TraceEvent("MachineLocalityMapUpdate").detail("MachineUIDRemoved", removedMachineInfo->machineID.toString());

    // We do not update machineLocalityMap when a machine is removed because we will do so when we use it in
    // addBestMachineTeams()
    // rebuildMachineLocalityMap();
}
// Invariant: Remove a machine team only when the server teams on it have been removed.
// We never actively remove a machine team.
// A machine team is removed when a machine is removed,
// which happens when all servers on the machine have been removed.
// NOTE: When this function is called while iterating over machineTeams, make sure NOT to increase the index
// in the next iteration of the loop. Otherwise, you may miss checking some elements in machineTeams.
bool removeMachineTeam(Reference<TCMachineTeamInfo> targetMT) {
    bool foundMachineTeam = false;
    for (int i = 0; i < machineTeams.size(); i++) {
        Reference<TCMachineTeamInfo> mt = machineTeams[i];
        if (mt->machineIDs == targetMT->machineIDs) {
            machineTeams[i--] = machineTeams.back();
            machineTeams.pop_back();
            foundMachineTeam = true;
            break;
        }
    }
    // Remove the machine team on each machine
    for (auto& machine : targetMT->machines) {
        for (int i = 0; i < machine->machineTeams.size(); ++i) {
            if (machine->machineTeams[i]->machineIDs == targetMT->machineIDs) {
                machine->machineTeams[i--] = machine->machineTeams.back();
                machine->machineTeams.pop_back();
                break; // The machineTeams on a machine should never duplicate
            }
        }
    }

    return foundMachineTeam;
}
void removeTSS(UID removedServer) {
    // Much simpler than removeServer: a TSS is not in any team, so just remove it from the data structures
    TraceEvent("RemovedTSS", distributorId).detail("ServerID", removedServer);
    Reference<TCServerInfo> removedServerInfo = server_and_tss_info[removedServer];

    tss_info_by_pair.erase(removedServerInfo->lastKnownInterface.tssPairID.get());
    server_and_tss_info.erase(removedServer);
    server_status.clear(removedServer);
}
void removeServer(UID removedServer) {
    TraceEvent("RemovedStorageServer", distributorId).detail("ServerID", removedServer);

    // ASSERT( !shardsAffectedByTeamFailure->getServersForTeam( t ) for all t in teams that contain removedServer )
    Reference<TCServerInfo> removedServerInfo = server_info[removedServer];
    // Step: Remove the TCServerInfo from pid2server_info
    ASSERT(removedServerInfo->lastKnownInterface.locality.processId().present());
    StringRef pid = removedServerInfo->lastKnownInterface.locality.processId().get();
    auto& info_vec = pid2server_info[pid];
    for (size_t i = 0; i < info_vec.size(); ++i) {
        if (info_vec[i] == removedServerInfo) {
            info_vec[i--] = info_vec.back();
            info_vec.pop_back();
        }
    }
    if (info_vec.size() == 0) {
        pid2server_info.erase(pid);
    }

    // Step: Remove the server teams that relate to removedServer
    // Find all servers with which the removedServer shares teams
    std::set<UID> serversWithAjoiningTeams;
    auto& sharedTeams = removedServerInfo->teams;
    for (int i = 0; i < sharedTeams.size(); ++i) {
        auto& teamIds = sharedTeams[i]->getServerIDs();
        serversWithAjoiningTeams.insert(teamIds.begin(), teamIds.end());
    }
    serversWithAjoiningTeams.erase(removedServer);

    // For each server in a team with the removedServer, erase shared teams from the list of teams in that other
    // server
    for (auto it = serversWithAjoiningTeams.begin(); it != serversWithAjoiningTeams.end(); ++it) {
        auto& serverTeams = server_info[*it]->teams;
        for (int t = 0; t < serverTeams.size(); t++) {
            auto& serverIds = serverTeams[t]->getServerIDs();
            if (std::count(serverIds.begin(), serverIds.end(), removedServer)) {
                serverTeams[t--] = serverTeams.back();
                serverTeams.pop_back();
            }
        }
    }

    // Step: Remove all teams that contain removedServer
    // SOMEDAY: can we avoid walking through all teams, since we have an index of teams in which removedServer
    // participated
    int removedCount = 0;
    for (int t = 0; t < teams.size(); t++) {
        if (std::count(teams[t]->getServerIDs().begin(), teams[t]->getServerIDs().end(), removedServer)) {
            TraceEvent("ServerTeamRemoved")
                .detail("Primary", primary)
                .detail("TeamServerIDs", teams[t]->getServerIDsStr())
                .detail("TeamID", teams[t]->getTeamID());
            // removeTeam also needs to remove the team from the machine team info.
            removeTeam(teams[t]);
            t--;
            removedCount++;
        }
    }
    if (removedCount == 0) {
        TraceEvent(SevInfo, "NoTeamsRemovedWhenServerRemoved")
            .detail("Primary", primary)
            .detail("Debug", "ThisShouldRarelyHappen_CheckInfoBelow");
    }

    for (int t = 0; t < badTeams.size(); t++) {
        if (std::count(badTeams[t]->getServerIDs().begin(), badTeams[t]->getServerIDs().end(), removedServer)) {
            badTeams[t]->tracker.cancel();
            badTeams[t--] = badTeams.back();
            badTeams.pop_back();
        }
    }

    // Step: Remove the machine info related to removedServer
    // Remove the server from its machine
    Reference<TCMachineInfo> removedMachineInfo = removedServerInfo->machine;
    for (int i = 0; i < removedMachineInfo->serversOnMachine.size(); ++i) {
        if (removedMachineInfo->serversOnMachine[i] == removedServerInfo) {
            // Safe even when removedServerInfo is the last one
            removedMachineInfo->serversOnMachine[i--] = removedMachineInfo->serversOnMachine.back();
            removedMachineInfo->serversOnMachine.pop_back();
            break;
        }
    }
    // Remove the machine if no server is left on it
    // Note: Remove the machine (and machine team) after server teams have been removed, because
    // we remove a machine team only when the server teams on it have been removed
    if (removedMachineInfo->serversOnMachine.size() == 0) {
        removeMachine(removedMachineInfo);
    }

    // If the machine uses removedServer's locality and the machine still has servers, the machine's
    // representative server will be updated when it is used in addBestMachineTeams().
    // Note that since we do not rebuildMachineLocalityMap() here, the machineLocalityMap can be stale.
    // This is ok as long as we do not arbitrarily validate whether a machine team satisfies the replication policy.

    if (server_info[removedServer]->wrongStoreTypeToRemove.get()) {
        if (wrongStoreTypeRemover.isReady()) {
            wrongStoreTypeRemover = removeWrongStoreType(this);
            addActor.send(wrongStoreTypeRemover);
        }
    }

    // Step: Remove removedServer from the server's global data
    for (int s = 0; s < allServers.size(); s++) {
        if (allServers[s] == removedServer) {
            allServers[s--] = allServers.back();
            allServers.pop_back();
        }
    }
    server_info.erase(removedServer);
    server_and_tss_info.erase(removedServer);

    if (server_status.get(removedServer).initialized && server_status.get(removedServer).isUnhealthy()) {
        unhealthyServers--;
    }
    server_status.clear(removedServer);

    // FIXME: add remove support to localitySet so we do not have to recreate it
    resetLocalitySet();

    doBuildTeams = true;
    restartTeamBuilder.trigger();

    TraceEvent("DataDistributionTeamCollectionUpdate", distributorId)
        .detail("ServerTeams", teams.size())
        .detail("BadServerTeams", badTeams.size())
        .detail("Servers", allServers.size())
        .detail("Machines", machine_info.size())
        .detail("MachineTeams", machineTeams.size())
        .detail("DesiredTeamsPerServer", SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER);
}
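// Summary of the removal cascade above, for orientation: pid2server_info -> shared server teams held by peer
// servers -> this collection's teams (removeTeam(), which also updates the owning machine team) -> badTeams ->
// the owning machine (and, if the machine becomes empty, removeMachine() and its machine teams) -> the global
// server maps and status -> resetLocalitySet() -> trigger a new round of team building.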
// Adds the storage servers held on the process whose process id is `pid` into excludedServers, which prevents
// recruiting the wiggling storage servers and lets teamTracker start to move data off the affected teams.
// Returns a vector of futures that become ready once all data has been moved to other teams.
std::vector<Future<Void>> excludeStorageServersForWiggle(const Value& pid) {
    std::vector<Future<Void>> moveFutures;
    if (this->pid2server_info.count(pid) != 0) {
        for (auto& info : this->pid2server_info[pid]) {
            AddressExclusion addr(info->lastKnownInterface.address().ip, info->lastKnownInterface.address().port);
            if (this->excludedServers.count(addr) &&
                this->excludedServers.get(addr) != DDTeamCollection::Status::NONE) {
                continue; // don't overwrite the value set by the trackExcludedServer actor
            }
            this->wiggle_addresses.push_back(addr);
            this->excludedServers.set(addr, DDTeamCollection::Status::WIGGLING);
            moveFutures.push_back(info->onRemoved);
        }
        if (!moveFutures.empty()) {
            this->restartRecruiting.trigger();
        }
    }
    return moveFutures;
}
// Include the wiggled storage servers by setting their status from `WIGGLING`
// to `NONE`. The storage recruiter will then recruit them as new storage servers.
void includeStorageServersForWiggle() {
    bool included = false;
    for (auto& address : this->wiggle_addresses) {
        if (!this->excludedServers.count(address) ||
            this->excludedServers.get(address) != DDTeamCollection::Status::WIGGLING) {
            continue;
        }
        included = true;
        this->excludedServers.set(address, DDTeamCollection::Status::NONE);
    }
    this->wiggle_addresses.clear();
    if (included) {
        this->restartRecruiting.trigger();
    }
}
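// Sketch of how the two wiggle helpers above are intended to pair up (illustrative only; the actual wiggle
// actor elsewhere in this file may differ in detail):
//     std::vector<Future<Void>> moved = teamCollection->excludeStorageServersForWiggle(pid);
//     wait(waitForAll(moved));                          // data has drained off the wiggling process
//     // ... restart the storage servers on `pid`, e.g. with a new storage engine ...
//     teamCollection->includeStorageServersForWiggle(); // let the recruiter bring them back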
};

TCServerInfo::~TCServerInfo() {
    if (collection && ssVersionTooFarBehind.get() && !lastKnownInterface.isTss()) {
        collection->removeLaggingStorageServer(lastKnownInterface.locality.zoneId().get());
    }
}
2021-03-11 02:06:03 +08:00
ACTOR Future < Void > updateServerMetrics ( TCServerInfo * server ) {
2020-04-07 14:37:11 +08:00
state StorageServerInterface ssi = server - > lastKnownInterface ;
2021-03-11 02:06:03 +08:00
state Future < ErrorOr < GetStorageMetricsReply > > metricsRequest =
ssi . getStorageMetrics . tryGetReply ( GetStorageMetricsRequest ( ) , TaskPriority : : DataDistributionLaunch ) ;
2020-04-07 14:37:11 +08:00
state Future < Void > resetRequest = Never ( ) ;
2021-03-11 02:06:03 +08:00
state Future < std : : pair < StorageServerInterface , ProcessClass > > interfaceChanged ( server - > onInterfaceChanged ) ;
state Future < Void > serverRemoved ( server - > onRemoved ) ;
2020-04-07 14:37:11 +08:00
loop {
choose {
2021-03-11 02:06:03 +08:00
when ( ErrorOr < GetStorageMetricsReply > rep = wait ( metricsRequest ) ) {
if ( rep . present ( ) ) {
2020-04-07 14:37:11 +08:00
server - > serverMetrics = rep ;
2021-03-11 02:06:03 +08:00
if ( server - > updated . canBeSet ( ) ) {
2020-04-07 14:37:11 +08:00
server - > updated . send ( Void ( ) ) ;
}
break ;
}
metricsRequest = Never ( ) ;
2021-03-11 02:06:03 +08:00
resetRequest = delay ( SERVER_KNOBS - > METRIC_DELAY , TaskPriority : : DataDistributionLaunch ) ;
2020-04-07 14:37:11 +08:00
}
2021-03-11 02:06:03 +08:00
when ( std : : pair < StorageServerInterface , ProcessClass > _ssi = wait ( interfaceChanged ) ) {
2020-04-07 14:37:11 +08:00
ssi = _ssi . first ;
interfaceChanged = server - > onInterfaceChanged ;
resetRequest = Void ( ) ;
}
2021-03-11 02:06:03 +08:00
when ( wait ( serverRemoved ) ) { return Void ( ) ; }
when ( wait ( resetRequest ) ) { // To prevent a tight spin loop
if ( IFailureMonitor : : failureMonitor ( ) . getState ( ssi . getStorageMetrics . getEndpoint ( ) ) . isFailed ( ) ) {
resetRequest = IFailureMonitor : : failureMonitor ( ) . onStateEqual ( ssi . getStorageMetrics . getEndpoint ( ) ,
FailureStatus ( false ) ) ;
} else {
2020-04-07 14:37:11 +08:00
resetRequest = Never ( ) ;
2021-03-11 02:06:03 +08:00
metricsRequest = ssi . getStorageMetrics . tryGetReply ( GetStorageMetricsRequest ( ) ,
TaskPriority : : DataDistributionLaunch ) ;
2020-04-07 14:37:11 +08:00
}
}
}
}
2021-03-11 02:06:03 +08:00
if ( server - > serverMetrics . get ( ) . lastUpdate < now ( ) - SERVER_KNOBS - > DD_SS_STUCK_TIME_LIMIT ) {
if ( server - > ssVersionTooFarBehind . get ( ) = = false ) {
TraceEvent ( " StorageServerStuck " , server - > collection - > distributorId )
. detail ( " ServerId " , server - > id . toString ( ) )
. detail ( " LastUpdate " , server - > serverMetrics . get ( ) . lastUpdate ) ;
server - > ssVersionTooFarBehind . set ( true ) ;
server - > collection - > addLaggingStorageServer ( server - > lastKnownInterface . locality . zoneId ( ) . get ( ) ) ;
}
} else if ( server - > serverMetrics . get ( ) . versionLag > SERVER_KNOBS - > DD_SS_FAILURE_VERSIONLAG ) {
2020-04-09 01:48:32 +08:00
if ( server - > ssVersionTooFarBehind . get ( ) = = false ) {
2021-03-04 15:33:48 +08:00
TraceEvent ( SevWarn , " SSVersionDiffLarge " , server - > collection - > distributorId )
. detail ( " ServerId " , server - > id . toString ( ) )
. detail ( " VersionLag " , server - > serverMetrics . get ( ) . versionLag ) ;
2020-04-07 14:37:11 +08:00
server - > ssVersionTooFarBehind . set ( true ) ;
server - > collection - > addLaggingStorageServer ( server - > lastKnownInterface . locality . zoneId ( ) . get ( ) ) ;
2020-04-09 01:48:32 +08:00
}
2021-03-11 02:06:03 +08:00
} else if ( server - > serverMetrics . get ( ) . versionLag < SERVER_KNOBS - > DD_SS_ALLOWED_VERSIONLAG ) {
2020-04-09 01:48:32 +08:00
if ( server - > ssVersionTooFarBehind . get ( ) = = true ) {
2021-03-11 02:06:03 +08:00
TraceEvent ( " SSVersionDiffNormal " , server - > collection - > distributorId )
. detail ( " ServerId " , server - > id . toString ( ) )
. detail ( " VersionLag " , server - > serverMetrics . get ( ) . versionLag ) ;
2020-04-07 14:37:11 +08:00
server - > ssVersionTooFarBehind . set ( false ) ;
server - > collection - > removeLaggingStorageServer ( server - > lastKnownInterface . locality . zoneId ( ) . get ( ) ) ;
2020-04-09 01:48:32 +08:00
}
2020-04-07 14:37:11 +08:00
}
return Void ( ) ;
}
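// The branches above implement a simple hysteresis on ssVersionTooFarBehind: a server is marked lagging when
// its metrics have been stale for longer than DD_SS_STUCK_TIME_LIMIT or its versionLag exceeds
// DD_SS_FAILURE_VERSIONLAG, and it is only marked healthy again once versionLag drops below the (presumably
// lower) DD_SS_ALLOWED_VERSIONLAG threshold. For example, with hypothetical thresholds of 240e6 and 200e6
// versions, a server oscillating around 220e6 keeps whichever state it already has instead of flapping.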
2021-03-11 02:06:03 +08:00
ACTOR Future < Void > updateServerMetrics ( Reference < TCServerInfo > server ) {
wait ( updateServerMetrics ( server . getPtr ( ) ) ) ;
2020-04-07 14:37:11 +08:00
return Void ( ) ;
}
2019-07-20 07:48:50 +08:00
ACTOR Future < Void > waitUntilHealthy ( DDTeamCollection * self , double extraDelay = 0 ) {
state int waitCount = 0 ;
2019-02-07 15:34:56 +08:00
loop {
2021-03-11 02:06:03 +08:00
while ( self - > zeroHealthyTeams - > get ( ) | | self - > processingUnhealthy - > get ( ) ) {
2019-07-09 10:00:24 +08:00
// processingUnhealthy: true when there exists data movement
2021-03-11 02:06:03 +08:00
TraceEvent ( " WaitUntilHealthyStalled " , self - > distributorId )
. detail ( " Primary " , self - > primary )
. detail ( " ZeroHealthy " , self - > zeroHealthyTeams - > get ( ) )
. detail ( " ProcessingUnhealthy " , self - > processingUnhealthy - > get ( ) ) ;
2019-02-07 15:34:56 +08:00
wait ( self - > zeroHealthyTeams - > onChange ( ) | | self - > processingUnhealthy - > onChange ( ) ) ;
2019-07-20 07:48:50 +08:00
waitCount = 0 ;
2019-02-07 15:34:56 +08:00
}
2021-03-11 02:06:03 +08:00
wait ( delay ( SERVER_KNOBS - > DD_STALL_CHECK_DELAY ,
TaskPriority : : Low ) ) ; // After the team trackers wait on the initial failure reaction delay, they
// yield. We want to make sure every tracker has had the opportunity to send
// their relocations to the queue.
if ( ! self - > zeroHealthyTeams - > get ( ) & & ! self - > processingUnhealthy - > get ( ) ) {
2019-07-20 07:48:50 +08:00
if ( extraDelay < = 0.01 | | waitCount > = 1 ) {
// Return healthy if we do not need extraDelay or when DD are healthy in at least two consecutive check
return Void ( ) ;
} else {
wait ( delay ( extraDelay , TaskPriority : : Low ) ) ;
waitCount + + ;
}
2019-02-07 15:34:56 +08:00
}
}
2019-02-23 06:38:13 +08:00
}
2020-10-16 06:25:00 +08:00
// Take a snapshot of necessary data structures from `DDTeamCollection` and print them out with yields to avoid slow
2020-10-24 05:01:53 +08:00
// task on the run loop.
ACTOR Future < Void > printSnapshotTeamsInfo ( Reference < DDTeamCollection > self ) {
2020-10-16 01:01:56 +08:00
state DatabaseConfiguration configuration ;
state std : : map < UID , Reference < TCServerInfo > > server_info ;
2020-10-16 06:25:00 +08:00
state std : : map < UID , ServerStatus > server_status ;
2020-10-16 01:01:56 +08:00
state vector < Reference < TCTeamInfo > > teams ;
state std : : map < Standalone < StringRef > , Reference < TCMachineInfo > > machine_info ;
state std : : vector < Reference < TCMachineTeamInfo > > machineTeams ;
2020-10-16 06:25:00 +08:00
// state std::vector<std::string> internedLocalityRecordKeyNameStrings;
// state int machineLocalityMapEntryArraySize;
// state std::vector<Reference<LocalityRecord>> machineLocalityMapRecordArray;
2020-10-16 01:01:56 +08:00
state int traceEventsPrinted = 0 ;
2020-10-16 06:25:00 +08:00
state std : : vector < const UID * > serverIDs ;
2020-10-24 06:29:08 +08:00
state double lastPrintTime = 0 ;
2020-11-13 08:27:55 +08:00
state ReadYourWritesTransaction tr ( self - > cx ) ;
2020-10-16 01:01:56 +08:00
loop {
2020-11-18 02:46:03 +08:00
try {
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
state Future < Void > watchFuture = tr . watch ( triggerDDTeamInfoPrintKey ) ;
wait ( tr . commit ( ) ) ;
wait ( self - > printDetailedTeamsInfo . onTrigger ( ) | | watchFuture ) ;
tr . reset ( ) ;
if ( now ( ) - lastPrintTime < SERVER_KNOBS - > DD_TEAMS_INFO_PRINT_INTERVAL ) {
continue ;
}
lastPrintTime = now ( ) ;
2020-10-16 01:01:56 +08:00
2020-11-18 02:46:03 +08:00
traceEventsPrinted = 0 ;
2020-10-16 01:01:56 +08:00
2020-11-18 02:46:03 +08:00
double snapshotStart = now ( ) ;
2020-10-24 05:01:53 +08:00
2020-11-18 02:46:03 +08:00
configuration = self - > configuration ;
server_info = self - > server_info ;
teams = self - > teams ;
machine_info = self - > machine_info ;
machineTeams = self - > machineTeams ;
// internedLocalityRecordKeyNameStrings = self->machineLocalityMap._keymap->_lookuparray;
// machineLocalityMapEntryArraySize = self->machineLocalityMap.size();
// machineLocalityMapRecordArray = self->machineLocalityMap.getRecordArray();
std : : vector < const UID * > _uids = self - > machineLocalityMap . getObjects ( ) ;
serverIDs = _uids ;
2020-10-16 01:01:56 +08:00
2020-11-18 02:46:03 +08:00
auto const & keys = self - > server_status . getKeys ( ) ;
for ( auto const & key : keys ) {
server_status . emplace ( key , self - > server_status . get ( key ) ) ;
}
2020-10-24 05:01:53 +08:00
2020-11-18 02:46:03 +08:00
            TraceEvent("DDPrintSnapshotTeamsInfo", self->distributorId)
. detail ( " SnapshotSpeed " , now ( ) - snapshotStart )
2020-10-24 05:01:53 +08:00
. detail ( " Primary " , self - > primary ) ;
2020-10-16 01:01:56 +08:00
2020-11-18 02:46:03 +08:00
// Print to TraceEvents
TraceEvent ( " DDConfig " , self - > distributorId )
. detail ( " StorageTeamSize " , configuration . storageTeamSize )
. detail ( " DesiredTeamsPerServer " , SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER )
. detail ( " MaxTeamsPerServer " , SERVER_KNOBS - > MAX_TEAMS_PER_SERVER )
2020-10-24 05:01:53 +08:00
. detail ( " Primary " , self - > primary ) ;
2020-10-16 01:01:56 +08:00
2020-11-18 02:46:03 +08:00
TraceEvent ( " ServerInfo " , self - > distributorId )
. detail ( " Size " , server_info . size ( ) )
2020-10-24 05:01:53 +08:00
. detail ( " Primary " , self - > primary ) ;
2020-11-18 02:46:03 +08:00
state int i ;
state std : : map < UID , Reference < TCServerInfo > > : : iterator server = server_info . begin ( ) ;
for ( i = 0 ; i < server_info . size ( ) ; i + + ) {
TraceEvent ( " ServerInfo " , self - > distributorId )
. detail ( " ServerInfoIndex " , i )
. detail ( " ServerID " , server - > first . toString ( ) )
. detail ( " ServerTeamOwned " , server - > second - > teams . size ( ) )
. detail ( " MachineID " , server - > second - > machine - > machineID . contents ( ) . toString ( ) )
. detail ( " Primary " , self - > primary ) ;
server + + ;
if ( + + traceEventsPrinted % SERVER_KNOBS - > DD_TEAMS_INFO_PRINT_YIELD_COUNT = = 0 ) {
wait ( yield ( ) ) ;
}
2020-10-16 01:01:56 +08:00
}
2020-11-18 02:46:03 +08:00
server = server_info . begin ( ) ;
for ( i = 0 ; i < server_info . size ( ) ; i + + ) {
const UID & uid = server - > first ;
TraceEvent ( " ServerStatus " , self - > distributorId )
. detail ( " ServerUID " , uid )
. detail ( " Healthy " , ! server_status . at ( uid ) . isUnhealthy ( ) )
. detail ( " MachineIsValid " , server_info [ uid ] - > machine . isValid ( ) )
. detail ( " MachineTeamSize " ,
server_info [ uid ] - > machine . isValid ( ) ? server_info [ uid ] - > machine - > machineTeams . size ( ) : - 1 )
. detail ( " Primary " , self - > primary ) ;
server + + ;
if ( + + traceEventsPrinted % SERVER_KNOBS - > DD_TEAMS_INFO_PRINT_YIELD_COUNT = = 0 ) {
wait ( yield ( ) ) ;
}
2020-10-16 01:01:56 +08:00
}
2020-11-18 02:46:03 +08:00
TraceEvent ( " ServerTeamInfo " , self - > distributorId )
. detail ( " Size " , teams . size ( ) )
. detail ( " Primary " , self - > primary ) ;
for ( i = 0 ; i < teams . size ( ) ; i + + ) {
const auto & team = teams [ i ] ;
TraceEvent ( " ServerTeamInfo " , self - > distributorId )
. detail ( " TeamIndex " , i )
. detail ( " Healthy " , team - > isHealthy ( ) )
. detail ( " TeamSize " , team - > size ( ) )
. detail ( " MemberIDs " , team - > getServerIDsStr ( ) )
. detail ( " Primary " , self - > primary ) ;
if ( + + traceEventsPrinted % SERVER_KNOBS - > DD_TEAMS_INFO_PRINT_YIELD_COUNT = = 0 ) {
wait ( yield ( ) ) ;
2020-10-16 01:01:56 +08:00
}
}
2020-10-24 05:01:53 +08:00
TraceEvent ( " MachineInfo " , self - > distributorId )
2020-11-18 02:46:03 +08:00
. detail ( " Size " , machine_info . size ( ) )
2020-10-24 05:01:53 +08:00
. detail ( " Primary " , self - > primary ) ;
2020-11-18 02:46:03 +08:00
state std : : map < Standalone < StringRef > , Reference < TCMachineInfo > > : : iterator machine = machine_info . begin ( ) ;
state bool isMachineHealthy = false ;
for ( i = 0 ; i < machine_info . size ( ) ; i + + ) {
Reference < TCMachineInfo > _machine = machine - > second ;
if ( ! _machine . isValid ( ) | | machine_info . find ( _machine - > machineID ) = = machine_info . end ( ) | |
_machine - > serversOnMachine . empty ( ) ) {
isMachineHealthy = false ;
}
// Healthy machine has at least one healthy server
for ( auto & server : _machine - > serversOnMachine ) {
if ( ! server_status . at ( server - > id ) . isUnhealthy ( ) ) {
isMachineHealthy = true ;
}
}
TraceEvent ( " MachineInfo " , self - > distributorId )
. detail ( " MachineInfoIndex " , i )
. detail ( " Healthy " , isMachineHealthy )
. detail ( " MachineID " , machine - > first . contents ( ) . toString ( ) )
. detail ( " MachineTeamOwned " , machine - > second - > machineTeams . size ( ) )
. detail ( " ServerNumOnMachine " , machine - > second - > serversOnMachine . size ( ) )
. detail ( " ServersID " , machine - > second - > getServersIDStr ( ) )
. detail ( " Primary " , self - > primary ) ;
machine + + ;
if ( + + traceEventsPrinted % SERVER_KNOBS - > DD_TEAMS_INFO_PRINT_YIELD_COUNT = = 0 ) {
wait ( yield ( ) ) ;
}
}
TraceEvent ( " MachineTeamInfo " , self - > distributorId )
. detail ( " Size " , machineTeams . size ( ) )
. detail ( " Primary " , self - > primary ) ;
for ( i = 0 ; i < machineTeams . size ( ) ; i + + ) {
const auto & team = machineTeams [ i ] ;
TraceEvent ( " MachineTeamInfo " , self - > distributorId )
. detail ( " TeamIndex " , i )
. detail ( " MachineIDs " , team - > getMachineIDsStr ( ) )
. detail ( " ServerTeams " , team - > serverTeams . size ( ) )
. detail ( " Primary " , self - > primary ) ;
if ( + + traceEventsPrinted % SERVER_KNOBS - > DD_TEAMS_INFO_PRINT_YIELD_COUNT = = 0 ) {
wait ( yield ( ) ) ;
}
}
// TODO: re-enable the following logging or remove them.
// TraceEvent("LocalityRecordKeyName", self->distributorId)
// .detail("Size", internedLocalityRecordKeyNameStrings.size())
// .detail("Primary", self->primary);
// for (i = 0; i < internedLocalityRecordKeyNameStrings.size(); i++) {
// TraceEvent("LocalityRecordKeyIndexName", self->distributorId)
// .detail("KeyIndex", i)
// .detail("KeyName", internedLocalityRecordKeyNameStrings[i])
// .detail("Primary", self->primary);
// if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
// wait(yield());
// }
// }
// TraceEvent("MachineLocalityMap", self->distributorId)
// .detail("Size", machineLocalityMapEntryArraySize)
// .detail("Primary", self->primary);
// for (i = 0; i < serverIDs.size(); i++) {
// const auto& serverID = serverIDs[i];
// Reference<LocalityRecord> record = machineLocalityMapRecordArray[i];
// if (record.isValid()) {
// TraceEvent("MachineLocalityMap", self->distributorId)
// .detail("LocalityIndex", i)
// .detail("UID", serverID->toString())
// .detail("LocalityRecord", record->toString())
// .detail("Primary", self->primary);
// } else {
// TraceEvent("MachineLocalityMap", self->distributorId)
// .detail("LocalityIndex", i)
// .detail("UID", serverID->toString())
// .detail("LocalityRecord", "[NotFound]")
// .detail("Primary", self->primary);
// }
// if (++traceEventsPrinted % SERVER_KNOBS->DD_TEAMS_INFO_PRINT_YIELD_COUNT == 0) {
// wait(yield());
// }
// }
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
}
}
}
ACTOR Future < Void > removeBadTeams ( DDTeamCollection * self ) {
wait ( self - > initialFailureReactionDelay ) ;
wait ( waitUntilHealthy ( self ) ) ;
wait ( self - > addSubsetComplete . getFuture ( ) ) ;
TraceEvent ( " DDRemovingBadServerTeams " , self - > distributorId ) . detail ( " Primary " , self - > primary ) ;
for ( auto it : self - > badTeams ) {
it - > tracker . cancel ( ) ;
}
self - > badTeams . clear ( ) ;
return Void ( ) ;
}
bool isCorrectDC ( DDTeamCollection * self , TCServerInfo * server ) {
return ( self - > includedDCs . empty ( ) | |
std : : find ( self - > includedDCs . begin ( ) , self - > includedDCs . end ( ) , server - > lastKnownInterface . locality . dcId ( ) ) ! =
self - > includedDCs . end ( ) ) ;
}
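// Periodically scans all storage servers and marks any server whose store type does not match the configured
// storageServerStoreType for removal (one server per healthy-cluster iteration); returns once no mismatched
// server remains.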
ACTOR Future < Void > removeWrongStoreType ( DDTeamCollection * self ) {
// Wait for storage servers to initialize their storeType
wait ( delay ( SERVER_KNOBS - > DD_REMOVE_STORE_ENGINE_DELAY ) ) ;
state Future < Void > fisServerRemoved = Never ( ) ;
TraceEvent ( " WrongStoreTypeRemoverStart " , self - > distributorId ) . detail ( " Servers " , self - > server_info . size ( ) ) ;
loop {
// Removing a server here when DD is not healthy may lead to rare failure scenarios, for example,
// the server with wrong storeType is shutting down while this actor marks it as to-be-removed.
// In addition, removing servers causes extra data movement, which should be done while the cluster is healthy
wait ( waitUntilHealthy ( self ) ) ;
bool foundSSToRemove = false ;
for ( auto & server : self - > server_info ) {
if ( ! server . second - > isCorrectStoreType ( self - > configuration . storageServerStoreType ) ) {
// Server may be removed due to failure while the wrongStoreTypeToRemove is sent to the
// storageServerTracker. This race may cause the server to be removed before reacting to
// wrongStoreTypeToRemove
server . second - > wrongStoreTypeToRemove . set ( true ) ;
foundSSToRemove = true ;
TraceEvent ( " WrongStoreTypeRemover " , self - > distributorId )
. detail ( " Server " , server . first )
. detail ( " StoreType " , server . second - > storeType )
. detail ( " ConfiguredStoreType " , self - > configuration . storageServerStoreType ) ;
break ;
}
}
if ( ! foundSSToRemove ) {
break ;
}
}
return Void ( ) ;
}
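// Removes redundant machine teams, one team per iteration, until the number of machine teams is no larger than
// the desired number (DESIRED_TEAMS_PER_SERVER * healthy machine count). This mirrors serverTeamRemover() below.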
ACTOR Future<Void> machineTeamRemover(DDTeamCollection* self) {
    state int numMachineTeamRemoved = 0;
    loop {
        // In case the machineTeamRemover causes problems in production, we can disable it
        if (SERVER_KNOBS->TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER) {
            return Void(); // Directly return Void()
        }
        // To avoid removing machine teams too fast, which is unlikely to happen anyway
        wait(delay(SERVER_KNOBS->TR_REMOVE_MACHINE_TEAM_DELAY, TaskPriority::DataDistribution));

        wait(waitUntilHealthy(self, SERVER_KNOBS->TR_REMOVE_SERVER_TEAM_EXTRA_DELAY));
        // Wait for the badTeamRemover() to avoid the potential race between adding the bad team (add the team
        // tracker) and removing the bad team (cancel the team tracker).
        wait(self->badTeamRemover);

        state int healthyMachineCount = self->calculateHealthyMachineCount();
        // Check if all machines are healthy; if not, loop back and retry after the delay above.
        // Eventually, all machines will become healthy.
        if (healthyMachineCount != self->machine_info.size()) {
            continue;
        }

        // From this point, all machine teams and server teams should be healthy, because we wait above
        // until processingUnhealthy is done, and all machines are healthy.
        // Sanity check all machine teams are healthy
        // int currentHealthyMTCount = self->getHealthyMachineTeamCount();
        // if (currentHealthyMTCount != self->machineTeams.size()) {
        //     TraceEvent(SevError, "InvalidAssumption")
        //         .detail("HealthyMachineCount", healthyMachineCount)
        //         .detail("Machines", self->machine_info.size())
        //         .detail("CurrentHealthyMTCount", currentHealthyMTCount)
        //         .detail("MachineTeams", self->machineTeams.size());
        //     self->traceAllInfo(true);
        // }

        // In most cases, all machine teams should be healthy teams at this point.
        int desiredMachineTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * healthyMachineCount;
        int totalMTCount = self->machineTeams.size();
        // Pick the machine team to remove. After release-6.2, we remove the machine team whose machines are on
        // the most machine teams, the same logic as serverTeamRemover.
        std::pair<Reference<TCMachineTeamInfo>, int> foundMTInfo = SERVER_KNOBS->TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS
                                                                       ? self->getMachineTeamWithMostMachineTeams()
                                                                       : self->getMachineTeamWithLeastProcessTeams();

        if (totalMTCount > desiredMachineTeams && foundMTInfo.first.isValid()) {
            Reference<TCMachineTeamInfo> mt = foundMTInfo.first;
            int minNumProcessTeams = foundMTInfo.second;
            ASSERT(mt.isValid());
            // Pick one process team, and mark it as a bad team
            // Remove the machine by removing its process teams one by one
            Reference<TCTeamInfo> team;
            int teamIndex = 0;
            for (teamIndex = 0; teamIndex < mt->serverTeams.size(); ++teamIndex) {
                team = mt->serverTeams[teamIndex];
                ASSERT(team->machineTeam->machineIDs == mt->machineIDs); // Sanity check

                // Check if a server will have 0 teams after the team is removed
                for (auto& s : team->getServers()) {
                    if (s->teams.size() == 0) {
                        TraceEvent(SevError, "MachineTeamRemoverTooAggressive", self->distributorId)
                            .detail("Server", s->id)
                            .detail("ServerTeam", team->getDesc());
                        self->traceAllInfo(true);
                    }
                }

                // The team will be marked as a bad team
                bool foundTeam = self->removeTeam(team);
                ASSERT(foundTeam == true);
                // removeTeam() has the side effect of swapping the last element to the current position
                // in the serverTeams vector in the machine team.
                --teamIndex;
                self->addTeam(team->getServers(), true, true);
                TEST(true); // Removed machine team
            }

            self->doBuildTeams = true;

            if (self->badTeamRemover.isReady()) {
                self->badTeamRemover = removeBadTeams(self);
                self->addActor.send(self->badTeamRemover);
            }

            TraceEvent("MachineTeamRemover", self->distributorId)
                .detail("MachineTeamIDToRemove", mt->id.shortString())
                .detail("MachineTeamToRemove", mt->getMachineIDsStr())
                .detail("NumProcessTeamsOnTheMachineTeam", minNumProcessTeams)
                .detail("CurrentMachineTeams", self->machineTeams.size())
                .detail("DesiredMachineTeams", desiredMachineTeams);

            // Remove the machine team
            bool foundRemovedMachineTeam = self->removeMachineTeam(mt);
            // When we remove the last server team on a machine team in removeTeam(), we also remove the machine
            // team. This is needed for the removeTeam() function.
            // So here the removeMachineTeam() should not find the machine team.
            ASSERT(foundRemovedMachineTeam);
            numMachineTeamRemoved++;
        } else {
            if (numMachineTeamRemoved > 0) {
                // Only trace the information when we remove a machine team
                TraceEvent("MachineTeamRemoverDone", self->distributorId)
                    .detail("HealthyMachines", healthyMachineCount)
                    // .detail("CurrentHealthyMachineTeams", currentHealthyMTCount)
                    .detail("CurrentMachineTeams", self->machineTeams.size())
                    .detail("DesiredMachineTeams", desiredMachineTeams)
                    .detail("NumMachineTeamsRemoved", numMachineTeamRemoved);
                self->traceTeamCollectionInfo();
                numMachineTeamRemoved = 0; // Reset the counter to avoid printing the message repeatedly
            }
        }
    }
}
// Remove the server team whose members are on the largest number of process teams
// until the total number of server teams is no larger than the desired number
ACTOR Future < Void > serverTeamRemover ( DDTeamCollection * self ) {
state int numServerTeamRemoved = 0 ;
loop {
// In case the serverTeamRemover causes problems in production, we can disable it
if ( SERVER_KNOBS - > TR_FLAG_DISABLE_SERVER_TEAM_REMOVER ) {
return Void ( ) ; // Directly return Void()
}
double removeServerTeamDelay = SERVER_KNOBS - > TR_REMOVE_SERVER_TEAM_DELAY ;
if ( g_network - > isSimulated ( ) ) {
// Speed up the team remover in simulation; otherwise,
// it may time out because we need to remove hundreds of teams
removeServerTeamDelay = removeServerTeamDelay / 100 ;
}
// To avoid removing server teams too fast, which is unlikely to happen anyway
wait ( delay ( removeServerTeamDelay , TaskPriority : : DataDistribution ) ) ;
wait ( waitUntilHealthy ( self , SERVER_KNOBS - > TR_REMOVE_SERVER_TEAM_EXTRA_DELAY ) ) ;
// Wait for the badTeamRemover() to avoid the potential race between
// adding the bad team (add the team tracker) and removing the bad team (cancel the team tracker).
wait ( self - > badTeamRemover ) ;
// From this point, all server teams should be healthy, because we wait above
// until processingUnhealthy is done, and all machines are healthy
int desiredServerTeams = SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER * self - > server_info . size ( ) ;
int totalSTCount = self - > teams . size ( ) ;
// Pick the server team whose members are on the largest number of server teams, and mark it undesired
std : : pair < Reference < TCTeamInfo > , int > foundSTInfo = self - > getServerTeamWithMostProcessTeams ( ) ;
if ( totalSTCount > desiredServerTeams & & foundSTInfo . first . isValid ( ) ) {
ASSERT ( foundSTInfo . first . isValid ( ) ) ;
Reference < TCTeamInfo > st = foundSTInfo . first ;
int maxNumProcessTeams = foundSTInfo . second ;
ASSERT ( st . isValid ( ) ) ;
// The team will be marked as a bad team
bool foundTeam = self - > removeTeam ( st ) ;
ASSERT ( foundTeam = = true ) ;
self - > addTeam ( st - > getServers ( ) , true , true ) ;
TEST ( true ) ; // Marked team as a bad team
self - > doBuildTeams = true ;
if ( self - > badTeamRemover . isReady ( ) ) {
self - > badTeamRemover = removeBadTeams ( self ) ;
self - > addActor . send ( self - > badTeamRemover ) ;
}
TraceEvent ( " ServerTeamRemover " , self - > distributorId )
. detail ( " ServerTeamToRemove " , st - > getServerIDsStr ( ) )
. detail ( " ServerTeamID " , st - > getTeamID ( ) )
. detail ( " NumProcessTeamsOnTheServerTeam " , maxNumProcessTeams )
. detail ( " CurrentServerTeams " , self - > teams . size ( ) )
. detail ( " DesiredServerTeams " , desiredServerTeams ) ;
numServerTeamRemoved + + ;
} else {
if ( numServerTeamRemoved > 0 ) {
// Only trace the information when we remove a server team
TraceEvent ( " ServerTeamRemoverDone " , self - > distributorId )
. detail ( " CurrentServerTeams " , self - > teams . size ( ) )
. detail ( " DesiredServerTeams " , desiredServerTeams )
. detail ( " NumServerTeamRemoved " , numServerTeamRemoved ) ;
self - > traceTeamCollectionInfo ( ) ;
numServerTeamRemoved = 0 ; // Reset the counter to avoid keep printing the message
}
}
}
}
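// After a delay, logs every shard owned by a team that has no servers left, together with an estimate of the
// total bytes whose replicas were stored on that team.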
ACTOR Future < Void > zeroServerLeftLogger_impl ( DDTeamCollection * self , Reference < TCTeamInfo > team ) {
wait ( delay ( SERVER_KNOBS - > DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY ) ) ;
state vector < KeyRange > shards = self - > shardsAffectedByTeamFailure - > getShardsFor (
ShardsAffectedByTeamFailure : : Team ( team - > getServerIDs ( ) , self - > primary ) ) ;
state std : : vector < Future < StorageMetrics > > sizes ;
sizes . reserve ( shards . size ( ) ) ;
for ( auto const & shard : shards ) {
sizes . emplace_back ( brokenPromiseToNever ( self - > getShardMetrics . getReply ( GetMetricsRequest ( shard ) ) ) ) ;
TraceEvent ( SevWarnAlways , " DDShardLost " , self - > distributorId )
. detail ( " ServerTeamID " , team - > getTeamID ( ) )
. detail ( " ShardBegin " , shard . begin )
. detail ( " ShardEnd " , shard . end ) ;
}
wait ( waitForAll ( sizes ) ) ;
int64_t bytesLost = 0 ;
for ( auto const & size : sizes ) {
bytesLost + = size . get ( ) . bytes ;
}
TraceEvent ( SevWarnAlways , " DDZeroServerLeftInTeam " , self - > distributorId )
. detail ( " Team " , team - > getDesc ( ) )
. detail ( " TotalBytesLost " , bytesLost ) ;
return Void ( ) ;
}
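// Returns true if any server on the team is marked FAILED in the exclusion list; both the primary and the
// optional secondary address are checked, with and without the port.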
bool teamContainsFailedServer ( DDTeamCollection * self , Reference < TCTeamInfo > team ) {
auto ssis = team - > getLastKnownServerInterfaces ( ) ;
for ( const auto & ssi : ssis ) {
AddressExclusion addr ( ssi . address ( ) . ip , ssi . address ( ) . port ) ;
AddressExclusion ipaddr ( ssi . address ( ) . ip ) ;
if ( self - > excludedServers . get ( addr ) = = DDTeamCollection : : Status : : FAILED | |
self - > excludedServers . get ( ipaddr ) = = DDTeamCollection : : Status : : FAILED ) {
return true ;
}
if ( ssi . secondaryAddress ( ) . present ( ) ) {
AddressExclusion saddr ( ssi . secondaryAddress ( ) . get ( ) . ip , ssi . secondaryAddress ( ) . get ( ) . port ) ;
AddressExclusion sipaddr ( ssi . secondaryAddress ( ) . get ( ) . ip ) ;
if ( self - > excludedServers . get ( saddr ) = = DDTeamCollection : : Status : : FAILED | |
self - > excludedServers . get ( sipaddr ) = = DDTeamCollection : : Status : : FAILED ) {
return true ;
}
}
}
return false ;
}
// Track a team and issue RelocateShards when the level of degradation changes
// A badTeam can be unhealthy or just a redundantTeam removed by machineTeamRemover() or serverTeamRemover()
ACTOR Future < Void > teamTracker ( DDTeamCollection * self , Reference < TCTeamInfo > team , bool badTeam , bool redundantTeam ) {
state int lastServersLeft = team - > size ( ) ;
state bool lastAnyUndesired = false ;
state bool lastAnyWigglingServer = false ;
state bool logTeamEvents =
g_network - > isSimulated ( ) | | ! badTeam | | team - > size ( ) < = self - > configuration . storageTeamSize ;
state bool lastReady = false ;
state bool lastHealthy ;
state bool lastOptimal ;
state bool lastWrongConfiguration = team - > isWrongConfiguration ( ) ;
state bool lastZeroHealthy = self - > zeroHealthyTeams - > get ( ) ;
state bool firstCheck = true ;
state Future < Void > zeroServerLeftLogger ;
if ( logTeamEvents ) {
TraceEvent ( " ServerTeamTrackerStarting " , self - > distributorId )
. detail ( " Reason " , " Initial wait complete (sc) " )
. detail ( " ServerTeam " , team - > getDesc ( ) ) ;
}
self - > priority_teams [ team - > getPriority ( ) ] + + ;
try {
loop {
if ( logTeamEvents ) {
TraceEvent ( " ServerTeamHealthChangeDetected " , self - > distributorId )
. detail ( " ServerTeam " , team - > getDesc ( ) )
. detail ( " Primary " , self - > primary )
. detail ( " IsReady " , self - > initialFailureReactionDelay . isReady ( ) ) ;
self - > traceTeamCollectionInfo ( ) ;
}
// Check if the number of degraded machines has changed
state vector < Future < Void > > change ;
bool anyUndesired = false ;
bool anyWrongConfiguration = false ;
bool anyWigglingServer = false ;
int serversLeft = 0 , serverUndesired = 0 , serverWrongConf = 0 , serverWiggling = 0 ;
for ( const UID & uid : team - > getServerIDs ( ) ) {
change . push_back ( self - > server_status . onChange ( uid ) ) ;
auto & status = self - > server_status . get ( uid ) ;
if ( ! status . isFailed ) {
serversLeft + + ;
}
if ( status . isUndesired ) {
anyUndesired = true ;
serverUndesired + + ;
}
if ( status . isWrongConfiguration ) {
anyWrongConfiguration = true ;
serverWrongConf + + ;
}
if ( status . isWiggling ) {
anyWigglingServer = true ;
serverWiggling + + ;
}
}
if ( serversLeft = = 0 ) {
logTeamEvents = true ;
}
// Failed server should not trigger DD if SS failures are set to be ignored
if ( ! badTeam & & self - > healthyZone . get ( ) . present ( ) & &
( self - > healthyZone . get ( ) . get ( ) = = ignoreSSFailuresZoneString ) ) {
ASSERT_WE_THINK ( serversLeft = = self - > configuration . storageTeamSize ) ;
}
if ( ! self - > initialFailureReactionDelay . isReady ( ) ) {
change . push_back ( self - > initialFailureReactionDelay ) ;
}
change . push_back ( self - > zeroHealthyTeams - > onChange ( ) ) ;
bool healthy = ! badTeam & & ! anyUndesired & & serversLeft = = self - > configuration . storageTeamSize ;
team - > setHealthy ( healthy ) ; // Unhealthy teams won't be chosen by bestTeam
bool optimal = team - > isOptimal ( ) & & healthy ;
bool containsFailed = teamContainsFailedServer ( self , team ) ;
bool recheck = ! healthy & & ( lastReady ! = self - > initialFailureReactionDelay . isReady ( ) | |
( lastZeroHealthy & & ! self - > zeroHealthyTeams - > get ( ) ) | | containsFailed ) ;
2020-07-17 01:22:18 +08:00
// TraceEvent("TeamHealthChangeDetected", self->distributorId)
// .detail("Team", team->getDesc())
// .detail("ServersLeft", serversLeft)
// .detail("LastServersLeft", lastServersLeft)
// .detail("AnyUndesired", anyUndesired)
// .detail("LastAnyUndesired", lastAnyUndesired)
// .detail("AnyWrongConfiguration", anyWrongConfiguration)
// .detail("LastWrongConfiguration", lastWrongConfiguration)
// .detail("Recheck", recheck)
// .detail("BadTeam", badTeam)
// .detail("LastZeroHealthy", lastZeroHealthy)
// .detail("ZeroHealthyTeam", self->zeroHealthyTeams->get());
lastReady = self - > initialFailureReactionDelay . isReady ( ) ;
lastZeroHealthy = self - > zeroHealthyTeams - > get ( ) ;
if ( firstCheck ) {
firstCheck = false ;
if ( healthy ) {
self - > healthyTeamCount + + ;
self - > zeroHealthyTeams - > set ( false ) ;
}
lastHealthy = healthy ;
if ( optimal ) {
self - > optimalTeamCount + + ;
self - > zeroOptimalTeams . set ( false ) ;
}
lastOptimal = optimal ;
}
if ( serversLeft ! = lastServersLeft | | anyUndesired ! = lastAnyUndesired | |
anyWrongConfiguration ! = lastWrongConfiguration | | anyWigglingServer ! = lastAnyWigglingServer | |
recheck ) { // NOTE: do not check wrongSize
if ( logTeamEvents ) {
TraceEvent ( " ServerTeamHealthChanged " , self - > distributorId )
. detail ( " ServerTeam " , team - > getDesc ( ) )
. detail ( " ServersLeft " , serversLeft )
. detail ( " LastServersLeft " , lastServersLeft )
. detail ( " ContainsUndesiredServer " , anyUndesired )
. detail ( " ContainsWigglingServer " , anyWigglingServer )
. detail ( " HealthyTeamsCount " , self - > healthyTeamCount )
. detail ( " IsWrongConfiguration " , anyWrongConfiguration ) ;
}
team - > setWrongConfiguration ( anyWrongConfiguration ) ;
if ( optimal ! = lastOptimal ) {
lastOptimal = optimal ;
self - > optimalTeamCount + = optimal ? 1 : - 1 ;
ASSERT ( self - > optimalTeamCount > = 0 ) ;
self - > zeroOptimalTeams . set ( self - > optimalTeamCount = = 0 ) ;
}
if ( lastHealthy ! = healthy ) {
lastHealthy = healthy ;
// Update healthy team count when the team healthy changes
self - > healthyTeamCount + = healthy ? 1 : - 1 ;
ASSERT ( self - > healthyTeamCount > = 0 ) ;
self - > zeroHealthyTeams - > set ( self - > healthyTeamCount = = 0 ) ;
if ( self - > healthyTeamCount = = 0 ) {
TraceEvent ( SevWarn , " ZeroServerTeamsHealthySignalling " , self - > distributorId )
. detail ( " SignallingTeam " , team - > getDesc ( ) )
. detail ( " Primary " , self - > primary ) ;
}
if ( logTeamEvents ) {
TraceEvent ( " ServerTeamHealthDifference " , self - > distributorId )
. detail ( " ServerTeam " , team - > getDesc ( ) )
. detail ( " LastOptimal " , lastOptimal )
. detail ( " LastHealthy " , lastHealthy )
. detail ( " Optimal " , optimal )
. detail ( " OptimalTeamCount " , self - > optimalTeamCount ) ;
}
}
lastServersLeft = serversLeft ;
lastAnyUndesired = anyUndesired ;
lastWrongConfiguration = anyWrongConfiguration ;
lastAnyWigglingServer = anyWigglingServer ;
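// Recompute the team's relocation priority from its current state; fewer servers left maps to a more urgent
// priority.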
state int lastPriority = team - > getPriority ( ) ;
if ( team - > size ( ) = = 0 ) {
team - > setPriority ( SERVER_KNOBS - > PRIORITY_POPULATE_REGION ) ;
} else if ( serversLeft < self - > configuration . storageTeamSize ) {
if ( serversLeft = = 0 )
team - > setPriority ( SERVER_KNOBS - > PRIORITY_TEAM_0_LEFT ) ;
else if ( serversLeft = = 1 )
team - > setPriority ( SERVER_KNOBS - > PRIORITY_TEAM_1_LEFT ) ;
else if ( serversLeft = = 2 )
team - > setPriority ( SERVER_KNOBS - > PRIORITY_TEAM_2_LEFT ) ;
else
team - > setPriority ( SERVER_KNOBS - > PRIORITY_TEAM_UNHEALTHY ) ;
} else if ( ! badTeam & & anyWigglingServer & & serverWiggling = = serverWrongConf & &
serverWiggling = = serverUndesired ) {
// the wrongly configured and undesired server is the wiggling server
team - > setPriority ( SERVER_KNOBS - > PRIORITY_PERPETUAL_STORAGE_WIGGLE ) ;
} else if ( badTeam | | anyWrongConfiguration ) {
if ( redundantTeam ) {
team - > setPriority ( SERVER_KNOBS - > PRIORITY_TEAM_REDUNDANT ) ;
} else {
team - > setPriority ( SERVER_KNOBS - > PRIORITY_TEAM_UNHEALTHY ) ;
}
} else if ( anyUndesired ) {
team - > setPriority ( SERVER_KNOBS - > PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER ) ;
} else {
team - > setPriority ( SERVER_KNOBS - > PRIORITY_TEAM_HEALTHY ) ;
}
if ( lastPriority ! = team - > getPriority ( ) ) {
self - > priority_teams [ lastPriority ] - - ;
self - > priority_teams [ team - > getPriority ( ) ] + + ;
if ( lastPriority = = SERVER_KNOBS - > PRIORITY_TEAM_0_LEFT & &
team - > getPriority ( ) < SERVER_KNOBS - > PRIORITY_TEAM_0_LEFT ) {
zeroServerLeftLogger = Void ( ) ;
}
if ( logTeamEvents ) {
int dataLoss = team - > getPriority ( ) = = SERVER_KNOBS - > PRIORITY_TEAM_0_LEFT ;
Severity severity = dataLoss ? SevWarnAlways : SevInfo ;
TraceEvent ( severity , " ServerTeamPriorityChange " , self - > distributorId )
. detail ( " Priority " , team - > getPriority ( ) )
. detail ( " Info " , team - > getDesc ( ) )
. detail ( " ZeroHealthyServerTeams " , self - > zeroHealthyTeams - > get ( ) )
. detail ( " Hint " ,
severity = = SevWarnAlways ? " No replicas remain of some data "
: " The priority of this team changed " ) ;
if ( team - > getPriority ( ) = = SERVER_KNOBS - > PRIORITY_TEAM_0_LEFT ) {
// 0 servers left in this team, data might be lost.
zeroServerLeftLogger = zeroServerLeftLogger_impl ( self , team ) ;
}
}
}
lastZeroHealthy =
self - > zeroHealthyTeams - > get ( ) ; // set this again in case it changed from this teams health changing
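// Once the initial failure reaction delay has elapsed (or the team contains a FAILED server), send a
// RelocateShard request for every shard this team owns, at the highest priority among all teams owning the
// shard.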
if ( ( self - > initialFailureReactionDelay . isReady ( ) & & ! self - > zeroHealthyTeams - > get ( ) ) | | containsFailed ) {
vector < KeyRange > shards = self - > shardsAffectedByTeamFailure - > getShardsFor (
ShardsAffectedByTeamFailure : : Team ( team - > getServerIDs ( ) , self - > primary ) ) ;
for ( int i = 0 ; i < shards . size ( ) ; i + + ) {
// Make it high priority to move keys off failed server or else RelocateShards may never be
// addressed
int maxPriority = containsFailed ? SERVER_KNOBS - > PRIORITY_TEAM_FAILED : team - > getPriority ( ) ;
// The shard split/merge and DD rebooting may make a shard mapped to multiple teams,
// so we need to recalculate the shard's priority
if ( maxPriority < SERVER_KNOBS - > PRIORITY_TEAM_FAILED ) {
std : : pair < vector < ShardsAffectedByTeamFailure : : Team > ,
vector < ShardsAffectedByTeamFailure : : Team > >
teams = self - > shardsAffectedByTeamFailure - > getTeamsFor ( shards [ i ] ) ;
for ( int j = 0 ; j < teams . first . size ( ) + teams . second . size ( ) ; j + + ) {
// t is the team in primary DC or the remote DC
auto & t =
j < teams . first . size ( ) ? teams . first [ j ] : teams . second [ j - teams . first . size ( ) ] ;
if ( ! t . servers . size ( ) ) {
maxPriority = std : : max ( maxPriority , SERVER_KNOBS - > PRIORITY_POPULATE_REGION ) ;
break ;
}
auto tc = self - > teamCollections [ t . primary ? 0 : 1 ] ;
if ( tc = = nullptr ) {
// teamTracker only works when all teamCollections are valid.
// Always check if all teamCollections are valid, and throw error if any
// teamCollection has been destructed, because the teamTracker can be triggered
// after a DDTeamCollection was destroyed and before the other DDTeamCollection is
// destroyed. Do not throw actor_cancelled() because flow treat it differently.
throw dd_cancelled ( ) ;
}
ASSERT ( tc - > primary = = t . primary ) ;
// tc->traceAllInfo();
if ( tc - > server_info . count ( t . servers [ 0 ] ) ) {
auto & info = tc - > server_info [ t . servers [ 0 ] ] ;
bool found = false ;
for ( int k = 0 ; k < info - > teams . size ( ) ; k + + ) {
if ( info - > teams [ k ] - > getServerIDs ( ) = = t . servers ) {
maxPriority = std : : max ( maxPriority , info - > teams [ k ] - > getPriority ( ) ) ;
found = true ;
break ;
}
}
// If we cannot find the team, it could be a bad team so assume unhealthy priority
if ( ! found ) {
// If the input team (in function parameters) is a redundant team, found will be
// false. We want to differentiate the redundant_team from unhealthy_team in
// terms of relocate priority
maxPriority =
std : : max < int > ( maxPriority ,
redundantTeam ? SERVER_KNOBS - > PRIORITY_TEAM_REDUNDANT
: SERVER_KNOBS - > PRIORITY_TEAM_UNHEALTHY ) ;
}
} else {
TEST ( true ) ; // A removed server is still associated with a team in
// ShardsAffectedByTeamFailure
}
}
}
RelocateShard rs ;
rs . keys = shards [ i ] ;
rs . priority = maxPriority ;
self - > output . send ( rs ) ;
TraceEvent ( " SendRelocateToDDQueue " , self - > distributorId )
. suppressFor ( 1.0 )
. detail ( " ServerPrimary " , self - > primary )
. detail ( " ServerTeam " , team - > getDesc ( ) )
. detail ( " KeyBegin " , rs . keys . begin )
. detail ( " KeyEnd " , rs . keys . end )
. detail ( " Priority " , rs . priority )
. detail ( " ServerTeamFailedMachines " , team - > size ( ) - serversLeft )
. detail ( " ServerTeamOKMachines " , serversLeft ) ;
}
} else {
if ( logTeamEvents ) {
TraceEvent ( " ServerTeamHealthNotReady " , self - > distributorId )
. detail ( " HealthyServerTeamCount " , self - > healthyTeamCount )
. detail ( " ServerTeamID " , team - > getTeamID ( ) ) ;
}
}
}
// Wait for any of the machines to change status
wait ( quorum ( change , 1 ) ) ;
wait ( yield ( ) ) ;
}
} catch ( Error & e ) {
if ( logTeamEvents ) {
TraceEvent ( " TeamTrackerStopping " , self - > distributorId )
. detail ( " ServerPrimary " , self - > primary )
. detail ( " Team " , team - > getDesc ( ) )
. detail ( " Priority " , team - > getPriority ( ) ) ;
}
self - > priority_teams [ team - > getPriority ( ) ] - - ;
if ( team - > isHealthy ( ) ) {
self - > healthyTeamCount - - ;
ASSERT ( self - > healthyTeamCount > = 0 ) ;
if ( self - > healthyTeamCount = = 0 ) {
TraceEvent ( SevWarn , " ZeroTeamsHealthySignalling " , self - > distributorId )
. detail ( " ServerPrimary " , self - > primary )
. detail ( " SignallingServerTeam " , team - > getDesc ( ) ) ;
self - > zeroHealthyTeams - > set ( true ) ;
}
}
if ( lastOptimal ) {
self - > optimalTeamCount - - ;
ASSERT ( self - > optimalTeamCount > = 0 ) ;
self - > zeroOptimalTeams . set ( self - > optimalTeamCount = = 0 ) ;
}
throw ;
}
}
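// Keeps self->excludedServers in sync with the excluded/failed server and locality keyspaces and watches them
// for changes; newly excluded or failed servers also re-trigger storage recruitment.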
ACTOR Future < Void > trackExcludedServers ( DDTeamCollection * self ) {
// Fetch the list of excluded servers
state ReadYourWritesTransaction tr ( self - > cx ) ;
loop {
try {
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
state Future < RangeResult > fresultsExclude = tr . getRange ( excludedServersKeys , CLIENT_KNOBS - > TOO_MANY ) ;
state Future < RangeResult > fresultsFailed = tr . getRange ( failedServersKeys , CLIENT_KNOBS - > TOO_MANY ) ;
state Future < RangeResult > flocalitiesExclude = tr . getRange ( excludedLocalityKeys , CLIENT_KNOBS - > TOO_MANY ) ;
state Future < RangeResult > flocalitiesFailed = tr . getRange ( failedLocalityKeys , CLIENT_KNOBS - > TOO_MANY ) ;
state Future < std : : vector < ProcessData > > fworkers = getWorkers ( self - > cx ) ;
wait ( success ( fresultsExclude ) & & success ( fresultsFailed ) & & success ( flocalitiesExclude ) & &
success ( flocalitiesFailed ) ) ;
state RangeResult excludedResults = fresultsExclude . get ( ) ;
ASSERT ( ! excludedResults . more & & excludedResults . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
state RangeResult failedResults = fresultsFailed . get ( ) ;
ASSERT ( ! failedResults . more & & failedResults . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
state RangeResult excludedLocalityResults = flocalitiesExclude . get ( ) ;
ASSERT ( ! excludedLocalityResults . more & & excludedLocalityResults . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
state RangeResult failedLocalityResults = flocalitiesFailed . get ( ) ;
ASSERT ( ! failedLocalityResults . more & & failedLocalityResults . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
state std : : set < AddressExclusion > excluded ;
state std : : set < AddressExclusion > failed ;
for ( const auto & r : excludedResults ) {
AddressExclusion addr = decodeExcludedServersKey ( r . key ) ;
if ( addr . isValid ( ) ) {
excluded . insert ( addr ) ;
}
}
for ( const auto & r : failedResults ) {
AddressExclusion addr = decodeFailedServersKey ( r . key ) ;
if ( addr . isValid ( ) ) {
failed . insert ( addr ) ;
}
}
wait ( success ( fworkers ) ) ;
std : : vector < ProcessData > workers = fworkers . get ( ) ;
for ( const auto & r : excludedLocalityResults ) {
std : : string locality = decodeExcludedLocalityKey ( r . key ) ;
std : : set < AddressExclusion > localityExcludedAddresses = getAddressesByLocality ( workers , locality ) ;
excluded . insert ( localityExcludedAddresses . begin ( ) , localityExcludedAddresses . end ( ) ) ;
}
for ( const auto & r : failedLocalityResults ) {
std : : string locality = decodeFailedLocalityKey ( r . key ) ;
std : : set < AddressExclusion > localityFailedAddresses = getAddressesByLocality ( workers , locality ) ;
failed . insert ( localityFailedAddresses . begin ( ) , localityFailedAddresses . end ( ) ) ;
}
// Reset and reassign self->excludedServers based on excluded, but we only
// want to trigger entries that are different
// Do not retrigger and double-overwrite failed or wiggling servers
auto old = self - > excludedServers . getKeys ( ) ;
for ( const auto & o : old ) {
if ( ! excluded . count ( o ) & & ! failed . count ( o ) & &
! ( self - > excludedServers . count ( o ) & &
self - > excludedServers . get ( o ) = = DDTeamCollection : : Status : : WIGGLING ) ) {
self - > excludedServers . set ( o , DDTeamCollection : : Status : : NONE ) ;
}
}
for ( const auto & n : excluded ) {
if ( ! failed . count ( n ) ) {
self - > excludedServers . set ( n , DDTeamCollection : : Status : : EXCLUDED ) ;
}
}
for ( const auto & f : failed ) {
self - > excludedServers . set ( f , DDTeamCollection : : Status : : FAILED ) ;
}
2019-10-04 04:18:05 +08:00
TraceEvent ( " DDExcludedServersChanged " , self - > distributorId )
2021-05-19 14:48:04 +08:00
. detail ( " AddressesExcluded " , excludedResults . size ( ) )
. detail ( " AddressesFailed " , failedResults . size ( ) )
2021-06-05 06:23:04 +08:00
. detail ( " LocalitiesExcluded " , excludedLocalityResults . size ( ) )
. detail ( " LocalitiesFailed " , failedLocalityResults . size ( ) ) ;
2017-05-26 04:48:44 +08:00
self - > restartRecruiting . trigger ( ) ;
state Future < Void > watchFuture = tr . watch ( excludedServersVersionKey ) | | tr . watch ( failedServersVersionKey ) | |
tr . watch ( excludedLocalityVersionKey ) | | tr . watch ( failedLocalityVersionKey ) ;
2019-10-04 04:18:05 +08:00
wait ( tr . commit ( ) ) ;
wait ( watchFuture ) ;
tr . reset ( ) ;
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
}
}
}
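// Reads the serverList keyspace and pairs each storage server interface with the process class of the worker
// it runs on.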
ACTOR Future < vector < std : : pair < StorageServerInterface , ProcessClass > > > getServerListAndProcessClasses ( Transaction * tr ) {
state Future < vector < ProcessData > > workers = getWorkers ( tr ) ;
state Future < RangeResult > serverList = tr - > getRange ( serverListKeys , CLIENT_KNOBS - > TOO_MANY ) ;
wait ( success ( workers ) & & success ( serverList ) ) ;
ASSERT ( ! serverList . get ( ) . more & & serverList . get ( ) . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
std : : map < Optional < Standalone < StringRef > > , ProcessData > id_data ;
for ( int i = 0 ; i < workers . get ( ) . size ( ) ; i + + )
id_data [ workers . get ( ) [ i ] . locality . processId ( ) ] = workers . get ( ) [ i ] ;
vector < std : : pair < StorageServerInterface , ProcessClass > > results ;
for ( int i = 0 ; i < serverList . get ( ) . size ( ) ; i + + ) {
auto ssi = decodeServerListValue ( serverList . get ( ) [ i ] . value ) ;
results . emplace_back ( ssi , id_data [ ssi . locality . processId ( ) ] . processClass ) ;
}
return results ;
}
// Create a transaction reading the value of `wigglingStorageServerKey` and update it to the next Process ID according
// to a sorted PID set maintained by the data distributor. If no storage process is known yet, the value is set to an empty string.
ACTOR Future < Void > updateNextWigglingStoragePID ( DDTeamCollection * teamCollection ) {
state ReadYourWritesTransaction tr ( teamCollection - > cx ) ;
state Value writeValue ;
loop {
try {
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
Optional < Value > value = wait ( tr . get ( wigglingStorageServerKey ) ) ;
if ( teamCollection - > pid2server_info . empty ( ) ) {
writeValue = LiteralStringRef ( " " ) ;
} else {
Value pid = teamCollection - > pid2server_info . begin ( ) - > first ;
if ( value . present ( ) ) {
auto nextIt = teamCollection - > pid2server_info . upper_bound ( value . get ( ) ) ;
if ( nextIt = = teamCollection - > pid2server_info . end ( ) ) {
writeValue = pid ;
} else {
writeValue = nextIt - > first ;
}
} else {
writeValue = pid ;
}
}
tr . set ( wigglingStorageServerKey , writeValue ) ;
wait ( tr . commit ( ) ) ;
break ;
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
}
}
TraceEvent ( SevDebug , " PerpetualNextWigglingStoragePID " , teamCollection - > distributorId )
. detail ( " WriteValue " , writeValue ) ;
return Void ( ) ;
}
// Iterate over each storage process to do storage wiggle. After initializing the first Process ID, it waits for a signal
// from `perpetualStorageWiggler` indicating the wiggling of the current process is finished. Then it writes the next
// Process ID to a system key: `wigglingStorageServerKey` to show the next process to wiggle.
ACTOR Future < Void > perpetualStorageWiggleIterator ( AsyncVar < bool > * stopSignal ,
FutureStream < Void > finishStorageWiggleSignal ,
DDTeamCollection * teamCollection ) {
loop {
choose {
when ( wait ( stopSignal - > onChange ( ) ) ) { }
when ( waitNext ( finishStorageWiggleSignal ) ) {
state bool takeRest = true; // delay to avoid deleting and updating the ServerList too frequently
while ( takeRest ) {
wait ( delayJittered ( SERVER_KNOBS - > PERPETUAL_WIGGLE_DELAY ) ) ;
// keep resting while there are not enough other servers or machines to hold the wiggled data
takeRest = teamCollection - > server_info . size ( ) < = teamCollection - > configuration . storageTeamSize | |
teamCollection - > machine_info . size ( ) < teamCollection - > configuration . storageTeamSize ;
}
wait ( updateNextWigglingStoragePID ( teamCollection ) ) ;
}
}
if ( stopSignal - > get ( ) ) {
break ;
}
}
return Void ( ) ;
}
// Watch the value change of `wigglingStorageServerKey`.
// Return the watch future and the current value of `wigglingStorageServerKey`.
ACTOR Future < std : : pair < Future < Void > , Value > > watchPerpetualStoragePIDChange ( DDTeamCollection * self ) {
state ReadYourWritesTransaction tr ( self - > cx ) ;
state Future < Void > watchFuture ;
state Value ret ;
loop {
try {
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
Optional < Value > value = wait ( tr . get ( wigglingStorageServerKey ) ) ;
if ( value . present ( ) ) {
ret = value . get ( ) ;
}
watchFuture = tr . watch ( wigglingStorageServerKey ) ;
wait ( tr . commit ( ) ) ;
break ;
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
}
}
return std : : make_pair ( watchFuture , ret ) ;
}
// Periodically check whether the cluster is healthy enough to continue the perpetual wiggle
ACTOR Future < Void > clusterHealthCheckForPerpetualWiggle ( DDTeamCollection * self , int * extraTeamCount ) {
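// pausePenalty doubles (capped at the number of teams) each time the wiggle is paused for a reason other than
// a busy DD queue, so extraTeamCount backs off more aggressively on repeated pauses.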
state int pausePenalty = 1 ;
loop {
Promise < int > countp ;
self - > getUnhealthyRelocationCount . send ( countp ) ;
int count = wait ( countp . getFuture ( ) ) ;
// pause wiggle when
// a. DDQueue is busy with unhealthy relocation request
// b. healthy teams are not enough
// c. the overall disk space is not enough
if ( count > = SERVER_KNOBS - > DD_STORAGE_WIGGLE_PAUSE_THRESHOLD | | self - > healthyTeamCount < = * extraTeamCount | |
self - > bestTeamKeepStuckCount > SERVER_KNOBS - > DD_STORAGE_WIGGLE_STUCK_THRESHOLD ) {
// if we pause the wiggle for a reason other than (a), increase extraTeamCount. This helps avoid oscillation
// between pause and non-pause status.
if ( ( self - > healthyTeamCount < = * extraTeamCount | |
self - > bestTeamKeepStuckCount > SERVER_KNOBS - > DD_STORAGE_WIGGLE_PAUSE_THRESHOLD ) & &
! self - > pauseWiggle - > get ( ) ) {
* extraTeamCount = std : : min ( * extraTeamCount + pausePenalty , ( int ) self - > teams . size ( ) ) ;
pausePenalty = std : : min ( pausePenalty * 2 , ( int ) self - > teams . size ( ) ) ;
}
self - > pauseWiggle - > set ( true ) ;
} else {
self - > pauseWiggle - > set ( false ) ;
}
wait ( delay ( SERVER_KNOBS - > CHECK_TEAM_DELAY , TaskPriority : : DataDistributionLow ) ) ;
}
}
// Watches the value (pid) change of \xff/storageWigglePID and adds the storage servers running on the process whose
// Process Id is "pid" into excludedServers, which prevents recruiting the wiggling storage servers and lets teamTracker
// start to move data off the affected teams. The wiggling of the current storage servers will be paused if the
// cluster is unhealthy and restarted once the cluster is healthy again.
ACTOR Future < Void > perpetualStorageWiggler ( AsyncVar < bool > * stopSignal ,
PromiseStream < Void > finishStorageWiggleSignal ,
DDTeamCollection * self ) {
state Future < Void > watchFuture = Never ( ) ;
state Future < Void > moveFinishFuture = Never ( ) ;
state int extraTeamCount = 0 ;
state Future < Void > ddQueueCheck = clusterHealthCheckForPerpetualWiggle ( self , & extraTeamCount ) ;
state int movingCount = 0 ;
state std : : pair < Future < Void > , Value > res = wait ( watchPerpetualStoragePIDChange ( self ) ) ;
ASSERT ( ! self - > wigglingPid . present ( ) ) ; // only single process wiggle is allowed
self - > wigglingPid = Optional < Key > ( res . second ) ;
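// Main wiggle loop: while paused, put the wiggling process's servers back into rotation; otherwise exclude them
// and wait for the data to move off, then advance to the next process ID from the watch.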
loop {
if ( self - > wigglingPid . present ( ) ) {
StringRef pid = self - > wigglingPid . get ( ) ;
if ( self - > pauseWiggle - > get ( ) ) {
TEST ( true ) ; // paused because cluster is unhealthy
moveFinishFuture = Never ( ) ;
self - > includeStorageServersForWiggle ( ) ;
TraceEvent ( " PerpetualStorageWigglePause " , self - > distributorId )
. detail ( " ProcessId " , pid )
. detail ( " BestTeamKeepStuckCount " , self - > bestTeamKeepStuckCount )
. detail ( " ExtraHealthyTeamCount " , extraTeamCount )
. detail ( " HealthyTeamCount " , self - > healthyTeamCount )
. detail ( " StorageCount " , movingCount ) ;
} else {
TEST ( true ) ; // start wiggling
auto fv = self - > excludeStorageServersForWiggle ( pid ) ;
movingCount = fv . size ( ) ;
moveFinishFuture = waitForAll ( fv ) ;
TraceEvent ( " PerpetualStorageWiggleStart " , self - > distributorId )
. detail ( " ProcessId " , pid )
. detail ( " ExtraHealthyTeamCount " , extraTeamCount )
. detail ( " HealthyTeamCount " , self - > healthyTeamCount )
. detail ( " StorageCount " , movingCount ) ;
}
}
choose {
when ( wait ( watchFuture ) ) {
ASSERT ( ! self - > wigglingPid . present ( ) ) ; // the previous wiggle must be finished
watchFuture = Never ( ) ;
// read new pid and set the next watch Future
wait ( store ( res , watchPerpetualStoragePIDChange ( self ) ) ) ;
self - > wigglingPid = Optional < Key > ( res . second ) ;
// random delay
wait ( delayJittered ( 5.0 , TaskPriority : : DataDistributionLow ) ) ;
}
when ( wait ( moveFinishFuture ) ) {
ASSERT ( self - > wigglingPid . present ( ) ) ;
StringRef pid = self - > wigglingPid . get ( ) ;
TEST ( pid ! = LiteralStringRef ( " " ) ) ; // finish wiggling this process
moveFinishFuture = Never ( ) ;
self - > includeStorageServersForWiggle ( ) ;
TraceEvent ( " PerpetualStorageWiggleFinish " , self - > distributorId )
. detail ( " ProcessId " , pid . toString ( ) )
. detail ( " StorageCount " , movingCount ) ;
self - > wigglingPid . reset ( ) ;
watchFuture = res . first ;
finishStorageWiggleSignal . send ( Void ( ) ) ;
extraTeamCount = std : : max ( 0 , extraTeamCount - 1 ) ;
}
when ( wait ( ddQueueCheck | | self - > pauseWiggle - > onChange ( ) | | stopSignal - > onChange ( ) ) ) { }
}
if ( stopSignal - > get ( ) ) {
break ;
}
}
if ( self - > wigglingPid . present ( ) ) {
self - > includeStorageServersForWiggle ( ) ;
TraceEvent ( " PerpetualStorageWiggleExitingPause " , self - > distributorId )
. detail ( " ProcessId " , self - > wigglingPid . get ( ) ) ;
self - > wigglingPid . reset ( ) ;
}
return Void ( ) ;
}
// This actor sets a watch to monitor the value of `perpetualStorageWiggleKey`, which is controlled by the command
// `configure perpetual_storage_wiggle=$value`. If the value is 1, this actor starts two actors,
// `perpetualStorageWiggleIterator` and `perpetualStorageWiggler`. Otherwise, it sends a stop signal to them.
2021-08-04 01:15:34 +08:00
ACTOR Future < Void > monitorPerpetualStorageWiggle ( DDTeamCollection * teamCollection ) {
2021-05-20 11:32:15 +08:00
state int speed = 0 ;
2021-06-18 04:59:47 +08:00
state AsyncVar < bool > stopWiggleSignal ( true ) ;
2021-05-26 04:25:21 +08:00
state PromiseStream < Void > finishStorageWiggleSignal ;
2021-05-20 11:32:15 +08:00
state SignalableActorCollection collection ;
2021-06-15 07:00:02 +08:00
teamCollection - > pauseWiggle = makeReference < AsyncVar < bool > > ( true ) ;
2021-06-12 07:00:44 +08:00
2021-06-15 07:00:02 +08:00
loop {
2021-05-20 11:32:15 +08:00
state ReadYourWritesTransaction tr ( teamCollection - > cx ) ;
loop {
try {
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
Optional < Standalone < StringRef > > value = wait ( tr . get ( perpetualStorageWiggleKey ) ) ;
if ( value . present ( ) ) {
speed = std : : stoi ( value . get ( ) . toString ( ) ) ;
}
state Future < Void > watchFuture = tr . watch ( perpetualStorageWiggleKey ) ;
wait ( tr . commit ( ) ) ;
ASSERT ( speed = = 1 | | speed = = 0 ) ;
2021-06-16 13:30:58 +08:00
if ( speed = = 1 & & stopWiggleSignal . get ( ) ) { // avoid duplicated start
stopWiggleSignal . set ( false ) ;
2021-05-27 01:06:35 +08:00
collection . add ( perpetualStorageWiggleIterator (
& stopWiggleSignal , finishStorageWiggleSignal . getFuture ( ) , teamCollection ) ) ;
2021-08-04 01:15:34 +08:00
collection . add (
perpetualStorageWiggler ( & stopWiggleSignal , finishStorageWiggleSignal , teamCollection ) ) ;
2021-07-27 10:55:10 +08:00
TraceEvent ( " PerpetualStorageWiggleOpen " , teamCollection - > distributorId ) . log ( ) ;
2021-06-18 04:59:47 +08:00
} else if ( speed = = 0 ) {
if ( ! stopWiggleSignal . get ( ) ) {
stopWiggleSignal . set ( true ) ;
wait ( collection . signalAndReset ( ) ) ;
teamCollection - > pauseWiggle - > set ( true ) ;
}
2021-07-27 10:55:10 +08:00
TraceEvent ( " PerpetualStorageWiggleClose " , teamCollection - > distributorId ) . log ( ) ;
2021-05-20 11:32:15 +08:00
}
wait ( watchFuture ) ;
break ;
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
}
}
}
}
2019-08-20 04:47:48 +08:00
// The serverList system keyspace keeps the StorageServerInterface for each serverID. A storage server's storeType
// and serverID are decided by the server's filename. By parsing a storage server file's filename on each disk, the
// process on each machine creates the TCServerInfo with the correct serverID and StorageServerInterface.
2021-03-11 02:06:03 +08:00
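// waitServerListChange periodically re-reads the serverList keyspace and reconciles it with the in-memory state:
// servers whose interface or process class changed are notified via interfaceChanged, and servers that are new (and
// not currently being recruited) are added through addServer().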
ACTOR Future < Void > waitServerListChange ( DDTeamCollection * self ,
FutureStream < Void > serverRemoved ,
2020-09-28 06:26:50 +08:00
const DDEnabledState * ddEnabledState ) {
2019-09-28 10:39:53 +08:00
state Future < Void > checkSignal = delay ( SERVER_KNOBS - > SERVER_LIST_DELAY , TaskPriority : : DataDistributionLaunch ) ;
2017-05-26 04:48:44 +08:00
state Future < vector < std : : pair < StorageServerInterface , ProcessClass > > > serverListAndProcessClasses = Never ( ) ;
state bool isFetchingResults = false ;
2018-11-10 02:07:55 +08:00
state Transaction tr ( self - > cx ) ;
2017-05-26 04:48:44 +08:00
loop {
try {
choose {
2021-03-11 02:06:03 +08:00
when ( wait ( checkSignal ) ) {
2017-05-26 04:48:44 +08:00
checkSignal = Never ( ) ;
isFetchingResults = true ;
serverListAndProcessClasses = getServerListAndProcessClasses ( & tr ) ;
}
2021-03-11 02:06:03 +08:00
when ( vector < std : : pair < StorageServerInterface , ProcessClass > > results =
wait ( serverListAndProcessClasses ) ) {
2017-05-26 04:48:44 +08:00
serverListAndProcessClasses = Never ( ) ;
isFetchingResults = false ;
2021-03-11 02:06:03 +08:00
for ( int i = 0 ; i < results . size ( ) ; i + + ) {
2017-05-26 04:48:44 +08:00
UID serverId = results [ i ] . first . id ( ) ;
StorageServerInterface const & ssi = results [ i ] . first ;
ProcessClass const & processClass = results [ i ] . second ;
2017-10-11 01:36:33 +08:00
if ( ! self - > shouldHandleServer ( ssi ) ) {
continue ;
2021-03-06 03:28:15 +08:00
} else if ( self - > server_and_tss_info . count ( serverId ) ) {
auto & serverInfo = self - > server_and_tss_info [ serverId ] ;
2021-03-11 02:06:03 +08:00
if ( ssi . getValue . getEndpoint ( ) ! = serverInfo - > lastKnownInterface . getValue . getEndpoint ( ) | |
processClass ! = serverInfo - > lastKnownClass . classType ( ) ) {
Promise < std : : pair < StorageServerInterface , ProcessClass > > currentInterfaceChanged =
serverInfo - > interfaceChanged ;
serverInfo - > interfaceChanged =
Promise < std : : pair < StorageServerInterface , ProcessClass > > ( ) ;
serverInfo - > onInterfaceChanged =
Future < std : : pair < StorageServerInterface , ProcessClass > > (
serverInfo - > interfaceChanged . getFuture ( ) ) ;
currentInterfaceChanged . send ( std : : make_pair ( ssi , processClass ) ) ;
2017-05-26 04:48:44 +08:00
}
2021-03-11 02:06:03 +08:00
} else if ( ! self - > recruitingIds . count ( ssi . id ( ) ) ) {
self - > addServer ( ssi ,
processClass ,
self - > serverTrackerErrorOut ,
tr . getReadVersion ( ) . get ( ) ,
2020-09-28 06:26:50 +08:00
ddEnabledState ) ;
2021-05-13 02:53:20 +08:00
if ( ! ssi . isTss ( ) ) {
2021-03-06 03:28:15 +08:00
self - > doBuildTeams = true ;
}
2017-05-26 04:48:44 +08:00
}
}
2018-11-10 02:07:55 +08:00
tr = Transaction ( self - > cx ) ;
2019-09-28 10:39:53 +08:00
checkSignal = delay ( SERVER_KNOBS - > SERVER_LIST_DELAY , TaskPriority : : DataDistributionLaunch ) ;
2017-05-26 04:48:44 +08:00
}
2021-03-11 02:06:03 +08:00
when ( waitNext ( serverRemoved ) ) {
if ( isFetchingResults ) {
2018-11-10 02:07:55 +08:00
tr = Transaction ( self - > cx ) ;
2017-05-26 04:48:44 +08:00
serverListAndProcessClasses = getServerListAndProcessClasses ( & tr ) ;
}
}
}
2021-03-11 02:06:03 +08:00
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
serverListAndProcessClasses = Never ( ) ;
isFetchingResults = false ;
checkSignal = Void ( ) ;
}
}
}
2021-03-11 02:06:03 +08:00
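// Watches healthyZoneKey and keeps self->healthyZone in sync with it, handling both the maintenance-zone timeout and
// the special ignoreSSFailuresZoneString value that disables data distribution reactions to storage server failures.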
ACTOR Future < Void > waitHealthyZoneChange ( DDTeamCollection * self ) {
2019-04-02 08:55:13 +08:00
state ReadYourWritesTransaction tr ( self - > cx ) ;
loop {
try {
tr . setOption ( FDBTransactionOptions : : READ_SYSTEM_KEYS ) ;
2019-04-03 05:27:48 +08:00
tr . setOption ( FDBTransactionOptions : : LOCK_AWARE ) ;
2019-04-02 08:55:13 +08:00
Optional < Value > val = wait ( tr . get ( healthyZoneKey ) ) ;
2019-05-22 04:49:16 +08:00
state Future < Void > healthyZoneTimeout = Never ( ) ;
2021-03-11 02:06:03 +08:00
if ( val . present ( ) ) {
2019-04-02 08:55:13 +08:00
auto p = decodeHealthyZoneValue ( val . get ( ) ) ;
2019-07-19 04:18:36 +08:00
if ( p . first = = ignoreSSFailuresZoneString ) {
2019-07-12 05:53:00 +08:00
// healthyZone is now overloaded for the purpose of disabling DD, which does not time out
2021-07-27 10:55:10 +08:00
TraceEvent ( " DataDistributionDisabledForStorageServerFailuresStart " , self - > distributorId ) . log ( ) ;
2019-07-12 05:53:00 +08:00
healthyZoneTimeout = Never ( ) ;
} else if ( p . second > tr . getReadVersion ( ) . get ( ) ) {
2021-03-11 02:06:03 +08:00
double timeoutSeconds =
( p . second - tr . getReadVersion ( ) . get ( ) ) / ( double ) SERVER_KNOBS - > VERSIONS_PER_SECOND ;
2019-09-28 09:33:13 +08:00
healthyZoneTimeout = delay ( timeoutSeconds , TaskPriority : : DataDistribution ) ;
2021-03-11 02:06:03 +08:00
if ( self - > healthyZone . get ( ) ! = p . first ) {
TraceEvent ( " MaintenanceZoneStart " , self - > distributorId )
. detail ( " ZoneID " , printable ( p . first ) )
. detail ( " EndVersion " , p . second )
. detail ( " Duration " , timeoutSeconds ) ;
2019-07-03 07:25:29 +08:00
self - > healthyZone . set ( p . first ) ;
}
2019-07-12 05:53:00 +08:00
} else if ( self - > healthyZone . get ( ) . present ( ) ) {
2019-07-19 04:18:36 +08:00
// maintenance hits timeout
2021-07-27 10:55:10 +08:00
TraceEvent ( " MaintenanceZoneEndTimeout " , self - > distributorId ) . log ( ) ;
2019-04-02 08:55:13 +08:00
self - > healthyZone . set ( Optional < Key > ( ) ) ;
}
2021-03-11 02:06:03 +08:00
} else if ( self - > healthyZone . get ( ) . present ( ) ) {
2019-07-19 04:18:36 +08:00
// `healthyZone` has been cleared
if ( self - > healthyZone . get ( ) . get ( ) = = ignoreSSFailuresZoneString ) {
2021-07-27 10:55:10 +08:00
TraceEvent ( " DataDistributionDisabledForStorageServerFailuresEnd " , self - > distributorId ) . log ( ) ;
2019-07-19 04:18:36 +08:00
} else {
2021-07-27 10:55:10 +08:00
TraceEvent ( " MaintenanceZoneEndManualClear " , self - > distributorId ) . log ( ) ;
2019-07-19 04:18:36 +08:00
}
2019-04-02 08:55:13 +08:00
self - > healthyZone . set ( Optional < Key > ( ) ) ;
}
2019-07-10 07:09:51 +08:00
2019-04-02 08:55:13 +08:00
state Future < Void > watchFuture = tr . watch ( healthyZoneKey ) ;
wait ( tr . commit ( ) ) ;
2019-05-22 04:49:16 +08:00
wait ( watchFuture | | healthyZoneTimeout ) ;
2019-04-03 05:15:51 +08:00
tr . reset ( ) ;
2021-03-11 02:06:03 +08:00
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
2019-04-02 08:55:13 +08:00
}
}
}
2021-03-11 02:06:03 +08:00
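// Periodically refreshes a storage server's metrics, with a random jitter added to the polling delay.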
ACTOR Future < Void > serverMetricsPolling ( TCServerInfo * server ) {
2017-05-26 04:48:44 +08:00
state double lastUpdate = now ( ) ;
loop {
2021-03-11 02:06:03 +08:00
wait ( updateServerMetrics ( server ) ) ;
wait ( delayUntil ( lastUpdate + SERVER_KNOBS - > STORAGE_METRICS_POLLING_DELAY +
SERVER_KNOBS - > STORAGE_METRICS_RANDOM_DELAY * deterministicRandom ( ) - > random01 ( ) ,
TaskPriority : : DataDistributionLaunch ) ) ;
2017-05-26 04:48:44 +08:00
lastUpdate = now ( ) ;
}
}
2019-08-20 06:54:57 +08:00
// Set the server's storeType; any error is caught by the caller
2019-08-17 07:46:54 +08:00
ACTOR Future < Void > keyValueStoreTypeTracker ( DDTeamCollection * self , TCServerInfo * server ) {
2019-08-20 06:54:57 +08:00
// Update the server's storeType, especially when the server has just been created
state KeyValueStoreType type =
wait ( brokenPromiseToNever ( server - > lastKnownInterface . getKeyValueStoreType . getReplyWithTaskID < KeyValueStoreType > (
TaskPriority : : DataDistribution ) ) ) ;
server - > storeType = type ;
2017-05-26 04:48:44 +08:00
2019-08-20 06:54:57 +08:00
if ( type ! = self - > configuration . storageServerStoreType ) {
if ( self - > wrongStoreTypeRemover . isReady ( ) ) {
self - > wrongStoreTypeRemover = removeWrongStoreType ( self ) ;
self - > addActor . send ( self - > wrongStoreTypeRemover ) ;
}
2019-08-17 01:48:50 +08:00
}
2017-05-26 04:48:44 +08:00
2019-08-20 06:54:57 +08:00
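// Return Never() rather than Void() so this future stays pending; the enclosing storageServerTracker's choose on
// storeTypeTracker therefore does not fire again once the store type has been fetched for this interface.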
return Never ( ) ;
2017-05-26 04:48:44 +08:00
}
2021-03-11 02:06:03 +08:00
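// Polls until the given storage server can be removed: it must pass canRemoveStorageServer and own zero shards.
// The addedVersion check guards against removing a server so soon after it was added that an ill-timed master
// recovery could lose mutations sent to it.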
ACTOR Future < Void > waitForAllDataRemoved ( Database cx , UID serverID , Version addedVersion , DDTeamCollection * teams ) {
2021-03-06 03:28:15 +08:00
state Reference < ReadYourWritesTransaction > tr = makeReference < ReadYourWritesTransaction > ( cx ) ;
2019-02-08 07:31:03 +08:00
loop {
try {
2021-03-06 03:28:15 +08:00
tr - > setOption ( FDBTransactionOptions : : PRIORITY_SYSTEM_IMMEDIATE ) ;
tr - > setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
Version ver = wait ( tr - > getReadVersion ( ) ) ;
2019-02-08 07:31:03 +08:00
2021-03-11 02:06:03 +08:00
// We cannot remove a server immediately after adding it, because a perfectly timed master recovery could
// cause us to not store the mutations sent to the short-lived storage server.
if ( ver > addedVersion + SERVER_KNOBS - > MAX_READ_TRANSACTION_LIFE_VERSIONS ) {
2021-03-06 03:28:15 +08:00
bool canRemove = wait ( canRemoveStorageServer ( tr , serverID ) ) ;
2019-08-20 04:47:48 +08:00
// TraceEvent("WaitForAllDataRemoved")
// .detail("Server", serverID)
// .detail("CanRemove", canRemove)
// .detail("Shards", teams->shardsAffectedByTeamFailure->getNumberOfShards(serverID));
2019-08-17 06:13:53 +08:00
ASSERT ( teams - > shardsAffectedByTeamFailure - > getNumberOfShards ( serverID ) > = 0 ) ;
2019-02-08 07:31:03 +08:00
if ( canRemove & & teams - > shardsAffectedByTeamFailure - > getNumberOfShards ( serverID ) = = 0 ) {
return Void ( ) ;
}
}
// Wait for any change to the serverKeys for this server
2021-03-11 02:06:03 +08:00
wait ( delay ( SERVER_KNOBS - > ALL_DATA_REMOVED_DELAY , TaskPriority : : DataDistribution ) ) ;
2021-03-06 03:28:15 +08:00
tr - > reset ( ) ;
2019-02-08 07:31:03 +08:00
} catch ( Error & e ) {
2021-03-06 03:28:15 +08:00
wait ( tr - > onError ( e ) ) ;
2019-02-08 07:31:03 +08:00
}
}
}
2021-03-11 02:06:03 +08:00
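// Tracks whether a storage server has failed, honoring the healthy-zone and ignore-SS-failures settings; it returns
// once the server is unhealthy and all of its data has been removed, at which point the server can be removed.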
ACTOR Future < Void > storageServerFailureTracker ( DDTeamCollection * self ,
TCServerInfo * server ,
Database cx ,
ServerStatus * status ,
Version addedVersion ) {
2018-11-10 10:06:00 +08:00
state StorageServerInterface interf = server - > lastKnownInterface ;
2021-03-11 02:06:03 +08:00
state int targetTeamNumPerServer =
( SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER * ( self - > configuration . storageTeamSize + 1 ) ) / 2 ;
2018-11-10 10:06:00 +08:00
loop {
2019-07-17 06:12:18 +08:00
state bool inHealthyZone = false ; // healthChanged actor will be Never() if this flag is true
2019-07-10 07:09:51 +08:00
if ( self - > healthyZone . get ( ) . present ( ) ) {
if ( interf . locality . zoneId ( ) = = self - > healthyZone . get ( ) ) {
status - > isFailed = false ;
inHealthyZone = true ;
2019-07-19 04:18:36 +08:00
} else if ( self - > healthyZone . get ( ) . get ( ) = = ignoreSSFailuresZoneString ) {
2019-07-10 07:09:51 +08:00
// Ignore all SS failures
status - > isFailed = false ;
2019-07-17 06:12:18 +08:00
inHealthyZone = true ;
2019-07-10 07:09:51 +08:00
TraceEvent ( " SSFailureTracker " , self - > distributorId )
2019-07-17 06:12:18 +08:00
. suppressFor ( 1.0 )
2019-07-10 07:09:51 +08:00
. detail ( " IgnoredFailure " , " BeforeChooseWhen " )
. detail ( " ServerID " , interf . id ( ) )
. detail ( " Status " , status - > toString ( ) ) ;
}
2019-04-02 08:55:13 +08:00
}
2021-05-13 02:53:20 +08:00
if ( ! interf . isTss ( ) ) {
2021-03-06 03:28:15 +08:00
if ( self - > server_status . get ( interf . id ( ) ) . initialized ) {
bool unhealthy = self - > server_status . get ( interf . id ( ) ) . isUnhealthy ( ) ;
if ( unhealthy & & ! status - > isUnhealthy ( ) ) {
self - > unhealthyServers - - ;
}
if ( ! unhealthy & & status - > isUnhealthy ( ) ) {
self - > unhealthyServers + + ;
}
} else if ( status - > isUnhealthy ( ) ) {
2018-11-10 10:06:00 +08:00
self - > unhealthyServers + + ;
}
}
2021-03-11 02:06:03 +08:00
self - > server_status . set ( interf . id ( ) , * status ) ;
2019-08-13 01:08:12 +08:00
if ( status - > isFailed ) {
2018-11-10 10:06:00 +08:00
self - > restartRecruiting . trigger ( ) ;
2019-08-13 01:08:12 +08:00
}
2018-11-10 10:06:00 +08:00
2019-04-02 08:55:13 +08:00
Future < Void > healthChanged = Never ( ) ;
2021-03-11 02:06:03 +08:00
if ( status - > isFailed ) {
2019-04-02 08:55:13 +08:00
ASSERT ( ! inHealthyZone ) ;
2021-03-11 02:06:03 +08:00
healthChanged =
IFailureMonitor : : failureMonitor ( ) . onStateEqual ( interf . waitFailure . getEndpoint ( ) , FailureStatus ( false ) ) ;
} else if ( ! inHealthyZone ) {
healthChanged = waitFailureClientStrict ( interf . waitFailure ,
SERVER_KNOBS - > DATA_DISTRIBUTION_FAILURE_REACTION_TIME ,
TaskPriority : : DataDistribution ) ;
2019-04-02 08:55:13 +08:00
}
2018-11-10 10:06:00 +08:00
choose {
2021-03-11 02:06:03 +08:00
when ( wait ( healthChanged ) ) {
2018-11-10 10:06:00 +08:00
status - > isFailed = ! status - > isFailed ;
2021-05-13 02:53:20 +08:00
if ( ! status - > isFailed & & ! server - > lastKnownInterface . isTss ( ) & &
2021-03-11 02:06:03 +08:00
( server - > teams . size ( ) < targetTeamNumPerServer | | self - > lastBuildTeamsFailed ) ) {
2018-11-10 10:06:00 +08:00
self - > doBuildTeams = true ;
}
2019-07-10 07:09:51 +08:00
if ( status - > isFailed & & self - > healthyZone . get ( ) . present ( ) ) {
2019-07-19 04:18:36 +08:00
if ( self - > healthyZone . get ( ) . get ( ) = = ignoreSSFailuresZoneString ) {
2019-07-10 07:09:51 +08:00
// Ignore the failed storage server
TraceEvent ( " SSFailureTracker " , self - > distributorId )
. detail ( " IgnoredFailure " , " InsideChooseWhen " )
. detail ( " ServerID " , interf . id ( ) )
. detail ( " Status " , status - > toString ( ) ) ;
status - > isFailed = false ;
} else if ( self - > clearHealthyZoneFuture . isReady ( ) ) {
self - > clearHealthyZoneFuture = clearHealthyZone ( self - > cx ) ;
2021-07-27 10:55:10 +08:00
TraceEvent ( " MaintenanceZoneCleared " , self - > distributorId ) . log ( ) ;
2019-07-10 07:09:51 +08:00
self - > healthyZone . set ( Optional < Key > ( ) ) ;
}
2019-04-03 05:15:51 +08:00
}
2018-11-10 10:06:00 +08:00
2019-08-20 04:47:48 +08:00
// TraceEvent("StatusMapChange", self->distributorId)
// .detail("ServerID", interf.id())
// .detail("Status", status->toString())
// .detail("Available",
// IFailureMonitor::failureMonitor().getState(interf.waitFailure.getEndpoint()).isAvailable());
2018-11-10 10:06:00 +08:00
}
2021-03-11 02:06:03 +08:00
when ( wait ( status - > isUnhealthy ( ) ? waitForAllDataRemoved ( cx , interf . id ( ) , addedVersion , self ) : Never ( ) ) ) {
break ;
}
when ( wait ( self - > healthyZone . onChange ( ) ) ) { }
2018-11-10 10:06:00 +08:00
}
}
2019-07-17 06:12:18 +08:00
return Void ( ) ; // Don't ignore failures
2018-11-10 10:06:00 +08:00
}
2018-08-30 05:40:39 +08:00
// Check the status of a storage server.
// Apply all requirements to the server and mark it as excluded if it fails to satisfy these requirements
2017-05-26 04:48:44 +08:00
ACTOR Future < Void > storageServerTracker (
2021-03-11 02:06:03 +08:00
DDTeamCollection * self ,
Database cx ,
2019-08-13 01:08:12 +08:00
TCServerInfo * server , // This actor is owned by this TCServerInfo, point to server_info[id]
2021-03-11 02:06:03 +08:00
Promise < Void > errorOut ,
Version addedVersion ,
2021-03-06 03:28:15 +08:00
const DDEnabledState * ddEnabledState ,
bool isTss ) {
2017-05-26 04:48:44 +08:00
state Future < Void > failureTracker ;
2021-06-12 06:58:05 +08:00
state ServerStatus status ( false , false , false , server - > lastKnownInterface . locality ) ;
2018-06-08 05:05:53 +08:00
state bool lastIsUnhealthy = false ;
2021-03-11 02:06:03 +08:00
state Future < Void > metricsTracker = serverMetricsPolling ( server ) ;
2020-02-20 06:13:27 +08:00
2017-05-26 04:48:44 +08:00
state Future < std : : pair < StorageServerInterface , ProcessClass > > interfaceChanged = server - > onInterfaceChanged ;
2021-03-06 03:28:15 +08:00
state Future < Void > storeTypeTracker = ( isTss ) ? Never ( ) : keyValueStoreTypeTracker ( self , server ) ;
2019-10-03 05:48:35 +08:00
state bool hasWrongDC = ! isCorrectDC ( self , server ) ;
state bool hasInvalidLocality =
! self - > isValidLocality ( self - > configuration . storagePolicy , server - > lastKnownInterface . locality ) ;
2021-03-11 02:06:03 +08:00
state int targetTeamNumPerServer =
( SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER * ( self - > configuration . storageTeamSize + 1 ) ) / 2 ;
2017-05-26 04:48:44 +08:00
try {
loop {
2020-04-07 14:37:11 +08:00
status . isUndesired = ! self - > disableFailingLaggingServers . get ( ) & & server - > ssVersionTooFarBehind . get ( ) ;
2017-05-26 04:48:44 +08:00
status . isWrongConfiguration = false ;
2021-06-12 06:58:05 +08:00
status . isWiggling = false ;
2019-10-03 05:48:35 +08:00
hasWrongDC = ! isCorrectDC ( self , server ) ;
hasInvalidLocality =
! self - > isValidLocality ( self - > configuration . storagePolicy , server - > lastKnownInterface . locality ) ;
2017-05-26 04:48:44 +08:00
2019-08-13 01:08:12 +08:00
// If there is any other server on this exact NetworkAddress, this server is undesired and will eventually
// be eliminated. This same-address check must be redone whenever the server's state (e.g., storeType,
// dcLocation, interface) changes.
2017-05-26 04:48:44 +08:00
state std : : vector < Future < Void > > otherChanges ;
std : : vector < Promise < Void > > wakeUpTrackers ;
2021-03-06 03:28:15 +08:00
for ( const auto & i : self - > server_and_tss_info ) {
2021-03-11 02:06:03 +08:00
if ( i . second . getPtr ( ) ! = server & &
i . second - > lastKnownInterface . address ( ) = = server - > lastKnownInterface . address ( ) ) {
auto & statusInfo = self - > server_status . get ( i . first ) ;
2019-02-13 06:02:21 +08:00
TraceEvent ( " SameAddress " , self - > distributorId )
2021-03-11 02:06:03 +08:00
. detail ( " Failed " , statusInfo . isFailed )
. detail ( " Undesired " , statusInfo . isUndesired )
. detail ( " Server " , server - > id )
. detail ( " OtherServer " , i . second - > id )
. detail ( " Address " , server - > lastKnownInterface . address ( ) )
. detail ( " NumShards " , self - > shardsAffectedByTeamFailure - > getNumberOfShards ( server - > id ) )
. detail ( " OtherNumShards " , self - > shardsAffectedByTeamFailure - > getNumberOfShards ( i . second - > id ) )
. detail ( " OtherHealthy " , ! self - > server_status . get ( i . second - > id ) . isUnhealthy ( ) ) ;
2018-11-22 03:18:26 +08:00
// Wait for the server's IP to change
2019-02-13 06:02:21 +08:00
otherChanges . push_back ( self - > server_status . onChange ( i . second - > id ) ) ;
2019-08-14 06:44:46 +08:00
if ( ! self - > server_status . get ( i . second - > id ) . isUnhealthy ( ) ) {
2021-03-11 02:06:03 +08:00
if ( self - > shardsAffectedByTeamFailure - > getNumberOfShards ( i . second - > id ) > =
self - > shardsAffectedByTeamFailure - > getNumberOfShards ( server - > id ) ) {
2019-02-13 06:02:21 +08:00
TraceEvent ( SevWarn , " UndesiredStorageServer " , self - > distributorId )
2021-03-11 02:06:03 +08:00
. detail ( " Server " , server - > id )
. detail ( " Address " , server - > lastKnownInterface . address ( ) )
. detail ( " OtherServer " , i . second - > id )
. detail ( " NumShards " , self - > shardsAffectedByTeamFailure - > getNumberOfShards ( server - > id ) )
. detail ( " OtherNumShards " ,
self - > shardsAffectedByTeamFailure - > getNumberOfShards ( i . second - > id ) ) ;
2017-05-26 04:48:44 +08:00
status . isUndesired = true ;
2021-03-11 02:06:03 +08:00
} else
2019-02-13 06:02:21 +08:00
wakeUpTrackers . push_back ( i . second - > wakeUpTracker ) ;
2017-05-26 04:48:44 +08:00
}
}
}
2021-03-11 02:06:03 +08:00
for ( auto & p : wakeUpTrackers ) {
if ( ! p . isSet ( ) )
2017-05-26 04:48:44 +08:00
p . send ( Void ( ) ) ;
}
2021-03-11 02:06:03 +08:00
if ( server - > lastKnownClass . machineClassFitness ( ProcessClass : : Storage ) > ProcessClass : : UnsetFit ) {
// NOTE: Should not use self->healthyTeamCount > 0 in if statement, which will cause status bouncing
// between healthy and unhealthy and result in OOM (See PR#2228).
2019-10-10 12:17:03 +08:00
2019-10-10 08:45:06 +08:00
if ( self - > optimalTeamCount > 0 ) {
2019-02-13 06:02:21 +08:00
TraceEvent ( SevWarn , " UndesiredStorageServer " , self - > distributorId )
2018-11-22 03:18:26 +08:00
. detail ( " Server " , server - > id )
. detail ( " OptimalTeamCount " , self - > optimalTeamCount )
. detail ( " Fitness " , server - > lastKnownClass . machineClassFitness ( ProcessClass : : Storage ) ) ;
2017-05-26 04:48:44 +08:00
status . isUndesired = true ;
}
2021-03-11 02:06:03 +08:00
otherChanges . push_back ( self - > zeroOptimalTeams . onChange ( ) ) ;
2017-05-26 04:48:44 +08:00
}
2021-03-11 02:06:03 +08:00
// If this storage server is in the wrong data center or has an invalid locality, mark it undesired so it will
// be replaced with a correctly placed server
2019-10-03 05:48:35 +08:00
if ( hasWrongDC | | hasInvalidLocality ) {
2019-10-03 05:05:41 +08:00
TraceEvent ( SevWarn , " UndesiredDCOrLocality " , self - > distributorId )
2019-08-13 01:08:12 +08:00
. detail ( " Server " , server - > id )
2019-10-03 05:48:35 +08:00
. detail ( " WrongDC " , hasWrongDC )
. detail ( " InvalidLocality " , hasInvalidLocality ) ;
2019-08-13 01:08:12 +08:00
status . isUndesired = true ;
status . isWrongConfiguration = true ;
}
2019-08-17 07:46:54 +08:00
if ( server - > wrongStoreTypeToRemove . get ( ) ) {
2019-08-13 01:08:12 +08:00
TraceEvent ( SevWarn , " WrongStoreTypeToRemove " , self - > distributorId )
. detail ( " Server " , server - > id )
. detail ( " StoreType " , " ? " ) ;
2017-05-26 04:48:44 +08:00
status . isUndesired = true ;
status . isWrongConfiguration = true ;
}
2021-06-12 06:58:05 +08:00
// An invalid wiggle server should reset itself to the right status. Otherwise, it can never be re-included by
// the wiggler.
auto invalidWiggleServer =
[ ] ( const AddressExclusion & addr , const DDTeamCollection * tc , const TCServerInfo * server ) {
return server - > lastKnownInterface . locality . processId ( ) ! = tc - > wigglingPid ;
} ;
2017-05-26 04:48:44 +08:00
// If the storage server is in the excluded servers list, it is undesired
NetworkAddress a = server - > lastKnownInterface . address ( ) ;
2021-03-11 02:06:03 +08:00
AddressExclusion worstAddr ( a . ip , a . port ) ;
DDTeamCollection : : Status worstStatus = self - > excludedServers . get ( worstAddr ) ;
2021-06-12 06:58:05 +08:00
if ( worstStatus = = DDTeamCollection : : Status : : WIGGLING & & invalidWiggleServer ( worstAddr , self , server ) ) {
2021-07-12 11:36:10 +08:00
TraceEvent ( SevInfo , " InvalidWiggleServer " , self - > distributorId )
. detail ( " Address " , worstAddr . toString ( ) )
. detail ( " ProcessId " , server - > lastKnownInterface . locality . processId ( ) )
. detail ( " ValidWigglingId " , self - > wigglingPid . present ( ) ) ;
2021-06-12 06:58:05 +08:00
self - > excludedServers . set ( worstAddr , DDTeamCollection : : Status : : NONE ) ;
worstStatus = DDTeamCollection : : Status : : NONE ;
}
2021-03-11 02:06:03 +08:00
otherChanges . push_back ( self - > excludedServers . onChange ( worstAddr ) ) ;
2020-04-11 04:45:16 +08:00
2021-03-11 02:06:03 +08:00
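// Also check the other exclusion forms that can match this server: the primary IP without a port, plus the
// secondary address with and without its port; keep the most severe status found.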
for ( int i = 0 ; i < 3 ; i + + ) {
if ( i > 0 & & ! server - > lastKnownInterface . secondaryAddress ( ) . present ( ) ) {
2020-04-11 04:45:16 +08:00
break ;
}
AddressExclusion testAddr ;
2021-03-11 02:06:03 +08:00
if ( i = = 0 )
testAddr = AddressExclusion ( a . ip ) ;
else if ( i = = 1 )
testAddr = AddressExclusion ( server - > lastKnownInterface . secondaryAddress ( ) . get ( ) . ip ,
server - > lastKnownInterface . secondaryAddress ( ) . get ( ) . port ) ;
else if ( i = = 2 )
testAddr = AddressExclusion ( server - > lastKnownInterface . secondaryAddress ( ) . get ( ) . ip ) ;
2020-04-11 04:45:16 +08:00
DDTeamCollection : : Status testStatus = self - > excludedServers . get ( testAddr ) ;
2021-06-12 06:58:05 +08:00
if ( testStatus = = DDTeamCollection : : Status : : WIGGLING & & invalidWiggleServer ( testAddr , self , server ) ) {
2021-07-12 11:36:10 +08:00
TraceEvent ( SevInfo , " InvalidWiggleServer " , self - > distributorId )
. detail ( " Address " , testAddr . toString ( ) )
. detail ( " ProcessId " , server - > lastKnownInterface . locality . processId ( ) )
. detail ( " ValidWigglingId " , self - > wigglingPid . present ( ) ) ;
2021-06-12 06:58:05 +08:00
self - > excludedServers . set ( testAddr , DDTeamCollection : : Status : : NONE ) ;
testStatus = DDTeamCollection : : Status : : NONE ;
}
2021-03-11 02:06:03 +08:00
if ( testStatus > worstStatus ) {
2020-04-11 04:45:16 +08:00
worstStatus = testStatus ;
worstAddr = testAddr ;
}
2021-03-11 02:06:03 +08:00
otherChanges . push_back ( self - > excludedServers . onChange ( testAddr ) ) ;
2020-04-11 04:45:16 +08:00
}
if ( worstStatus ! = DDTeamCollection : : Status : : NONE ) {
2019-09-25 01:04:56 +08:00
TraceEvent ( SevWarn , " UndesiredStorageServer " , self - > distributorId )
2021-03-11 02:06:03 +08:00
. detail ( " Server " , server - > id )
. detail ( " Excluded " , worstAddr . toString ( ) ) ;
2017-05-26 04:48:44 +08:00
status . isUndesired = true ;
status . isWrongConfiguration = true ;
2021-06-02 14:12:45 +08:00
if ( worstStatus = = DDTeamCollection : : Status : : WIGGLING & & ! isTss ) {
status . isWiggling = true ;
TraceEvent ( " PerpetualWigglingStorageServer " , self - > distributorId )
. detail ( " Server " , server - > id )
2021-06-12 06:58:05 +08:00
. detail ( " ProcessId " , server - > lastKnownInterface . locality . processId ( ) )
2021-06-02 14:12:45 +08:00
. detail ( " Address " , worstAddr . toString ( ) ) ;
} else if ( worstStatus = = DDTeamCollection : : Status : : FAILED & & ! isTss ) {
2019-08-14 04:40:05 +08:00
TraceEvent ( SevWarn , " FailedServerRemoveKeys " , self - > distributorId )
2021-03-11 02:06:03 +08:00
. detail ( " Server " , server - > id )
. detail ( " Excluded " , worstAddr . toString ( ) ) ;
2021-03-06 03:28:15 +08:00
wait ( delay ( 0.0 ) ) ; // Do not throw an error while still inside trackExcludedServers
2021-03-16 01:51:53 +08:00
while ( ! ddEnabledState - > isDDEnabled ( ) ) {
wait ( delay ( 1.0 ) ) ;
}
2021-03-16 01:43:06 +08:00
if ( self - > removeFailedServer . canBeSet ( ) ) {
self - > removeFailedServer . send ( server - > id ) ;
}
throw movekeys_conflict ( ) ;
2019-08-06 02:30:22 +08:00
}
2017-05-26 04:48:44 +08:00
}
2019-07-10 07:09:51 +08:00
failureTracker = storageServerFailureTracker ( self , server , cx , & status , addedVersion ) ;
2021-03-11 02:06:03 +08:00
// We need to recruit new storage servers if this server has the wrong DC, an invalid locality, or the wrong store type
2019-10-03 05:48:35 +08:00
if ( hasWrongDC | | hasInvalidLocality | | server - > wrongStoreTypeToRemove . get ( ) ) {
2017-05-26 04:48:44 +08:00
self - > restartRecruiting . trigger ( ) ;
2019-10-03 05:48:35 +08:00
}
2017-05-26 04:48:44 +08:00
2021-03-06 03:28:15 +08:00
if ( lastIsUnhealthy & & ! status . isUnhealthy ( ) & & ! isTss & &
2021-03-11 02:06:03 +08:00
( server - > teams . size ( ) < targetTeamNumPerServer | | self - > lastBuildTeamsFailed ) ) {
2017-05-26 04:48:44 +08:00
self - > doBuildTeams = true ;
2019-06-27 08:56:54 +08:00
self - > restartTeamBuilder . trigger ( ) ; // This does not trigger building teams if there exist healthy teams
2019-02-07 08:42:22 +08:00
}
2018-06-08 05:05:53 +08:00
lastIsUnhealthy = status . isUnhealthy ( ) ;
2017-05-26 04:48:44 +08:00
2019-06-28 09:24:18 +08:00
state bool recordTeamCollectionInfo = false ;
2017-05-26 04:48:44 +08:00
choose {
2021-05-13 02:53:20 +08:00
when ( wait ( failureTracker | | server - > onTSSPairRemoved | | server - > killTss . getFuture ( ) ) ) {
2017-05-26 04:48:44 +08:00
// The server is failed AND all data has been removed from it, so permanently remove it.
2021-03-11 02:06:03 +08:00
TraceEvent ( " StatusMapChange " , self - > distributorId )
. detail ( " ServerID " , server - > id )
. detail ( " Status " , " Removing " ) ;
2017-05-26 04:48:44 +08:00
2021-03-11 02:06:03 +08:00
if ( server - > updated . canBeSet ( ) ) {
2018-11-13 09:39:40 +08:00
server - > updated . send ( Void ( ) ) ;
}
2017-05-26 04:48:44 +08:00
// Remove server from FF/serverList
2021-05-13 02:53:20 +08:00
wait ( removeStorageServer (
cx , server - > id , server - > lastKnownInterface . tssPairID , self - > lock , ddEnabledState ) ) ;
2017-05-26 04:48:44 +08:00
2021-03-11 02:06:03 +08:00
TraceEvent ( " StatusMapChange " , self - > distributorId )
. detail ( " ServerID " , server - > id )
. detail ( " Status " , " Removed " ) ;
// Sets removeSignal (alerting dataDistributionTeamCollection to remove the storage server from its
// own data structures)
server - > removed . send ( Void ( ) ) ;
2021-03-06 03:28:15 +08:00
if ( isTss ) {
self - > removedTSS . send ( server - > id ) ;
} else {
self - > removedServers . send ( server - > id ) ;
}
2017-05-26 04:48:44 +08:00
return Void ( ) ;
}
2021-03-11 02:06:03 +08:00
when ( std : : pair < StorageServerInterface , ProcessClass > newInterface = wait ( interfaceChanged ) ) {
bool restartRecruiting = newInterface . first . waitFailure . getEndpoint ( ) . getPrimaryAddress ( ) ! =
server - > lastKnownInterface . waitFailure . getEndpoint ( ) . getPrimaryAddress ( ) ;
2018-09-01 08:54:55 +08:00
bool localityChanged = server - > lastKnownInterface . locality ! = newInterface . first . locality ;
2018-12-13 10:29:03 +08:00
bool machineLocalityChanged = server - > lastKnownInterface . locality . zoneId ( ) . get ( ) ! =
newInterface . first . locality . zoneId ( ) . get ( ) ;
2021-06-12 06:58:05 +08:00
bool processIdChanged = server->lastKnownInterface.locality.processId().get() !=
newInterface.first.locality.processId().get();
// Remember the old process id before lastKnownInterface is overwritten; needed to clean up pid2server_info below.
Optional<Key> oldProcessId = server->lastKnownInterface.locality.processId();
2019-07-12 13:05:20 +08:00
TraceEvent ( " StorageServerInterfaceChanged " , self - > distributorId )
. detail ( " ServerID " , server - > id )
. detail ( " NewWaitFailureToken " , newInterface . first . waitFailure . getEndpoint ( ) . token )
. detail ( " OldWaitFailureToken " , server - > lastKnownInterface . waitFailure . getEndpoint ( ) . token )
. detail ( " LocalityChanged " , localityChanged )
2021-06-12 06:58:05 +08:00
. detail ( " ProcessIdChanged " , processIdChanged )
2019-07-12 13:05:20 +08:00
. detail ( " MachineLocalityChanged " , machineLocalityChanged ) ;
2018-09-01 08:54:55 +08:00
2017-05-26 04:48:44 +08:00
server - > lastKnownInterface = newInterface . first ;
server - > lastKnownClass = newInterface . second ;
2021-03-06 03:28:15 +08:00
if ( localityChanged & & ! isTss ) {
2018-12-07 03:26:30 +08:00
TEST ( true ) ; // Server locality changed
2018-12-06 14:23:11 +08:00
2018-12-13 10:29:03 +08:00
// The locality change of a server will affect machine teams related to the server if
// the server's machine locality is changed
if ( machineLocalityChanged ) {
// First handle the impact on the machine the server belonged to under its old locality
Reference < TCMachineInfo > machine = server - > machine ;
ASSERT ( machine - > serversOnMachine . size ( ) > = 1 ) ;
if ( machine - > serversOnMachine . size ( ) = = 1 ) {
// When server is the last server on the machine,
// remove the machine and the related machine team
2019-11-23 02:20:13 +08:00
self - > removeMachine ( machine ) ;
2019-02-19 10:40:52 +08:00
server - > machine = Reference < TCMachineInfo > ( ) ;
2018-12-13 10:29:03 +08:00
} else {
// we remove the server from the machine, and
// update locality entry for the machine and the global machineLocalityMap
int serverIndex = - 1 ;
for ( int i = 0 ; i < machine - > serversOnMachine . size ( ) ; + + i ) {
if ( machine - > serversOnMachine [ i ] . getPtr ( ) = = server ) {
2019-07-11 10:23:45 +08:00
// NOTE: the machine's locality is now stale. It needs to be updated whenever it is used.
2018-12-13 10:29:03 +08:00
serverIndex = i ;
machine - > serversOnMachine [ i ] = machine - > serversOnMachine . back ( ) ;
machine - > serversOnMachine . pop_back ( ) ;
break ; // Invariant: a server only appears on the machine once
}
2018-12-06 14:23:11 +08:00
}
2018-12-13 10:29:03 +08:00
ASSERT ( serverIndex ! = - 1 ) ;
// NOTE: we do not update the machine's locality map even when
// its representative server is changed.
2018-12-06 14:23:11 +08:00
}
2018-12-13 10:29:03 +08:00
// Second handle the impact on the destination machine indicated by the server's new locality:
// if the destination machine is new, create one; otherwise, add the server to the existing one.
// Update the server's machine reference to the destination machine
Reference < TCMachineInfo > destMachine =
self - > checkAndCreateMachine ( self - > server_info [ server - > id ] ) ;
ASSERT ( destMachine . isValid ( ) ) ;
}
2018-12-13 03:44:05 +08:00
2021-06-12 06:58:05 +08:00
// update pid2server_info if the process id has changed
if ( processIdChanged ) {
self - > pid2server_info [ newInterface . first . locality . processId ( ) . get ( ) ] . push_back (
self - > server_info [ server - > id ] ) ;
// delete the old one
auto& old_infos = self->pid2server_info[oldProcessId.get()];
for ( int i = 0 ; i < old_infos . size ( ) ; + + i ) {
if ( old_infos [ i ] . getPtr ( ) = = server ) {
std : : swap ( old_infos [ i - - ] , old_infos . back ( ) ) ;
old_infos . pop_back ( ) ;
}
}
}
2018-12-06 14:23:11 +08:00
// Ensure each of the server's teams belongs to a machine team, and
// collect the newBadTeams caused by the locality change
2018-11-08 13:05:31 +08:00
vector < Reference < TCTeamInfo > > newBadTeams ;
2018-12-06 14:23:11 +08:00
for ( auto & serverTeam : server - > teams ) {
2019-02-11 00:58:56 +08:00
if ( ! self - > satisfiesPolicy ( serverTeam - > getServers ( ) ) ) {
2018-12-06 14:23:11 +08:00
newBadTeams . push_back ( serverTeam ) ;
continue ;
2018-11-08 13:05:31 +08:00
}
2018-12-13 10:29:03 +08:00
if ( machineLocalityChanged ) {
Reference < TCMachineTeamInfo > machineTeam = self - > checkAndCreateMachineTeam ( serverTeam ) ;
ASSERT ( machineTeam . isValid ( ) ) ;
serverTeam - > machineTeam = machineTeam ;
}
2018-11-08 13:05:31 +08:00
}
2018-12-06 14:23:11 +08:00
2018-12-07 03:26:30 +08:00
server - > inDesiredDC =
( self - > includedDCs . empty ( ) | |
2021-03-11 02:06:03 +08:00
std : : find ( self - > includedDCs . begin ( ) ,
self - > includedDCs . end ( ) ,
2018-12-07 03:26:30 +08:00
server - > lastKnownInterface . locality . dcId ( ) ) ! = self - > includedDCs . end ( ) ) ;
2018-12-06 14:23:11 +08:00
self - > resetLocalitySet ( ) ;
bool addedNewBadTeam = false ;
2021-03-11 02:06:03 +08:00
for ( auto it : newBadTeams ) {
if ( self - > removeTeam ( it ) ) {
2019-02-11 00:58:56 +08:00
self - > addTeam ( it - > getServers ( ) , true ) ;
2018-11-08 13:05:31 +08:00
addedNewBadTeam = true ;
}
}
2021-03-11 02:06:03 +08:00
if ( addedNewBadTeam & & self - > badTeamRemover . isReady ( ) ) {
2018-12-07 03:26:30 +08:00
TEST ( true ) ; // Server locality change created bad teams
2018-12-13 10:29:03 +08:00
self - > doBuildTeams = true ;
2018-11-08 13:05:31 +08:00
self - > badTeamRemover = removeBadTeams ( self ) ;
self - > addActor . send ( self - > badTeamRemover ) ;
2019-02-13 11:10:51 +08:00
// The team number changes, so we need to update the team number info
2019-07-02 07:37:10 +08:00
// self->traceTeamCollectionInfo();
2019-06-28 09:24:18 +08:00
recordTeamCollectionInfo = true ;
2018-11-08 13:05:31 +08:00
}
2019-07-12 13:05:20 +08:00
// The server's locality change invalidates its old teams,
// so we need to rebuild teams for the server
2019-07-11 02:55:06 +08:00
self - > doBuildTeams = true ;
2018-09-01 08:54:55 +08:00
}
2017-05-26 04:48:44 +08:00
interfaceChanged = server - > onInterfaceChanged ;
2019-08-20 04:47:48 +08:00
// Old failureTracker for the old interface will be actorCancelled since the handler of the old
// actor now points to the new failure monitor actor.
2021-06-12 06:58:05 +08:00
status = ServerStatus (
status . isFailed , status . isUndesired , status . isWiggling , server - > lastKnownInterface . locality ) ;
2017-05-26 04:48:44 +08:00
2019-07-02 07:37:10 +08:00
// self->traceTeamCollectionInfo();
2019-06-28 09:24:18 +08:00
recordTeamCollectionInfo = true ;
2019-08-20 06:54:57 +08:00
// Restart the storeTracker for the new interface. This will cancel the previous
// keyValueStoreTypeTracker
2021-03-06 03:28:15 +08:00
storeTypeTracker = ( isTss ) ? Never ( ) : keyValueStoreTypeTracker ( self , server ) ;
2019-10-03 05:48:35 +08:00
hasWrongDC = ! isCorrectDC ( self , server ) ;
hasInvalidLocality =
! self - > isValidLocality ( self - > configuration . storagePolicy , server - > lastKnownInterface . locality ) ;
2017-05-26 04:48:44 +08:00
self - > restartTeamBuilder . trigger ( ) ;
2019-06-28 09:24:18 +08:00
2021-03-11 02:06:03 +08:00
if ( restartRecruiting )
2017-05-26 04:48:44 +08:00
self - > restartRecruiting . trigger ( ) ;
}
2021-03-11 02:06:03 +08:00
when ( wait ( otherChanges . empty ( ) ? Never ( ) : quorum ( otherChanges , 1 ) ) ) {
2019-02-13 06:02:21 +08:00
TraceEvent ( " SameAddressChangedStatus " , self - > distributorId ) . detail ( " ServerID " , server - > id ) ;
2017-05-26 04:48:44 +08:00
}
2019-08-14 06:44:46 +08:00
when ( wait ( server - > wrongStoreTypeToRemove . onChange ( ) ) ) {
2019-08-23 04:21:01 +08:00
TraceEvent ( " UndesiredStorageServerTriggered " , self - > distributorId )
2019-08-13 01:08:12 +08:00
. detail ( " Server " , server - > id )
. detail ( " StoreType " , server - > storeType )
2019-08-13 08:38:17 +08:00
. detail ( " ConfigStoreType " , self - > configuration . storageServerStoreType )
2019-08-17 07:46:54 +08:00
. detail ( " WrongStoreTypeRemoved " , server - > wrongStoreTypeToRemove . get ( ) ) ;
2017-05-26 04:48:44 +08:00
}
2021-03-11 02:06:03 +08:00
when ( wait ( server - > wakeUpTracker . getFuture ( ) ) ) { server - > wakeUpTracker = Promise < Void > ( ) ; }
2019-08-20 06:54:57 +08:00
when ( wait ( storeTypeTracker ) ) { }
2021-03-11 02:06:03 +08:00
when ( wait ( server - > ssVersionTooFarBehind . onChange ( ) ) ) { }
when ( wait ( self - > disableFailingLaggingServers . onChange ( ) ) ) { }
2017-05-26 04:48:44 +08:00
}
2019-06-28 09:24:18 +08:00
2019-06-29 07:01:05 +08:00
if ( recordTeamCollectionInfo ) {
2019-07-02 07:37:10 +08:00
self - > traceTeamCollectionInfo ( ) ;
2017-05-26 04:48:44 +08:00
}
}
2021-03-11 02:06:03 +08:00
} catch ( Error & e ) {
2020-10-17 05:26:40 +08:00
state Error err = e ;
2020-07-17 00:48:10 +08:00
TraceEvent ( " StorageServerTrackerCancelled " , self - > distributorId )
2020-07-17 11:26:07 +08:00
. suppressFor ( 1.0 )
2020-07-17 00:48:10 +08:00
. detail ( " Primary " , self - > primary )
2020-10-17 06:40:01 +08:00
. detail ( " Server " , server - > id )
2020-10-17 07:55:09 +08:00
. error ( e , /*includeCancelled*/ true ) ;
2020-10-17 05:23:27 +08:00
if ( e . code ( ) ! = error_code_actor_cancelled & & errorOut . canBeSet ( ) ) {
errorOut . sendError ( e ) ;
wait ( delay ( 0 ) ) ; // Check for cancellation, since errorOut.sendError(e) could delete self
}
2020-10-17 00:05:03 +08:00
throw err ;
2017-05-26 04:48:44 +08:00
}
}
2021-03-11 02:06:03 +08:00
// Monitor whether or not storage servers are being recruited. If so, then a database cannot be considered quiet
2018-11-10 02:07:55 +08:00
ACTOR Future < Void > monitorStorageServerRecruitment ( DDTeamCollection * self ) {
2017-05-26 04:48:44 +08:00
state bool recruiting = false ;
2021-03-06 03:28:15 +08:00
state bool lastIsTss = false ;
2018-12-14 05:31:37 +08:00
TraceEvent ( " StorageServerRecruitment " , self - > distributorId )
2018-11-22 03:18:26 +08:00
. detail ( " State " , " Idle " )
2020-03-06 10:17:06 +08:00
. trackLatest ( " StorageServerRecruitment_ " + self - > distributorId . toString ( ) ) ;
2017-05-26 04:48:44 +08:00
loop {
2021-03-11 02:06:03 +08:00
if ( ! recruiting ) {
while ( self - > recruitingStream . get ( ) = = 0 ) {
wait ( self - > recruitingStream . onChange ( ) ) ;
2017-05-26 04:48:44 +08:00
}
2018-12-14 05:31:37 +08:00
TraceEvent ( " StorageServerRecruitment " , self - > distributorId )
2021-03-11 02:06:03 +08:00
. detail ( " State " , " Recruiting " )
2021-03-06 03:28:15 +08:00
. detail ( " IsTSS " , self - > isTssRecruiting ? " True " : " False " )
2021-03-11 02:06:03 +08:00
. trackLatest ( " StorageServerRecruitment_ " + self - > distributorId . toString ( ) ) ;
2017-05-26 04:48:44 +08:00
recruiting = true ;
2021-03-06 03:28:15 +08:00
lastIsTss = self - > isTssRecruiting ;
2017-05-26 04:48:44 +08:00
} else {
loop {
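// Stay in the Recruiting state until recruitment has been idle for RECRUITMENT_IDLE_DELAY, so brief gaps between
// recruitment requests do not flap the trackLatest event.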
choose {
2021-03-06 03:28:15 +08:00
when ( wait ( self - > recruitingStream . onChange ( ) ) ) {
if ( lastIsTss ! = self - > isTssRecruiting ) {
TraceEvent ( " StorageServerRecruitment " , self - > distributorId )
. detail ( " State " , " Recruiting " )
. detail ( " IsTSS " , self - > isTssRecruiting ? " True " : " False " )
. trackLatest ( " StorageServerRecruitment_ " + self - > distributorId . toString ( ) ) ;
lastIsTss = self - > isTssRecruiting ;
}
}
2021-03-11 02:06:03 +08:00
when ( wait ( self - > recruitingStream . get ( ) = = 0
? delay ( SERVER_KNOBS - > RECRUITMENT_IDLE_DELAY , TaskPriority : : DataDistribution )
: Future < Void > ( Never ( ) ) ) ) {
break ;
}
2017-05-26 04:48:44 +08:00
}
}
2018-12-14 05:31:37 +08:00
TraceEvent ( " StorageServerRecruitment " , self - > distributorId )
2021-03-11 02:06:03 +08:00
. detail ( " State " , " Idle " )
. trackLatest ( " StorageServerRecruitment_ " + self - > distributorId . toString ( ) ) ;
2017-05-26 04:48:44 +08:00
recruiting = false ;
}
}
}
2019-09-15 02:21:51 +08:00
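// Periodically rechecks the addresses recorded in invalidLocalityAddr: entries whose locality has been corrected,
// or whose worker no longer exists, are dropped, and recruitment is retriggered for corrected addresses.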
ACTOR Future < Void > checkAndRemoveInvalidLocalityAddr ( DDTeamCollection * self ) {
2019-09-18 04:03:57 +08:00
state double start = now ( ) ;
2019-09-25 04:35:38 +08:00
state bool hasCorrectedLocality = false ;
2019-09-15 02:21:51 +08:00
loop {
try {
2019-09-28 10:39:53 +08:00
wait ( delay ( SERVER_KNOBS - > DD_CHECK_INVALID_LOCALITY_DELAY , TaskPriority : : DataDistribution ) ) ;
2019-09-15 02:21:51 +08:00
// Because a worker's processId can change when its locality changes, we cannot watch the old
// processId; this actor is inactive most of the time, so iterating over all workers incurs little performance overhead.
2019-09-19 02:06:03 +08:00
state vector < ProcessData > workers = wait ( getWorkers ( self - > cx ) ) ;
2019-09-18 04:03:57 +08:00
state std : : set < AddressExclusion > existingAddrs ;
for ( int i = 0 ; i < workers . size ( ) ; i + + ) {
const ProcessData & workerData = workers [ i ] ;
2019-09-15 02:21:51 +08:00
AddressExclusion addr ( workerData . address . ip , workerData . address . port ) ;
existingAddrs . insert ( addr ) ;
if ( self - > invalidLocalityAddr . count ( addr ) & &
self - > isValidLocality ( self - > configuration . storagePolicy , workerData . locality ) ) {
// The locality info on the addr has been corrected
self - > invalidLocalityAddr . erase ( addr ) ;
2019-09-25 04:35:38 +08:00
hasCorrectedLocality = true ;
2019-09-19 02:06:03 +08:00
TraceEvent ( " InvalidLocalityCorrected " ) . detail ( " Addr " , addr . toString ( ) ) ;
2019-09-15 02:21:51 +08:00
}
}
2019-09-18 04:03:57 +08:00
wait ( yield ( TaskPriority : : DataDistribution ) ) ;
2019-09-15 02:21:51 +08:00
// In case the system operator permanently excludes workers at an address with invalid locality
for ( auto addr = self - > invalidLocalityAddr . begin ( ) ; addr ! = self - > invalidLocalityAddr . end ( ) ; ) {
if ( ! existingAddrs . count ( * addr ) ) {
// The address no longer has a worker
2019-09-18 06:28:30 +08:00
addr = self - > invalidLocalityAddr . erase ( addr ) ;
2019-09-25 04:35:38 +08:00
hasCorrectedLocality = true ;
2019-09-19 05:45:18 +08:00
TraceEvent ( " InvalidLocalityNoLongerExists " ) . detail ( " Addr " , addr - > toString ( ) ) ;
2019-09-15 02:21:51 +08:00
} else {
+ + addr ;
}
}
2017-05-26 04:48:44 +08:00
2019-09-25 04:35:38 +08:00
if ( hasCorrectedLocality ) {
// Recruit on addresses whose locality has been corrected
self - > restartRecruiting . trigger ( ) ;
hasCorrectedLocality = false ;
}
2017-05-26 04:48:44 +08:00
2019-09-15 02:21:51 +08:00
if ( self - > invalidLocalityAddr . empty ( ) ) {
break ;
}
2017-05-26 04:48:44 +08:00
2019-09-18 04:03:57 +08:00
if ( now ( ) - start > 300 ) { // Report warning if invalid locality is not corrected within 300 seconds
// The incorrect locality info has not been properly corrected in a reasonable time
TraceEvent ( SevWarn , " PersistentInvalidLocality " ) . detail ( " Addresses " , self - > invalidLocalityAddr . size ( ) ) ;
2019-09-19 02:06:03 +08:00
start = now ( ) ;
2019-09-15 02:21:51 +08:00
}
} catch ( Error & e ) {
2019-09-19 02:06:03 +08:00
TraceEvent ( " CheckAndRemoveInvalidLocalityAddrRetry " , self - > distributorId ) . detail ( " Error " , e . what ( ) ) ;
2019-09-15 02:21:51 +08:00
}
2018-10-03 08:31:07 +08:00
}
2017-05-26 04:48:44 +08:00
2019-09-15 02:21:51 +08:00
return Void ( ) ;
}
2019-08-16 05:08:41 +08:00
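// Returns how many known storage servers (including TSSes) are already using the given stable address.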
int numExistingSSOnAddr ( DDTeamCollection * self , const AddressExclusion & addr ) {
int numExistingSS = 0 ;
2021-03-06 03:28:15 +08:00
for ( auto & server : self - > server_and_tss_info ) {
2020-04-11 04:45:16 +08:00
const NetworkAddress & netAddr = server . second - > lastKnownInterface . stableAddress ( ) ;
2019-08-16 05:08:41 +08:00
AddressExclusion usedAddr ( netAddr . ip , netAddr . port ) ;
if ( usedAddr = = addr ) {
+ + numExistingSS ;
}
2018-10-03 08:31:07 +08:00
}
2017-05-26 04:48:44 +08:00
2019-08-16 05:08:41 +08:00
return numExistingSS ;
}
2017-05-26 04:48:44 +08:00
2021-03-06 03:28:15 +08:00
// All state that represents an ongoing tss pair recruitment
2021-05-13 02:53:20 +08:00
struct TSSPairState : ReferenceCounted < TSSPairState > , NonCopyable {
2021-03-06 03:28:15 +08:00
Promise < Optional < std : : pair < UID , Version > > >
ssPairInfo ; // if set, for ss to pass its id to tss pair once it is successfully recruited
Promise < bool > tssPairDone ; // if set, for tss to pass ss that it was successfully recruited
2021-05-29 02:15:52 +08:00
Promise < Void > complete ;
2021-05-13 02:53:20 +08:00
2021-03-06 03:28:15 +08:00
Optional < Key > dcId ; // dc
2021-05-13 02:53:20 +08:00
Optional < Key > dataHallId ; // data hall
2021-03-06 03:28:15 +08:00
bool active ;
2021-05-13 02:53:20 +08:00
TSSPairState ( ) : active ( false ) { }
2021-03-06 03:28:15 +08:00
2021-05-13 02:53:20 +08:00
TSSPairState ( const LocalityData & locality )
2021-07-25 08:29:27 +08:00
: dcId ( locality . dcId ( ) ) , dataHallId ( locality . dataHallId ( ) ) , active ( true ) { }
2021-05-13 02:53:20 +08:00
bool inDataZone ( const LocalityData & locality ) {
return locality . dcId ( ) = = dcId & & locality . dataHallId ( ) = = dataHallId ;
}
2021-03-06 03:28:15 +08:00
void cancel ( ) {
// only cancel if both haven't been set, otherwise one half of pair could think it was successful but the other
// half would think it failed
if ( active & & ssPairInfo . canBeSet ( ) & & tssPairDone . canBeSet ( ) ) {
ssPairInfo . send ( Optional < std : : pair < UID , Version > > ( ) ) ;
// callback of ssPairInfo could have cancelled tssPairDone already, so double check before cancelling
if ( tssPairDone . canBeSet ( ) ) {
tssPairDone . send ( false ) ;
}
2021-05-29 02:15:52 +08:00
if ( complete . canBeSet ( ) ) {
complete . send ( Void ( ) ) ;
}
2021-03-06 03:28:15 +08:00
}
}
bool tssRecruitSuccess ( ) {
if ( active & & tssPairDone . canBeSet ( ) ) {
tssPairDone . send ( true ) ;
return true ;
}
return false ;
}
bool tssRecruitFailed ( ) {
if ( active & & tssPairDone . canBeSet ( ) ) {
tssPairDone . send ( false ) ;
return true ;
}
return false ;
}
bool ssRecruitSuccess ( std : : pair < UID , Version > ssInfo ) {
if ( active & & ssPairInfo . canBeSet ( ) ) {
ssPairInfo . send ( Optional < std : : pair < UID , Version > > ( ssInfo ) ) ;
return true ;
}
return false ;
}
bool ssRecruitFailed ( ) {
if ( active & & ssPairInfo . canBeSet ( ) ) {
ssPairInfo . send ( Optional < std : : pair < UID , Version > > ( ) ) ;
return true ;
}
return false ;
}
2021-05-29 02:15:52 +08:00
bool markComplete ( ) {
if ( active & & complete . canBeSet ( ) ) {
complete . send ( Void ( ) ) ;
return true ;
}
return false ;
}
2021-03-06 03:28:15 +08:00
Future < Optional < std : : pair < UID , Version > > > waitOnSS ( ) { return ssPairInfo . getFuture ( ) ; }
Future < bool > waitOnTSS ( ) { return tssPairDone . getFuture ( ) ; }
2021-05-29 02:15:52 +08:00
Future < Void > waitComplete ( ) { return complete . getFuture ( ) ; }
2021-03-06 03:28:15 +08:00
} ;
2021-03-11 02:06:03 +08:00
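// Asks candidateWorker to initialize a new storage server (or a TSS when recruitTss is true) and, on success,
// registers the new interface with the team collection; TSS recruitment is coordinated with its paired SS through
// tssState.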
ACTOR Future < Void > initializeStorage ( DDTeamCollection * self ,
RecruitStorageReply candidateWorker ,
2021-03-06 03:28:15 +08:00
const DDEnabledState * ddEnabledState ,
bool recruitTss ,
2021-05-13 02:53:20 +08:00
Reference < TSSPairState > tssState ) {
2019-08-16 05:08:41 +08:00
// SOMEDAY: Cluster controller waits for availability, retry quickly if a server's Locality changes
2019-08-17 07:46:54 +08:00
self - > recruitingStream . set ( self - > recruitingStream . get ( ) + 1 ) ;
2017-05-26 04:48:44 +08:00
2020-04-11 04:45:16 +08:00
const NetworkAddress & netAddr = candidateWorker . worker . stableAddress ( ) ;
2019-08-17 07:46:54 +08:00
AddressExclusion workerAddr ( netAddr . ip , netAddr . port ) ;
if ( numExistingSSOnAddr ( self , workerAddr ) < = 2 & &
2020-04-11 04:45:16 +08:00
self - > recruitingLocalities . find ( candidateWorker . worker . stableAddress ( ) ) = = self - > recruitingLocalities . end ( ) ) {
2019-08-16 05:08:41 +08:00
// Only allow at most 2 storage servers on an address, because
2019-08-17 06:04:11 +08:00
// too many storage servers on the same address (i.e., the same process) can cause OOM.
// Ask the candidateWorker to initialize a SS only if the worker does not have a pending request
2019-08-16 05:08:41 +08:00
state UID interfaceId = deterministicRandom ( ) - > randomUniqueID ( ) ;
2021-03-06 03:28:15 +08:00
state InitializeStorageRequest isr ;
isr . storeType =
recruitTss ? self - > configuration . testingStorageServerStoreType : self - > configuration . storageServerStoreType ;
2019-08-16 05:08:41 +08:00
isr . seedTag = invalidTag ;
isr . reqId = deterministicRandom ( ) - > randomUniqueID ( ) ;
isr . interfaceId = interfaceId ;
2021-03-06 03:28:15 +08:00
self - > recruitingIds . insert ( interfaceId ) ;
self - > recruitingLocalities . insert ( candidateWorker . worker . stableAddress ( ) ) ;
// If recruiting a TSS, wait for its pair SS to finish and add the SS's id to the isr. If the pair fails, don't recruit the TSS.
state bool doRecruit = true ;
if ( recruitTss ) {
TraceEvent ( " TSS_Recruit " , self - > distributorId )
. detail ( " TSSID " , interfaceId )
. detail ( " Stage " , " TSSWaitingPair " )
. detail ( " Addr " , candidateWorker . worker . address ( ) )
. detail ( " Locality " , candidateWorker . worker . locality . toString ( ) ) ;
Optional < std : : pair < UID , Version > > ssPairInfoResult = wait ( tssState - > waitOnSS ( ) ) ;
if ( ssPairInfoResult . present ( ) ) {
2021-05-13 02:53:20 +08:00
isr . tssPairIDAndVersion = ssPairInfoResult . get ( ) ;
2021-03-06 03:28:15 +08:00
TraceEvent ( " TSS_Recruit " , self - > distributorId )
2021-05-13 02:53:20 +08:00
. detail ( " SSID " , ssPairInfoResult . get ( ) . first )
2021-03-06 03:28:15 +08:00
. detail ( " TSSID " , interfaceId )
. detail ( " Stage " , " TSSWaitingPair " )
. detail ( " Addr " , candidateWorker . worker . address ( ) )
2021-05-13 02:53:20 +08:00
. detail ( " Version " , ssPairInfoResult . get ( ) . second )
2021-03-06 03:28:15 +08:00
. detail ( " Locality " , candidateWorker . worker . locality . toString ( ) ) ;
} else {
doRecruit = false ;
2021-05-13 02:53:20 +08:00
TraceEvent ( SevWarnAlways , " TSS_RecruitError " , self - > distributorId )
2021-03-06 03:28:15 +08:00
. detail ( " TSSID " , interfaceId )
. detail ( " Reason " , " SS recruitment failed for some reason " )
. detail ( " Addr " , candidateWorker . worker . address ( ) )
. detail ( " Locality " , candidateWorker . worker . locality . toString ( ) ) ;
}
}
2019-08-16 05:08:41 +08:00
TraceEvent ( " DDRecruiting " )
2019-08-17 07:46:54 +08:00
. detail ( " Primary " , self - > primary )
. detail ( " State " , " Sending request to worker " )
. detail ( " WorkerID " , candidateWorker . worker . id ( ) )
. detail ( " WorkerLocality " , candidateWorker . worker . locality . toString ( ) )
. detail ( " Interf " , interfaceId )
. detail ( " Addr " , candidateWorker . worker . address ( ) )
2021-03-06 03:28:15 +08:00
. detail ( " TSS " , recruitTss ? " true " : " false " )
2019-08-17 07:46:54 +08:00
. detail ( " RecruitingStream " , self - > recruitingStream . get ( ) ) ;
2019-08-16 05:08:41 +08:00
2021-03-06 03:28:15 +08:00
Future < ErrorOr < InitializeStorageReply > > fRecruit =
doRecruit ? candidateWorker . worker . storage . tryGetReply ( isr , TaskPriority : : DataDistribution )
: Future < ErrorOr < InitializeStorageReply > > ( ErrorOr < InitializeStorageReply > ( recruitment_failed ( ) ) ) ;
state ErrorOr < InitializeStorageReply > newServer = wait ( fRecruit ) ;
if ( doRecruit & & newServer . isError ( ) ) {
2019-08-16 05:08:41 +08:00
TraceEvent ( SevWarn , " DDRecruitmentError " ) . error ( newServer . getError ( ) ) ;
2019-08-17 07:46:54 +08:00
if ( ! newServer . isError ( error_code_recruitment_failed ) & &
! newServer . isError ( error_code_request_maybe_delivered ) )
2019-08-16 05:08:41 +08:00
throw newServer . getError ( ) ;
2019-08-17 07:46:54 +08:00
wait ( delay ( SERVER_KNOBS - > STORAGE_RECRUITMENT_DELAY , TaskPriority : : DataDistribution ) ) ;
2019-08-16 05:08:41 +08:00
}
2021-03-06 03:28:15 +08:00
if ( ! recruitTss & & newServer . present ( ) & &
tssState - > ssRecruitSuccess ( std : : pair ( interfaceId , newServer . get ( ) . addedVersion ) ) ) {
2021-05-13 02:53:20 +08:00
// The SS has a TSS pair. Send the pair this SS's id, but try to delay adding the server until the TSS is recruited.
2021-03-06 03:28:15 +08:00
TraceEvent ( " TSS_Recruit " , self - > distributorId )
. detail ( " SSID " , interfaceId )
. detail ( " Stage " , " SSSignaling " )
. detail ( " Addr " , candidateWorker . worker . address ( ) )
. detail ( " Locality " , candidateWorker . worker . locality . toString ( ) ) ;
2021-05-13 02:53:20 +08:00
// Wait up to the timeout, but eventually move on if no TSS pair is recruited
2021-03-06 03:28:15 +08:00
Optional < bool > tssSuccessful = wait ( timeout ( tssState - > waitOnTSS ( ) , SERVER_KNOBS - > TSS_RECRUITMENT_TIMEOUT ) ) ;
if ( tssSuccessful . present ( ) & & tssSuccessful . get ( ) ) {
TraceEvent ( " TSS_Recruit " , self - > distributorId )
. detail ( " SSID " , interfaceId )
. detail ( " Stage " , " SSGotPair " )
. detail ( " Addr " , candidateWorker . worker . address ( ) )
. detail ( " Locality " , candidateWorker . worker . locality . toString ( ) ) ;
} else {
TraceEvent ( SevWarn , " TSS_RecruitError " , self - > distributorId )
. detail ( " SSID " , interfaceId )
. detail ( " Reason " ,
tssSuccessful . present ( ) ? " TSS recruitment failed for some reason "
: " TSS recruitment timed out " )
. detail ( " Addr " , candidateWorker . worker . address ( ) )
. detail ( " Locality " , candidateWorker . worker . locality . toString ( ) ) ;
}
}
self->recruitingIds.erase(interfaceId);
self->recruitingLocalities.erase(candidateWorker.worker.stableAddress());
TraceEvent("DDRecruiting")
    .detail("Primary", self->primary)
    .detail("State", "Finished request")
    .detail("WorkerID", candidateWorker.worker.id())
    .detail("WorkerLocality", candidateWorker.worker.locality.toString())
    .detail("Interf", interfaceId)
    .detail("Addr", candidateWorker.worker.address())
    .detail("RecruitingStream", self->recruitingStream.get());
if (newServer.present()) {
    UID id = newServer.get().interf.id();
    if (!self->server_and_tss_info.count(id)) {
        if (!recruitTss || tssState->tssRecruitSuccess()) {
            self->addServer(newServer.get().interf,
                            candidateWorker.processClass,
                            self->serverTrackerErrorOut,
                            newServer.get().addedVersion,
                            ddEnabledState);
            // signal all done after adding tss to tracking info
            tssState->markComplete();
        }
    } else {
        TraceEvent(SevWarn, "DDRecruitmentError")
            .detail("Reason", "Server ID already recruited")
            .detail("ServerID", id);
    }
    if (!recruitTss) {
        self->doBuildTeams = true;
    }
}
}
// SS and/or TSS recruitment failed at this point, update tssState
if (recruitTss && tssState->tssRecruitFailed()) {
    tssState->markComplete();
    TEST(true); // TSS recruitment failed for some reason
}
if (!recruitTss && tssState->ssRecruitFailed()) {
    TEST(true); // SS with pair TSS recruitment failed for some reason
}
self->recruitingStream.set(self->recruitingStream.get() - 1);
self->restartRecruiting.trigger();
return Void();
}
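// Recruits new storage servers (and, when configured, TSS processes) by repeatedly asking the cluster
// controller for candidate workers, excluding addresses that already host storage servers or are excluded.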
ACTOR Future<Void> storageRecruiter(DDTeamCollection* self,
                                    Reference<IAsyncListener<RequestStream<RecruitStorageRequest>>> recruitStorage,
                                    const DDEnabledState* ddEnabledState) {
state Future<RecruitStorageReply> fCandidateWorker;
state RecruitStorageRequest lastRequest;
state bool hasHealthyTeam;
state std::map<AddressExclusion, int> numSSPerAddr;

// tss-specific recruitment state
state int32_t targetTSSInDC = 0;
state int32_t tssToRecruit = 0;
state int inProgressTSSCount = 0;
state PromiseStream<Future<Void>> addTSSInProgress;
state Future<Void> inProgressTSS =
    actorCollection(addTSSInProgress.getFuture(), &inProgressTSSCount, nullptr, nullptr, nullptr);
state Reference<TSSPairState> tssState = makeReference<TSSPairState>();
state Future<Void> checkTss = self->initialFailureReactionDelay;
state bool pendingTSSCheck = false;

TraceEvent(SevDebug, "TSS_RecruitUpdated", self->distributorId).detail("Count", tssToRecruit);
loop {
try {
// Divide TSS evenly in each DC if there are multiple
// TODO: would it be better to put all of them in the primary DC?
targetTSSInDC = self->configuration.desiredTSSCount;
if (self->configuration.usableRegions > 1) {
    targetTSSInDC /= self->configuration.usableRegions;
    if (self->primary) {
        // put extras in primary DC if it's uneven
        targetTSSInDC += (self->configuration.desiredTSSCount % self->configuration.usableRegions);
    }
}
int newTssToRecruit = targetTSSInDC - self - > tss_info_by_pair . size ( ) - inProgressTSSCount ;
if ( newTssToRecruit ! = tssToRecruit ) {
TraceEvent ( " TSS_RecruitUpdated " , self - > distributorId ) . detail ( " Count " , newTssToRecruit ) ;
tssToRecruit = newTssToRecruit ;
// if we need to get rid of some TSS processes, signal to either cancel recruitment or kill existing TSS
// processes
if ( ! pendingTSSCheck & & ( tssToRecruit < 0 | | self - > zeroHealthyTeams - > get ( ) ) & &
( self - > isTssRecruiting | | ( self - > zeroHealthyTeams - > get ( ) & & self - > tss_info_by_pair . size ( ) > 0 ) ) ) {
checkTss = self - > initialFailureReactionDelay ;
}
}
numSSPerAddr.clear();
hasHealthyTeam = (self->healthyTeamCount != 0);

RecruitStorageRequest rsr;
std::set<AddressExclusion> exclusions;
for (auto s = self->server_and_tss_info.begin(); s != self->server_and_tss_info.end(); ++s) {
    auto serverStatus = self->server_status.get(s->second->lastKnownInterface.id());
    if (serverStatus.excludeOnRecruit()) {
        TraceEvent(SevDebug, "DDRecruitExcl1")
            .detail("Primary", self->primary)
            .detail("Excluding", s->second->lastKnownInterface.address());
        auto addr = s->second->lastKnownInterface.stableAddress();
        AddressExclusion addrExcl(addr.ip, addr.port);
        exclusions.insert(addrExcl);
        numSSPerAddr[addrExcl]++; // increase from 0
    }
}
for (auto addr : self->recruitingLocalities) {
    exclusions.insert(AddressExclusion(addr.ip, addr.port));
}
auto excl = self->excludedServers.getKeys();
for (const auto& s : excl) {
    if (self->excludedServers.get(s) != DDTeamCollection::Status::NONE) {
        TraceEvent(SevDebug, "DDRecruitExcl2")
            .detail("Primary", self->primary)
            .detail("Excluding", s.toString());
        exclusions.insert(s);
    }
}
// Exclude workers that have invalid locality
for (auto& addr : self->invalidLocalityAddr) {
    TraceEvent(SevDebug, "DDRecruitExclInvalidAddr").detail("Excluding", addr.toString());
    exclusions.insert(addr);
}
rsr.criticalRecruitment = !hasHealthyTeam;
for (auto it : exclusions) {
    rsr.excludeAddresses.push_back(it);
}
rsr.includeDCs = self->includedDCs;

TraceEvent(rsr.criticalRecruitment ? SevWarn : SevInfo, "DDRecruiting")
    .detail("Primary", self->primary)
    .detail("State", "Sending request to CC")
    .detail("Exclusions", rsr.excludeAddresses.size())
    .detail("Critical", rsr.criticalRecruitment)
    .detail("IncludedDCsSize", rsr.includeDCs.size());

if (rsr.criticalRecruitment) {
    TraceEvent(SevWarn, "DDRecruitingEmergency", self->distributorId).detail("Primary", self->primary);
}

if (!fCandidateWorker.isValid() || fCandidateWorker.isReady() ||
    rsr.excludeAddresses != lastRequest.excludeAddresses ||
    rsr.criticalRecruitment != lastRequest.criticalRecruitment) {
    lastRequest = rsr;
    fCandidateWorker =
        brokenPromiseToNever(recruitStorage->get().getReply(rsr, TaskPriority::DataDistribution));
}
choose {
    when(RecruitStorageReply candidateWorker = wait(fCandidateWorker)) {
        AddressExclusion candidateSSAddr(candidateWorker.worker.stableAddress().ip,
                                         candidateWorker.worker.stableAddress().port);
        int numExistingSS = numSSPerAddr[candidateSSAddr];
        if (numExistingSS >= 2) {
            TraceEvent(SevWarnAlways, "StorageRecruiterTooManySSOnSameAddr", self->distributorId)
                .detail("Primary", self->primary)
                .detail("Addr", candidateSSAddr.toString())
                .detail("NumExistingSS", numExistingSS);
        }
if ( hasHealthyTeam & & ! tssState - > active & & tssToRecruit > 0 ) {
TraceEvent ( " TSS_Recruit " , self - > distributorId )
. detail ( " Stage " , " HoldTSS " )
. detail ( " Addr " , candidateSSAddr . toString ( ) )
. detail ( " Locality " , candidateWorker . worker . locality . toString ( ) ) ;
TEST ( true ) ; // Starting TSS recruitment
self - > isTssRecruiting = true ;
            tssState = makeReference<TSSPairState>(candidateWorker.worker.locality);

            addTSSInProgress.send(tssState->waitComplete());
            self->addActor.send(initializeStorage(self, candidateWorker, ddEnabledState, true, tssState));
            checkTss = self->initialFailureReactionDelay;
        } else {
            if (tssState->active && tssState->inDataZone(candidateWorker.worker.locality)) {
                TEST(true); // TSS recruits pair in same dc/datahall
                self->isTssRecruiting = false;
                TraceEvent("TSS_Recruit", self->distributorId)
                    .detail("Stage", "PairSS")
                    .detail("Addr", candidateSSAddr.toString())
                    .detail("Locality", candidateWorker.worker.locality.toString());
                self->addActor.send(
                    initializeStorage(self, candidateWorker, ddEnabledState, false, tssState));
                // successfully started recruitment of pair, reset tss recruitment state
                tssState = makeReference<TSSPairState>();
            } else {
                TEST(tssState->active); // TSS recruitment skipped potential pair because it's in a
                                        // different dc/datahall
                self->addActor.send(initializeStorage(
                    self, candidateWorker, ddEnabledState, false, makeReference<TSSPairState>()));
            }
        }
2017-05-26 04:48:44 +08:00
}
    when(wait(recruitStorage->onChange())) { fCandidateWorker = Future<RecruitStorageReply>(); }
    when(wait(self->zeroHealthyTeams->onChange())) {
        if (!pendingTSSCheck && self->zeroHealthyTeams->get() &&
            (self->isTssRecruiting || self->tss_info_by_pair.size() > 0)) {
            checkTss = self->initialFailureReactionDelay;
        }
    }
    when(wait(checkTss)) {
        bool cancelTss = self->isTssRecruiting && (tssToRecruit < 0 || self->zeroHealthyTeams->get());
        // Can't kill more TSS processes than we have. Kill one if there are zero healthy teams; otherwise
        // kill enough to get back to the desired amount.
        int tssToKill = std::min((int)self->tss_info_by_pair.size(),
                                 std::max(-tssToRecruit, self->zeroHealthyTeams->get() ? 1 : 0));
        if (cancelTss) {
            TEST(tssToRecruit < 0); // tss recruitment cancelled due to too many TSS
            TEST(self->zeroHealthyTeams->get()); // tss recruitment cancelled due to zero healthy teams
            TraceEvent(SevWarn, "TSS_RecruitCancelled", self->distributorId)
                .detail("Reason", tssToRecruit <= 0 ? "TooMany" : "ZeroHealthyTeams");
            tssState->cancel();
            tssState = makeReference<TSSPairState>();
            self->isTssRecruiting = false;

            pendingTSSCheck = true;
            checkTss = delay(SERVER_KNOBS->TSS_DD_CHECK_INTERVAL);
        } else if (tssToKill > 0) {
            auto itr = self->tss_info_by_pair.begin();
            for (int i = 0; i < tssToKill; i++, itr++) {
                UID tssId = itr->second->id;
                StorageServerInterface tssi = itr->second->lastKnownInterface;

                if (self->shouldHandleServer(tssi) && self->server_and_tss_info.count(tssId)) {
                    Promise<Void> killPromise = itr->second->killTss;
                    if (killPromise.canBeSet()) {
                        TEST(tssToRecruit < 0); // Killing TSS due to too many TSS
                        TEST(self->zeroHealthyTeams->get()); // Killing TSS due to zero healthy teams
                        TraceEvent(SevWarn, "TSS_DDKill", self->distributorId)
                            .detail("TSSID", tssId)
                            .detail("Reason",
                                    self->zeroHealthyTeams->get() ? "ZeroHealthyTeams" : "TooMany");
                        killPromise.send(Void());
                    }
                }
            }
            // If we're killing a TSS because of zero healthy teams, wait a bit to give the replacing SS a
            // chance to join teams before killing another TSS
            pendingTSSCheck = true;
            checkTss = delay(SERVER_KNOBS->TSS_DD_CHECK_INTERVAL);
        } else if (self->isTssRecruiting) {
            // check again later in case we need to cancel recruitment
            pendingTSSCheck = true;
            checkTss = delay(SERVER_KNOBS->TSS_DD_CHECK_INTERVAL);
            // FIXME: better way to do this than timer?
        } else {
            pendingTSSCheck = false;
            checkTss = Never();
}
2017-05-26 04:48:44 +08:00
}
    when(wait(self->restartRecruiting.onTrigger())) {}
}
wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY, TaskPriority::DataDistribution));
} catch (Error& e) {
    if (e.code() != error_code_timed_out) {
        throw;
    }
    TEST(true); // Storage recruitment timed out
}
}
}
2018-04-09 12:24:05 +08:00
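// Waits until the team collection is healthy and all servers have been updated, then records the current
// replication factor for this DC under the datacenterReplicas key (requesting a reboot-when-durable if the
// replica count increased).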
ACTOR Future < Void > updateReplicasKey ( DDTeamCollection * self , Optional < Key > dcId ) {
2018-11-13 09:39:40 +08:00
std : : vector < Future < Void > > serverUpdates ;
2021-03-11 02:06:03 +08:00
for ( auto & it : self - > server_info ) {
2018-11-13 09:39:40 +08:00
serverUpdates . push_back ( it . second - > updated . getFuture ( ) ) ;
}
2018-11-13 12:26:58 +08:00
wait ( self - > initialFailureReactionDelay & & waitForAll ( serverUpdates ) ) ;
2019-02-21 10:20:10 +08:00
wait ( waitUntilHealthy ( self ) ) ;
2019-08-13 01:08:12 +08:00
TraceEvent ( " DDUpdatingReplicas " , self - > distributorId )
. detail ( " Primary " , self - > primary )
. detail ( " DcId " , dcId )
. detail ( " Replicas " , self - > configuration . storageTeamSize ) ;
2018-04-09 12:24:05 +08:00
state Transaction tr ( self - > cx ) ;
loop {
try {
2021-03-11 02:06:03 +08:00
Optional < Value > val = wait ( tr . get ( datacenterReplicasKeyFor ( dcId ) ) ) ;
2018-11-09 07:44:03 +08:00
state int oldReplicas = val . present ( ) ? decodeDatacenterReplicasValue ( val . get ( ) ) : 0 ;
2021-03-11 02:06:03 +08:00
if ( oldReplicas = = self - > configuration . storageTeamSize ) {
2019-08-13 01:08:12 +08:00
TraceEvent ( " DDUpdatedAlready " , self - > distributorId )
. detail ( " Primary " , self - > primary )
. detail ( " DcId " , dcId )
. detail ( " Replicas " , self - > configuration . storageTeamSize ) ;
2018-11-09 07:44:03 +08:00
return Void ( ) ;
}
2021-03-11 02:06:03 +08:00
if ( oldReplicas < self - > configuration . storageTeamSize ) {
2018-11-09 07:44:03 +08:00
tr . set ( rebootWhenDurableKey , StringRef ( ) ) ;
}
2018-04-09 12:24:05 +08:00
tr . set ( datacenterReplicasKeyFor ( dcId ) , datacenterReplicasValue ( self - > configuration . storageTeamSize ) ) ;
2021-03-11 02:06:03 +08:00
wait ( tr . commit ( ) ) ;
2019-08-13 01:08:12 +08:00
TraceEvent ( " DDUpdatedReplicas " , self - > distributorId )
. detail ( " Primary " , self - > primary )
. detail ( " DcId " , dcId )
. detail ( " Replicas " , self - > configuration . storageTeamSize )
. detail ( " OldReplicas " , oldReplicas ) ;
2018-04-09 12:24:05 +08:00
return Void ( ) ;
2021-03-11 02:06:03 +08:00
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
2018-04-09 12:24:05 +08:00
}
}
}
2018-06-08 07:14:40 +08:00
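// Forwards GetTeamRequests arriving on the team collection interface to DDTeamCollection::getTeam.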
ACTOR Future < Void > serverGetTeamRequests ( TeamCollectionInterface tci , DDTeamCollection * self ) {
loop {
GetTeamRequest req = waitNext ( tci . getTeam . getFuture ( ) ) ;
2021-03-11 02:06:03 +08:00
self - > addActor . send ( self - > getTeam ( self , req ) ) ;
2018-06-08 07:14:40 +08:00
}
}
2021-07-12 13:04:38 +08:00
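// Resolves once the recovery state reaches ALL_LOGS_RECRUITED; used to delay the remote region's team
// collection until the remote logs have been recruited.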
ACTOR Future < Void > remoteRecovered ( Reference < AsyncVar < ServerDBInfo > const > db ) {
2021-07-27 10:55:10 +08:00
TraceEvent ( " DDTrackerStarting " ) . log ( ) ;
2021-03-11 02:06:03 +08:00
while ( db - > get ( ) . recoveryState < RecoveryState : : ALL_LOGS_RECRUITED ) {
2019-01-23 13:19:31 +08:00
TraceEvent ( " DDTrackerStarting " ) . detail ( " RecoveryState " , ( int ) db - > get ( ) . recoveryState ) ;
2021-03-11 02:06:03 +08:00
wait ( db - > onChange ( ) ) ;
2019-01-23 13:19:31 +08:00
}
return Void ( ) ;
}
2021-03-11 02:06:03 +08:00
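// While there are zero healthy teams, periodically triggers team building in the team collection.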
ACTOR Future < Void > monitorHealthyTeams ( DDTeamCollection * self ) {
2019-08-13 01:08:12 +08:00
TraceEvent ( " DDMonitorHealthyTeamsStart " ) . detail ( " ZeroHealthyTeams " , self - > zeroHealthyTeams - > get ( ) ) ;
2019-02-13 06:02:21 +08:00
loop choose {
2021-03-11 02:06:03 +08:00
when ( wait ( self - > zeroHealthyTeams - > get ( )
? delay ( SERVER_KNOBS - > DD_ZERO_HEALTHY_TEAM_DELAY , TaskPriority : : DataDistribution )
: Never ( ) ) ) {
2019-02-13 07:50:44 +08:00
self - > doBuildTeams = true ;
2021-03-11 02:06:03 +08:00
wait ( DDTeamCollection : : checkBuildTeams ( self ) ) ;
2019-02-13 06:02:21 +08:00
}
2021-03-11 02:06:03 +08:00
when ( wait ( self - > zeroHealthyTeams - > onChange ( ) ) ) { }
2019-02-13 06:02:21 +08:00
}
}
2017-05-26 04:48:44 +08:00
// Keep track of servers and teams -- serves requests for getRandomTeam
2021-08-13 07:24:03 +08:00
ACTOR Future < Void > dataDistributionTeamCollection (
Reference < DDTeamCollection > teamCollection ,
Reference < InitialDataDistribution > initData ,
TeamCollectionInterface tci ,
Reference < IAsyncListener < RequestStream < RecruitStorageRequest > > > recruitStorage ,
DDEnabledState const * ddEnabledState ) {
2018-11-10 02:07:55 +08:00
state DDTeamCollection * self = teamCollection . getPtr ( ) ;
2017-05-26 04:48:44 +08:00
state Future < Void > loggingTrigger = Void ( ) ;
state PromiseStream < Void > serverRemoved ;
2021-03-11 02:06:03 +08:00
state Future < Void > error = actorCollection ( self - > addActor . getFuture ( ) ) ;
2018-11-10 02:07:55 +08:00
2017-05-26 04:48:44 +08:00
try {
2020-09-28 06:26:50 +08:00
wait ( DDTeamCollection : : init ( self , initData , ddEnabledState ) ) ;
2017-05-26 04:48:44 +08:00
initData = Reference < InitialDataDistribution > ( ) ;
2018-11-10 02:07:55 +08:00
self - > addActor . send ( serverGetTeamRequests ( tci , self ) ) ;
2018-06-08 07:14:40 +08:00
2018-12-14 05:31:37 +08:00
TraceEvent ( " DDTeamCollectionBegin " , self - > distributorId ) . detail ( " Primary " , self - > primary ) ;
2021-03-11 02:06:03 +08:00
wait ( self - > readyToStart | | error ) ;
2018-12-14 05:31:37 +08:00
TraceEvent ( " DDTeamCollectionReadyToStart " , self - > distributorId ) . detail ( " Primary " , self - > primary ) ;
2018-11-08 13:05:31 +08:00
2019-07-20 07:22:15 +08:00
// removeBadTeams() does not always run and may need to be restarted later, so we keep its future in
// badTeamRemover to check whether the actor has finished before restarting it.
2021-03-11 02:06:03 +08:00
if ( self - > badTeamRemover . isReady ( ) ) {
2018-11-10 02:07:55 +08:00
self - > badTeamRemover = removeBadTeams ( self ) ;
self - > addActor . send ( self - > badTeamRemover ) ;
2018-11-08 13:05:31 +08:00
}
2019-08-13 08:38:17 +08:00
self - > addActor . send ( machineTeamRemover ( self ) ) ;
self - > addActor . send ( serverTeamRemover ( self ) ) ;
2019-08-17 01:48:50 +08:00
if ( self - > wrongStoreTypeRemover . isReady ( ) ) {
self - > wrongStoreTypeRemover = removeWrongStoreType ( self ) ;
self - > addActor . send ( self - > wrongStoreTypeRemover ) ;
2019-02-19 02:02:40 +08:00
}
2019-08-13 01:08:12 +08:00
2019-02-19 02:02:40 +08:00
self - > traceTeamCollectionInfo ( ) ;
2021-03-11 02:06:03 +08:00
if ( self - > includedDCs . size ( ) ) {
// start this actor before any potential recruitments can happen
2018-11-10 02:07:55 +08:00
self - > addActor . send ( updateReplicasKey ( self , self - > includedDCs [ 0 ] ) ) ;
2018-04-09 12:24:05 +08:00
}
2018-11-13 09:39:40 +08:00
2019-07-20 07:22:15 +08:00
// The following actors (e.g. storageRecruiter) do not need to be assigned to a variable because
// they are always running.
2021-08-13 07:24:03 +08:00
self - > addActor . send ( storageRecruiter ( self , recruitStorage , ddEnabledState ) ) ;
2021-03-11 02:06:03 +08:00
self - > addActor . send ( monitorStorageServerRecruitment ( self ) ) ;
2020-09-28 06:26:50 +08:00
self - > addActor . send ( waitServerListChange ( self , serverRemoved . getFuture ( ) , ddEnabledState ) ) ;
2021-03-11 02:06:03 +08:00
self - > addActor . send ( trackExcludedServers ( self ) ) ;
self - > addActor . send ( monitorHealthyTeams ( self ) ) ;
self - > addActor . send ( waitHealthyZoneChange ( self ) ) ;
2018-11-13 09:39:40 +08:00
2021-06-12 06:58:05 +08:00
if ( self - > primary ) { // the primary dc also handles the satellite dc's perpetual wiggling
2021-08-04 01:15:34 +08:00
self - > addActor . send ( monitorPerpetualStorageWiggle ( self ) ) ;
2021-06-12 06:58:05 +08:00
}
2017-05-26 04:48:44 +08:00
// SOMEDAY: Monitor FF/serverList for (new) servers that aren't in allServers and add or remove them
loop choose {
2021-03-11 02:06:03 +08:00
when ( UID removedServer = waitNext ( self - > removedServers . getFuture ( ) ) ) {
TEST ( true ) ; // Storage server removed from database
2019-11-23 02:20:13 +08:00
self - > removeServer ( removedServer ) ;
2021-03-11 02:06:03 +08:00
serverRemoved . send ( Void ( ) ) ;
2017-05-26 04:48:44 +08:00
2018-11-10 02:07:55 +08:00
self - > restartRecruiting . trigger ( ) ;
2017-05-26 04:48:44 +08:00
}
2021-03-06 03:28:15 +08:00
when ( UID removedTSS = waitNext ( self - > removedTSS . getFuture ( ) ) ) {
TEST ( true ) ; // TSS removed from database
self - > removeTSS ( removedTSS ) ;
serverRemoved . send ( Void ( ) ) ;
self - > restartRecruiting . trigger ( ) ;
}
2021-03-11 02:06:03 +08:00
when ( wait ( self - > zeroHealthyTeams - > onChange ( ) ) ) {
if ( self - > zeroHealthyTeams - > get ( ) ) {
2018-11-10 02:07:55 +08:00
self - > restartRecruiting . trigger ( ) ;
self - > noHealthyTeams ( ) ;
2018-01-31 09:00:51 +08:00
}
2017-05-26 04:48:44 +08:00
}
2021-03-11 02:06:03 +08:00
when ( wait ( loggingTrigger ) ) {
2018-08-10 04:16:09 +08:00
int highestPriority = 0 ;
2021-03-11 02:06:03 +08:00
for ( auto it : self - > priority_teams ) {
if ( it . second > 0 ) {
2018-08-10 04:16:09 +08:00
highestPriority = std : : max ( highestPriority , it . first ) ;
}
}
2018-08-30 05:40:39 +08:00
2018-12-14 05:31:37 +08:00
TraceEvent ( " TotalDataInFlight " , self - > distributorId )
2018-11-22 03:18:26 +08:00
. detail ( " Primary " , self - > primary )
. detail ( " TotalBytes " , self - > getDebugTotalDataInFlight ( ) )
. detail ( " UnhealthyServers " , self - > unhealthyServers )
2019-07-05 23:54:21 +08:00
. detail ( " ServerCount " , self - > server_info . size ( ) )
2018-11-22 03:18:26 +08:00
. detail ( " StorageTeamSize " , self - > configuration . storageTeamSize )
. detail ( " HighestPriority " , highestPriority )
. trackLatest ( self - > primary ? " TotalDataInFlight " : " TotalDataInFlightRemote " ) ;
2021-03-11 02:06:03 +08:00
loggingTrigger = delay ( SERVER_KNOBS - > DATA_DISTRIBUTION_LOGGING_INTERVAL , TaskPriority : : FlushTrace ) ;
2017-05-26 04:48:44 +08:00
}
2021-03-11 02:06:03 +08:00
when ( wait ( self - > serverTrackerErrorOut . getFuture ( ) ) ) { } // Propagate errors from storageServerTracker
when ( wait ( error ) ) { }
2017-05-26 04:48:44 +08:00
}
} catch ( Error & e ) {
if ( e . code ( ) ! = error_code_movekeys_conflict )
2018-12-14 05:31:37 +08:00
TraceEvent ( SevError , " DataDistributionTeamCollectionError " , self - > distributorId ) . error ( e ) ;
2017-05-26 04:48:44 +08:00
throw e ;
}
}
2020-09-28 06:26:50 +08:00
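// Polls the data distribution mode key until data distribution is enabled (the mode key is absent or
// non-zero) and DD has not been disabled in memory.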
ACTOR Future < Void > waitForDataDistributionEnabled ( Database cx , const DDEnabledState * ddEnabledState ) {
2017-05-26 04:48:44 +08:00
state Transaction tr ( cx ) ;
loop {
2019-06-25 17:47:35 +08:00
wait ( delay ( SERVER_KNOBS - > DD_ENABLED_CHECK_DELAY , TaskPriority : : DataDistribution ) ) ;
2017-05-26 04:48:44 +08:00
try {
2021-03-11 02:06:03 +08:00
Optional < Value > mode = wait ( tr . get ( dataDistributionModeKey ) ) ;
2020-09-28 06:26:50 +08:00
if ( ! mode . present ( ) & & ddEnabledState - > isDDEnabled ( ) ) {
2021-07-27 10:55:10 +08:00
TraceEvent ( " WaitForDDEnabledSucceeded " ) . log ( ) ;
2019-07-24 07:16:31 +08:00
return Void ( ) ;
}
2017-05-26 04:48:44 +08:00
if ( mode . present ( ) ) {
2021-03-11 02:06:03 +08:00
BinaryReader rd ( mode . get ( ) , Unversioned ( ) ) ;
2017-05-26 04:48:44 +08:00
int m ;
rd > > m ;
2019-07-24 07:16:31 +08:00
TraceEvent ( SevDebug , " WaitForDDEnabled " )
2020-09-28 06:26:50 +08:00
. detail ( " Mode " , m )
. detail ( " IsDDEnabled " , ddEnabledState - > isDDEnabled ( ) ) ;
if ( m & & ddEnabledState - > isDDEnabled ( ) ) {
2021-07-27 10:55:10 +08:00
TraceEvent ( " WaitForDDEnabledSucceeded " ) . log ( ) ;
2019-07-24 07:16:31 +08:00
return Void ( ) ;
}
2017-05-26 04:48:44 +08:00
}
tr . reset ( ) ;
} catch ( Error & e ) {
2021-03-11 02:06:03 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
2020-09-28 06:26:50 +08:00
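// Checks whether data distribution is currently enabled, based on the mode key, the in-memory DD-enabled
// flag, and the current owner of the moveKeys lock.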
ACTOR Future < bool > isDataDistributionEnabled ( Database cx , const DDEnabledState * ddEnabledState ) {
2017-05-26 04:48:44 +08:00
state Transaction tr ( cx ) ;
loop {
try {
2021-03-11 02:06:03 +08:00
Optional < Value > mode = wait ( tr . get ( dataDistributionModeKey ) ) ;
if ( ! mode . present ( ) & & ddEnabledState - > isDDEnabled ( ) )
return true ;
2017-05-26 04:48:44 +08:00
if ( mode . present ( ) ) {
2021-03-11 02:06:03 +08:00
BinaryReader rd ( mode . get ( ) , Unversioned ( ) ) ;
2017-05-26 04:48:44 +08:00
int m ;
rd > > m ;
2020-09-28 06:26:50 +08:00
if ( m & & ddEnabledState - > isDDEnabled ( ) ) {
2019-07-24 07:16:31 +08:00
TraceEvent ( SevDebug , " IsDDEnabledSucceeded " )
2020-09-28 06:26:50 +08:00
. detail ( " Mode " , m )
. detail ( " IsDDEnabled " , ddEnabledState - > isDDEnabled ( ) ) ;
2019-07-24 07:16:31 +08:00
return true ;
}
2017-05-26 04:48:44 +08:00
}
2019-02-18 10:55:52 +08:00
// SOMEDAY: Write a wrapper in MoveKeys.actor.h
2021-03-11 02:06:03 +08:00
Optional < Value > readVal = wait ( tr . get ( moveKeysLockOwnerKey ) ) ;
UID currentOwner =
readVal . present ( ) ? BinaryReader : : fromStringRef < UID > ( readVal . get ( ) , Unversioned ( ) ) : UID ( ) ;
2020-09-28 06:26:50 +08:00
if ( ddEnabledState - > isDDEnabled ( ) & & ( currentOwner ! = dataDistributionModeLock ) ) {
2019-07-24 07:16:31 +08:00
TraceEvent ( SevDebug , " IsDDEnabledSucceeded " )
2020-09-28 06:26:50 +08:00
. detail ( " CurrentOwner " , currentOwner )
. detail ( " DDModeLock " , dataDistributionModeLock )
. detail ( " IsDDEnabled " , ddEnabledState - > isDDEnabled ( ) ) ;
2017-05-26 04:48:44 +08:00
return true ;
2019-07-24 07:16:31 +08:00
}
TraceEvent ( SevDebug , " IsDDEnabledFailed " )
2020-09-28 06:26:50 +08:00
. detail ( " CurrentOwner " , currentOwner )
. detail ( " DDModeLock " , dataDistributionModeLock )
. detail ( " IsDDEnabled " , ddEnabledState - > isDDEnabled ( ) ) ;
2017-05-26 04:48:44 +08:00
return false ;
} catch ( Error & e ) {
2021-03-11 02:06:03 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
2021-03-11 02:06:03 +08:00
// Ensures that the serverKeys key space is properly coalesced
// This method is only used for testing and is not implemented in a manner that is safe for large databases
2017-05-26 04:48:44 +08:00
ACTOR Future < Void > debugCheckCoalescing ( Database cx ) {
state Transaction tr ( cx ) ;
loop {
try {
2021-05-04 04:14:16 +08:00
state RangeResult serverList = wait ( tr . getRange ( serverListKeys , CLIENT_KNOBS - > TOO_MANY ) ) ;
2021-03-11 02:06:03 +08:00
ASSERT ( ! serverList . more & & serverList . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
2017-05-26 04:48:44 +08:00
state int i ;
2021-03-11 02:06:03 +08:00
for ( i = 0 ; i < serverList . size ( ) ; i + + ) {
2017-05-26 04:48:44 +08:00
state UID id = decodeServerListValue ( serverList [ i ] . value ) . id ( ) ;
2021-05-04 04:14:16 +08:00
RangeResult ranges = wait ( krmGetRanges ( & tr , serverKeysPrefixFor ( id ) , allKeys ) ) ;
2017-05-26 04:48:44 +08:00
ASSERT ( ranges . end ( ) [ - 1 ] . key = = allKeys . end ) ;
2021-03-11 02:06:03 +08:00
for ( int j = 0 ; j < ranges . size ( ) - 2 ; j + + )
if ( ranges [ j ] . value = = ranges [ j + 1 ] . value )
TraceEvent ( SevError , " UncoalescedValues " , id )
. detail ( " Key1 " , ranges [ j ] . key )
. detail ( " Key2 " , ranges [ j + 1 ] . key )
. detail ( " Value " , ranges [ j ] . value ) ;
2017-05-26 04:48:44 +08:00
}
2021-07-27 10:55:10 +08:00
TraceEvent ( " DoneCheckingCoalescing " ) . log ( ) ;
2017-05-26 04:48:44 +08:00
return Void ( ) ;
2021-03-11 02:06:03 +08:00
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
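// Errors that the DD tracker and queue may throw during normal operation; these are filtered out of error
// reporting by reportErrorsExcept().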
static std : : set < int > const & normalDDQueueErrors ( ) {
static std : : set < int > s ;
if ( s . empty ( ) ) {
2021-03-11 02:06:03 +08:00
s . insert ( error_code_movekeys_conflict ) ;
s . insert ( error_code_broken_promise ) ;
2017-05-26 04:48:44 +08:00
}
return s ;
}
2020-09-28 06:26:50 +08:00
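// Periodically re-validates the moveKeys lock so that data distribution stops if another process has taken
// over the lock.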
ACTOR Future < Void > pollMoveKeysLock ( Database cx , MoveKeysLock lock , const DDEnabledState * ddEnabledState ) {
2017-05-26 04:48:44 +08:00
loop {
2018-08-11 04:57:10 +08:00
wait ( delay ( SERVER_KNOBS - > MOVEKEYS_LOCK_POLLING_DELAY ) ) ;
2017-05-26 04:48:44 +08:00
state Transaction tr ( cx ) ;
loop {
try {
2020-09-28 06:26:50 +08:00
wait ( checkMoveKeysLockReadOnly ( & tr , lock , ddEnabledState ) ) ;
2017-05-26 04:48:44 +08:00
break ;
2021-03-11 02:06:03 +08:00
} catch ( Error & e ) {
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
}
2019-02-19 06:57:21 +08:00
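// Per-instance state for the data distributor role: the latest ServerDBInfo, this distributor's UID, and a
// pointer to the primary team collection once it exists.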
struct DataDistributorData : NonCopyable , ReferenceCounted < DataDistributorData > {
2021-07-12 13:04:38 +08:00
Reference < AsyncVar < ServerDBInfo > const > dbInfo ;
2019-02-19 06:57:21 +08:00
UID ddId ;
PromiseStream < Future < Void > > addActor ;
2019-09-04 07:27:14 +08:00
DDTeamCollection * teamCollection ;
2019-02-19 06:57:21 +08:00
2021-07-12 13:04:38 +08:00
DataDistributorData ( Reference < AsyncVar < ServerDBInfo > const > const & db , UID id )
2019-09-04 07:27:14 +08:00
: dbInfo ( db ) , ddId ( id ) , teamCollection ( nullptr ) { }
2019-02-19 06:57:21 +08:00
} ;
2021-07-12 13:04:38 +08:00
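// Polls the GRV proxies' health metrics and records in *lastLimited the last time the cluster was
// batch-priority rate limited.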
ACTOR Future < Void > monitorBatchLimitedTime ( Reference < AsyncVar < ServerDBInfo > const > db , double * lastLimited ) {
2019-03-13 02:34:16 +08:00
loop {
2021-03-11 02:06:03 +08:00
wait ( delay ( SERVER_KNOBS - > METRIC_UPDATE_RATE ) ) ;
2019-03-13 02:34:16 +08:00
2020-10-20 13:51:56 +08:00
state Reference < GrvProxyInfo > grvProxies ( new GrvProxyInfo ( db - > get ( ) . client . grvProxies , false ) ) ;
2019-03-13 02:34:16 +08:00
choose {
2021-03-11 02:06:03 +08:00
when ( wait ( db - > onChange ( ) ) ) { }
when ( GetHealthMetricsReply reply =
wait ( grvProxies - > size ( ) ? basicLoadBalance ( grvProxies ,
& GrvProxyInterface : : getHealthMetrics ,
GetHealthMetricsRequest ( false ) )
: Never ( ) ) ) {
2019-03-13 02:34:16 +08:00
if ( reply . healthMetrics . batchLimited ) {
* lastLimited = now ( ) ;
}
}
}
}
}
2021-03-17 06:44:49 +08:00
// Runs the data distribution algorithm for FDB, including the DD Queue, DD tracker, and DD team collection
2020-09-28 06:26:50 +08:00
ACTOR Future < Void > dataDistribution ( Reference < DataDistributorData > self ,
PromiseStream < GetMetricsListRequest > getShardMetricsList ,
const DDEnabledState * ddEnabledState ) {
2019-03-13 02:34:16 +08:00
state double lastLimited = 0 ;
2021-03-11 02:06:03 +08:00
self - > addActor . send ( monitorBatchLimitedTime ( self - > dbInfo , & lastLimited ) ) ;
2019-03-13 02:34:16 +08:00
2021-07-17 15:11:40 +08:00
state Database cx = openDBOnServer ( self - > dbInfo , TaskPriority : : DataDistributionLaunch , LockAware : : True ) ;
2017-05-26 04:48:44 +08:00
cx - > locationCacheSize = SERVER_KNOBS - > DD_LOCATION_CACHE_SIZE ;
2021-03-11 02:06:03 +08:00
// cx->setOption( FDBDatabaseOptions::LOCATION_CACHE_SIZE, StringRef((uint8_t*)
// &SERVER_KNOBS->DD_LOCATION_CACHE_SIZE, 8) ); ASSERT( cx->locationCacheSize ==
// SERVER_KNOBS->DD_LOCATION_CACHE_SIZE
// );
2017-05-26 04:48:44 +08:00
2021-03-11 02:06:03 +08:00
// wait(debugCheckCoalescing(cx));
2019-02-20 08:04:52 +08:00
state std : : vector < Optional < Key > > primaryDcId ;
state std : : vector < Optional < Key > > remoteDcIds ;
state DatabaseConfiguration configuration ;
2019-02-18 10:46:59 +08:00
state Reference < InitialDataDistribution > initData ;
state MoveKeysLock lock ;
2020-05-11 14:14:19 +08:00
state Reference < DDTeamCollection > primaryTeamCollection ;
state Reference < DDTeamCollection > remoteTeamCollection ;
2020-11-16 15:21:23 +08:00
state bool trackerCancelled ;
2017-05-26 04:48:44 +08:00
loop {
2020-11-16 15:21:23 +08:00
trackerCancelled = false ;
2021-01-26 08:09:32 +08:00
// Stored outside of data distribution tracker to avoid slow tasks
// when tracker is cancelled
state KeyRangeMap < ShardTrackedData > shards ;
2021-03-16 01:43:06 +08:00
state Promise < UID > removeFailedServer ;
2017-05-26 04:48:44 +08:00
try {
loop {
2021-07-27 10:55:10 +08:00
TraceEvent ( " DDInitTakingMoveKeysLock " , self - > ddId ) . log ( ) ;
2021-03-11 02:06:03 +08:00
MoveKeysLock lock_ = wait ( takeMoveKeysLock ( cx , self - > ddId ) ) ;
2019-02-18 10:46:59 +08:00
lock = lock_ ;
2021-07-27 10:55:10 +08:00
TraceEvent ( " DDInitTookMoveKeysLock " , self - > ddId ) . log ( ) ;
2019-02-20 08:04:52 +08:00
2021-03-11 02:06:03 +08:00
DatabaseConfiguration configuration_ = wait ( getDatabaseConfiguration ( cx ) ) ;
2019-02-23 06:38:13 +08:00
configuration = configuration_ ;
2019-02-20 08:04:52 +08:00
primaryDcId . clear ( ) ;
remoteDcIds . clear ( ) ;
const std : : vector < RegionInfo > & regions = configuration . regions ;
2021-03-11 02:06:03 +08:00
if ( configuration . regions . size ( ) > 0 ) {
primaryDcId . push_back ( regions [ 0 ] . dcId ) ;
2019-02-20 08:04:52 +08:00
}
2021-03-11 02:06:03 +08:00
if ( configuration . regions . size ( ) > 1 ) {
remoteDcIds . push_back ( regions [ 1 ] . dcId ) ;
2019-02-20 08:04:52 +08:00
}
2019-02-23 08:36:07 +08:00
TraceEvent ( " DDInitGotConfiguration " , self - > ddId ) . detail ( " Conf " , configuration . toString ( ) ) ;
2019-02-20 08:04:52 +08:00
state Transaction tr ( cx ) ;
loop {
try {
2021-03-11 02:06:03 +08:00
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
tr . setOption ( FDBTransactionOptions : : PRIORITY_SYSTEM_IMMEDIATE ) ;
2019-02-20 08:04:52 +08:00
2021-05-04 04:14:16 +08:00
RangeResult replicaKeys = wait ( tr . getRange ( datacenterReplicasKeys , CLIENT_KNOBS - > TOO_MANY ) ) ;
2019-02-20 08:04:52 +08:00
2021-03-11 02:06:03 +08:00
for ( auto & kv : replicaKeys ) {
2019-02-20 08:04:52 +08:00
auto dcId = decodeDatacenterReplicasKey ( kv . key ) ;
auto replicas = decodeDatacenterReplicasValue ( kv . value ) ;
2021-03-11 02:06:03 +08:00
if ( ( primaryDcId . size ( ) & & primaryDcId [ 0 ] = = dcId ) | |
( remoteDcIds . size ( ) & & remoteDcIds [ 0 ] = = dcId & & configuration . usableRegions > 1 ) ) {
if ( replicas > configuration . storageTeamSize ) {
2019-02-20 08:04:52 +08:00
tr . set ( kv . key , datacenterReplicasValue ( configuration . storageTeamSize ) ) ;
}
} else {
tr . clear ( kv . key ) ;
}
}
wait ( tr . commit ( ) ) ;
break ;
2021-03-11 02:06:03 +08:00
} catch ( Error & e ) {
2019-02-20 08:04:52 +08:00
wait ( tr . onError ( e ) ) ;
}
}
2021-07-27 10:55:10 +08:00
TraceEvent ( " DDInitUpdatedReplicaKeys " , self - > ddId ) . log ( ) ;
2020-09-28 06:26:50 +08:00
Reference<InitialDataDistribution> initData_ = wait(getInitialDataDistribution(
    cx,
    self->ddId,
    lock,
    configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>(),
    ddEnabledState));
2019-02-18 10:46:59 +08:00
initData = initData_ ;
2021-03-11 02:06:03 +08:00
if ( initData - > shards . size ( ) > 1 ) {
2019-02-15 08:24:46 +08:00
TraceEvent ( " DDInitGotInitialDD " , self - > ddId )
2019-03-19 06:03:43 +08:00
. detail ( " B " , initData - > shards . end ( ) [ - 2 ] . key )
. detail ( " E " , initData - > shards . end ( ) [ - 1 ] . key )
2018-11-22 03:18:26 +08:00
. detail ( " Src " , describe ( initData - > shards . end ( ) [ - 2 ] . primarySrc ) )
. detail ( " Dest " , describe ( initData - > shards . end ( ) [ - 2 ] . primaryDest ) )
. trackLatest ( " InitialDD " ) ;
2017-05-26 04:48:44 +08:00
} else {
2021-03-11 02:06:03 +08:00
TraceEvent ( " DDInitGotInitialDD " , self - > ddId )
. detail ( " B " , " " )
. detail ( " E " , " " )
. detail ( " Src " , " [no items] " )
. detail ( " Dest " , " [no items] " )
. trackLatest ( " InitialDD " ) ;
2017-05-26 04:48:44 +08:00
}
2020-09-28 06:26:50 +08:00
if ( initData - > mode & & ddEnabledState - > isDDEnabled ( ) ) {
2019-07-24 07:16:31 +08:00
// mode may have been set to enabled by a system operator using fdbcli, and isDDEnabled() is also true
break ;
}
2021-07-27 10:55:10 +08:00
TraceEvent ( " DataDistributionDisabled " , self - > ddId ) . log ( ) ;
2017-05-26 04:48:44 +08:00
2019-02-15 08:24:46 +08:00
TraceEvent ( " MovingData " , self - > ddId )
2021-03-11 02:06:03 +08:00
. detail ( " InFlight " , 0 )
. detail ( " InQueue " , 0 )
. detail ( " AverageShardSize " , - 1 )
. detail ( " UnhealthyRelocations " , 0 )
. detail ( " HighestPriority " , 0 )
. detail ( " BytesWritten " , 0 )
. detail ( " PriorityRecoverMove " , 0 )
. detail ( " PriorityRebalanceUnderutilizedTeam " , 0 )
. detail ( " PriorityRebalanceOverutilizedTeam " , 0 )
. detail ( " PriorityTeamHealthy " , 0 )
. detail ( " PriorityTeamContainsUndesiredServer " , 0 )
. detail ( " PriorityTeamRedundant " , 0 )
. detail ( " PriorityMergeShard " , 0 )
. detail ( " PriorityTeamUnhealthy " , 0 )
. detail ( " PriorityTeam2Left " , 0 )
. detail ( " PriorityTeam1Left " , 0 )
. detail ( " PriorityTeam0Left " , 0 )
. detail ( " PrioritySplitShard " , 0 )
. trackLatest ( " MovingData " ) ;
TraceEvent ( " TotalDataInFlight " , self - > ddId )
. detail ( " Primary " , true )
. detail ( " TotalBytes " , 0 )
. detail ( " UnhealthyServers " , 0 )
. detail ( " HighestPriority " , 0 )
. trackLatest ( " TotalDataInFlight " ) ;
TraceEvent ( " TotalDataInFlight " , self - > ddId )
. detail ( " Primary " , false )
. detail ( " TotalBytes " , 0 )
. detail ( " UnhealthyServers " , 0 )
. detail ( " HighestPriority " , configuration . usableRegions > 1 ? 0 : - 1 )
. trackLatest ( " TotalDataInFlightRemote " ) ;
2017-05-26 04:48:44 +08:00
2020-09-28 06:26:50 +08:00
wait ( waitForDataDistributionEnabled ( cx , ddEnabledState ) ) ;
2021-07-27 10:55:10 +08:00
TraceEvent ( " DataDistributionEnabled " ) . log ( ) ;
2017-05-26 04:48:44 +08:00
}
// When/If this assertion fails, Evan owes Ben a pat on the back for his foresight
ASSERT ( configuration . storageTeamSize > 0 ) ;
state PromiseStream < RelocateShard > output ;
2018-08-10 03:37:46 +08:00
state PromiseStream < RelocateShard > input ;
2017-05-26 04:48:44 +08:00
state PromiseStream < Promise < int64_t > > getAverageShardBytes ;
2021-05-21 07:31:08 +08:00
state PromiseStream < Promise < int > > getUnhealthyRelocationCount ;
2017-05-26 04:48:44 +08:00
state PromiseStream < GetMetricsRequest > getShardMetrics ;
2021-03-11 02:06:03 +08:00
state Reference < AsyncVar < bool > > processingUnhealthy ( new AsyncVar < bool > ( false ) ) ;
2017-05-26 04:48:44 +08:00
state Promise < Void > readyToStart ;
2021-03-11 02:06:03 +08:00
state Reference < ShardsAffectedByTeamFailure > shardsAffectedByTeamFailure ( new ShardsAffectedByTeamFailure ) ;
2018-08-14 10:46:47 +08:00
state int shard = 0 ;
2019-02-15 08:24:46 +08:00
for ( ; shard < initData - > shards . size ( ) - 1 ; shard + + ) {
2021-03-11 02:06:03 +08:00
KeyRangeRef keys = KeyRangeRef ( initData - > shards [ shard ] . key , initData - > shards [ shard + 1 ] . key ) ;
2018-08-14 10:46:47 +08:00
shardsAffectedByTeamFailure - > defineShard ( keys ) ;
std : : vector < ShardsAffectedByTeamFailure : : Team > teams ;
teams . push_back ( ShardsAffectedByTeamFailure : : Team ( initData - > shards [ shard ] . primarySrc , true ) ) ;
2019-02-15 08:24:46 +08:00
if ( configuration . usableRegions > 1 ) {
2018-08-14 10:46:47 +08:00
teams . push_back ( ShardsAffectedByTeamFailure : : Team ( initData - > shards [ shard ] . remoteSrc , false ) ) ;
}
2021-03-11 02:06:03 +08:00
if ( g_network - > isSimulated ( ) ) {
TraceEvent ( " DDInitShard " )
. detail ( " Keys " , keys )
. detail ( " PrimarySrc " , describe ( initData - > shards [ shard ] . primarySrc ) )
. detail ( " RemoteSrc " , describe ( initData - > shards [ shard ] . remoteSrc ) )
. detail ( " PrimaryDest " , describe ( initData - > shards [ shard ] . primaryDest ) )
. detail ( " RemoteDest " , describe ( initData - > shards [ shard ] . remoteDest ) ) ;
2018-11-12 04:33:31 +08:00
}
2018-08-14 10:46:47 +08:00
shardsAffectedByTeamFailure - > moveShard ( keys , teams ) ;
2019-02-15 08:24:46 +08:00
if ( initData - > shards [ shard ] . hasDest ) {
2020-07-13 09:30:02 +08:00
// This shard is already in flight. Ideally we should use dest in ShardsAffectedByTeamFailure and
// generate a dataDistributionRelocator directly in DataDistributionQueue to track it, but it's
// easier to just (with low priority) schedule it for movement.
2018-09-29 03:15:23 +08:00
bool unhealthy = initData - > shards [ shard ] . primarySrc . size ( ) ! = configuration . storageTeamSize ;
2019-02-15 08:24:46 +08:00
if ( ! unhealthy & & configuration . usableRegions > 1 ) {
2018-09-29 03:15:23 +08:00
unhealthy = initData - > shards [ shard ] . remoteSrc . size ( ) ! = configuration . storageTeamSize ;
}
2021-03-11 02:06:03 +08:00
output . send ( RelocateShard (
keys , unhealthy ? SERVER_KNOBS - > PRIORITY_TEAM_UNHEALTHY : SERVER_KNOBS - > PRIORITY_RECOVER_MOVE ) ) ;
2018-08-14 10:46:47 +08:00
}
2021-03-11 02:06:03 +08:00
wait ( yield ( TaskPriority : : DataDistribution ) ) ;
2018-08-14 10:46:47 +08:00
}
2017-10-11 01:36:33 +08:00
vector < TeamCollectionInterface > tcis ;
2018-11-10 02:07:55 +08:00
2018-02-03 03:46:04 +08:00
Reference < AsyncVar < bool > > anyZeroHealthyTeams ;
vector < Reference < AsyncVar < bool > > > zeroHealthyTeams ;
2017-10-11 01:36:33 +08:00
tcis . push_back ( TeamCollectionInterface ( ) ) ;
2020-11-07 15:50:55 +08:00
zeroHealthyTeams . push_back ( makeReference < AsyncVar < bool > > ( true ) ) ;
2018-02-03 03:46:04 +08:00
int storageTeamSize = configuration . storageTeamSize ;
vector < Future < Void > > actors ;
2018-06-18 10:31:15 +08:00
if ( configuration . usableRegions > 1 ) {
2017-10-11 01:36:33 +08:00
tcis . push_back ( TeamCollectionInterface ( ) ) ;
2021-03-11 02:06:03 +08:00
storageTeamSize = 2 * configuration . storageTeamSize ;
2018-02-03 03:46:04 +08:00
2020-11-07 15:50:55 +08:00
zeroHealthyTeams . push_back ( makeReference < AsyncVar < bool > > ( true ) ) ;
anyZeroHealthyTeams = makeReference < AsyncVar < bool > > ( true ) ;
2021-03-11 02:06:03 +08:00
actors . push_back ( anyTrue ( zeroHealthyTeams , anyZeroHealthyTeams ) ) ;
2018-02-03 03:46:04 +08:00
} else {
anyZeroHealthyTeams = zeroHealthyTeams [ 0 ] ;
2017-10-11 01:36:33 +08:00
}
2021-03-11 02:06:03 +08:00
actors . push_back ( pollMoveKeysLock ( cx , lock , ddEnabledState ) ) ;
actors . push_back ( reportErrorsExcept ( dataDistributionTracker ( initData ,
cx ,
output ,
shardsAffectedByTeamFailure ,
getShardMetrics ,
getShardMetricsList ,
getAverageShardBytes . getFuture ( ) ,
readyToStart ,
anyZeroHealthyTeams ,
self - > ddId ,
& shards ,
& trackerCancelled ) ,
" DDTracker " ,
self - > ddId ,
& normalDDQueueErrors ( ) ) ) ;
actors . push_back ( reportErrorsExcept ( dataDistributionQueue ( cx ,
output ,
input . getFuture ( ) ,
getShardMetrics ,
processingUnhealthy ,
tcis ,
shardsAffectedByTeamFailure ,
lock ,
getAverageShardBytes ,
2021-05-21 07:31:08 +08:00
getUnhealthyRelocationCount ,
2021-03-11 02:06:03 +08:00
self - > ddId ,
storageTeamSize ,
configuration . storageTeamSize ,
& lastLimited ,
ddEnabledState ) ,
" DDQueue " ,
self - > ddId ,
& normalDDQueueErrors ( ) ) ) ;
2018-11-10 02:07:55 +08:00
vector < DDTeamCollection * > teamCollectionsPtrs ;
2020-11-18 06:53:02 +08:00
primaryTeamCollection = makeReference < DDTeamCollection > (
2021-03-11 02:06:03 +08:00
cx ,
self - > ddId ,
lock ,
output ,
shardsAffectedByTeamFailure ,
configuration ,
primaryDcId ,
configuration . usableRegions > 1 ? remoteDcIds : std : : vector < Optional < Key > > ( ) ,
readyToStart . getFuture ( ) ,
zeroHealthyTeams [ 0 ] ,
2021-08-13 05:05:04 +08:00
IsPrimary : : True ,
2021-03-11 02:06:03 +08:00
processingUnhealthy ,
2021-03-16 01:43:06 +08:00
getShardMetrics ,
2021-05-21 07:31:08 +08:00
removeFailedServer ,
getUnhealthyRelocationCount ) ;
2018-11-12 04:33:31 +08:00
teamCollectionsPtrs . push_back ( primaryTeamCollection . getPtr ( ) ) ;
2021-08-13 07:24:03 +08:00
auto recruitStorage = IAsyncListener < RequestStream < RecruitStorageRequest > > : : create (
self - > dbInfo , [ ] ( auto const & info ) { return info . clusterInterface . recruitStorage ; } ) ;
2018-06-18 10:31:15 +08:00
if ( configuration . usableRegions > 1 ) {
2021-03-11 02:06:03 +08:00
remoteTeamCollection =
makeReference < DDTeamCollection > ( cx ,
self - > ddId ,
lock ,
output ,
shardsAffectedByTeamFailure ,
configuration ,
remoteDcIds ,
Optional < std : : vector < Optional < Key > > > ( ) ,
readyToStart . getFuture ( ) & & remoteRecovered ( self - > dbInfo ) ,
zeroHealthyTeams [ 1 ] ,
2021-08-13 05:05:04 +08:00
IsPrimary : : False ,
2021-03-11 02:06:03 +08:00
processingUnhealthy ,
2021-03-16 01:43:06 +08:00
getShardMetrics ,
2021-05-21 07:31:08 +08:00
removeFailedServer ,
getUnhealthyRelocationCount ) ;
2018-11-12 04:33:31 +08:00
teamCollectionsPtrs . push_back ( remoteTeamCollection . getPtr ( ) ) ;
remoteTeamCollection - > teamCollections = teamCollectionsPtrs ;
2020-09-28 06:26:50 +08:00
actors . push_back (
2021-03-11 02:06:03 +08:00
reportErrorsExcept ( dataDistributionTeamCollection (
2021-08-13 07:24:03 +08:00
remoteTeamCollection , initData , tcis [ 1 ] , recruitStorage , ddEnabledState ) ,
2021-03-11 02:06:03 +08:00
" DDTeamCollectionSecondary " ,
self - > ddId ,
& normalDDQueueErrors ( ) ) ) ;
2020-10-24 05:01:53 +08:00
actors . push_back ( printSnapshotTeamsInfo ( remoteTeamCollection ) ) ;
2017-10-11 01:36:33 +08:00
}
2018-11-12 04:33:31 +08:00
primaryTeamCollection - > teamCollections = teamCollectionsPtrs ;
2019-09-04 07:27:14 +08:00
self - > teamCollection = primaryTeamCollection . getPtr ( ) ;
2021-08-13 07:24:03 +08:00
actors . push_back (
reportErrorsExcept ( dataDistributionTeamCollection (
primaryTeamCollection , initData , tcis [ 0 ] , recruitStorage , ddEnabledState ) ,
" DDTeamCollectionPrimary " ,
self - > ddId ,
& normalDDQueueErrors ( ) ) ) ;
2020-10-30 13:15:41 +08:00
2020-10-24 05:01:53 +08:00
actors . push_back ( printSnapshotTeamsInfo ( primaryTeamCollection ) ) ;
2018-08-14 13:29:55 +08:00
actors . push_back ( yieldPromiseStream ( output . getFuture ( ) , input ) ) ;
2017-05-26 04:48:44 +08:00
2021-03-11 02:06:03 +08:00
wait ( waitForAll ( actors ) ) ;
2017-05-26 04:48:44 +08:00
return Void ( ) ;
2021-03-11 02:06:03 +08:00
} catch ( Error & e ) {
2020-11-25 04:34:06 +08:00
trackerCancelled = true ;
2017-05-26 04:48:44 +08:00
state Error err = e ;
2020-07-17 01:22:18 +08:00
TraceEvent ( " DataDistributorDestroyTeamCollections " ) . error ( e ) ;
2019-09-04 07:27:14 +08:00
self - > teamCollection = nullptr ;
2020-05-11 14:14:19 +08:00
primaryTeamCollection = Reference < DDTeamCollection > ( ) ;
remoteTeamCollection = Reference < DDTeamCollection > ( ) ;
2020-10-01 03:22:23 +08:00
wait ( shards . clearAsync ( ) ) ;
2021-03-16 06:55:02 +08:00
TraceEvent ( " DataDistributorTeamCollectionsDestroyed " ) . error ( err ) ;
2021-03-16 01:43:06 +08:00
if (removeFailedServer.getFuture().isReady() && !removeFailedServer.getFuture().isError()) {
    TraceEvent("RemoveFailedServer", removeFailedServer.getFuture().get()).error(err);
    wait(removeKeysFromFailedServer(cx, removeFailedServer.getFuture().get(), lock, ddEnabledState));
    Optional<UID> tssPairID;
    wait(removeStorageServer(cx, removeFailedServer.getFuture().get(), tssPairID, lock, ddEnabledState));
} else {
    if (err.code() != error_code_movekeys_conflict) {
        throw err;
    }

    bool ddEnabled = wait(isDataDistributionEnabled(cx, ddEnabledState));
    TraceEvent("DataDistributionMoveKeysConflict").detail("DataDistributionEnabled", ddEnabled).error(err);
    if (ddEnabled) {
        throw err;
    }
}
2017-05-26 04:48:44 +08:00
}
}
}
2018-12-14 05:31:37 +08:00
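// Errors that the data distributor may throw during normal operation (e.g. reboot or takeover) and that
// should not be treated as failures of the role.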
static std : : set < int > const & normalDataDistributorErrors ( ) {
static std : : set < int > s ;
if ( s . empty ( ) ) {
2021-03-11 02:06:03 +08:00
s . insert ( error_code_worker_removed ) ;
s . insert ( error_code_broken_promise ) ;
s . insert ( error_code_actor_cancelled ) ;
s . insert ( error_code_please_reboot ) ;
s . insert ( error_code_movekeys_conflict ) ;
2018-12-14 05:31:37 +08:00
}
return s ;
}
2021-07-12 13:04:38 +08:00
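// Performs the snapshot sequence: set the write-recovery flag, disable tlog pops, snapshot the local storage
// servers and tlogs, re-enable tlog pops, snapshot the coordinators, then clear the flag.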
ACTOR Future < Void > ddSnapCreateCore ( DistributorSnapRequest snapReq , Reference < AsyncVar < ServerDBInfo > const > db ) {
2021-07-17 15:11:40 +08:00
state Database cx = openDBOnServer ( db , TaskPriority : : DefaultDelay , LockAware : : True ) ;
2020-10-06 04:51:01 +08:00
state ReadYourWritesTransaction tr ( cx ) ;
2020-09-03 03:17:54 +08:00
loop {
try {
2020-10-06 04:51:01 +08:00
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
tr . setOption ( FDBTransactionOptions : : LOCK_AWARE ) ;
2020-09-03 03:17:54 +08:00
TraceEvent ( " SnapDataDistributor_WriteFlagAttempt " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2020-10-06 04:51:01 +08:00
tr . set ( writeRecoveryKey , writeRecoveryKeyTrue ) ;
wait ( tr . commit ( ) ) ;
2020-09-03 03:17:54 +08:00
break ;
} catch ( Error & e ) {
TraceEvent ( " SnapDataDistributor_WriteFlagError " ) . error ( e ) ;
2020-10-06 04:51:01 +08:00
wait ( tr . onError ( e ) ) ;
2020-09-03 03:17:54 +08:00
}
}
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_SnapReqEnter " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2019-06-20 02:12:24 +08:00
try {
// disable tlog pop on local tlog nodes
2019-07-13 01:56:27 +08:00
state std : : vector < TLogInterface > tlogs = db - > get ( ) . logSystemConfig . allLocalLogs ( false ) ;
std : : vector < Future < Void > > disablePops ;
2021-03-04 11:36:21 +08:00
disablePops . reserve ( tlogs . size ( ) ) ;
for ( const auto & tlog : tlogs ) {
2021-03-11 02:06:03 +08:00
disablePops . push_back ( transformErrors (
throwErrorOr ( tlog . disablePopRequest . tryGetReply ( TLogDisablePopRequest ( snapReq . snapUID ) ) ) ,
snap_disable_tlog_pop_failed ( ) ) ) ;
2019-06-20 02:12:24 +08:00
}
wait ( waitForAll ( disablePops ) ) ;
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_AfterDisableTLogPop " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2019-06-20 02:12:24 +08:00
// snap local storage nodes
2021-03-11 02:06:03 +08:00
std : : vector < WorkerInterface > storageWorkers =
wait ( transformErrors ( getStorageWorkers ( cx , db , true /* localOnly */ ) , snap_storage_failed ( ) ) ) ;
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_GotStorageWorkers " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2019-07-13 01:56:27 +08:00
std : : vector < Future < Void > > storageSnapReqs ;
2021-03-04 11:36:21 +08:00
storageSnapReqs . reserve ( storageWorkers . size ( ) ) ;
for ( const auto & worker : storageWorkers ) {
2019-06-20 02:12:24 +08:00
storageSnapReqs . push_back (
2021-03-11 02:06:03 +08:00
transformErrors ( throwErrorOr ( worker . workerSnapReq . tryGetReply ( WorkerSnapRequest (
snapReq . snapPayload , snapReq . snapUID , LiteralStringRef ( " storage " ) ) ) ) ,
snap_storage_failed ( ) ) ) ;
2019-06-20 02:12:24 +08:00
}
wait ( waitForAll ( storageSnapReqs ) ) ;
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_AfterSnapStorage " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2019-06-20 02:12:24 +08:00
// snap local tlog nodes
2019-07-13 01:56:27 +08:00
std : : vector < Future < Void > > tLogSnapReqs ;
2021-03-04 11:36:21 +08:00
tLogSnapReqs . reserve ( tlogs . size ( ) ) ;
for ( const auto & tlog : tlogs ) {
2019-06-20 02:12:24 +08:00
tLogSnapReqs . push_back (
2021-03-11 02:06:03 +08:00
transformErrors ( throwErrorOr ( tlog . snapRequest . tryGetReply (
TLogSnapRequest ( snapReq . snapPayload , snapReq . snapUID , LiteralStringRef ( " tlog " ) ) ) ) ,
snap_tlog_failed ( ) ) ) ;
2019-06-20 02:12:24 +08:00
}
wait ( waitForAll ( tLogSnapReqs ) ) ;
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_AfterTLogStorage " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2019-06-20 02:12:24 +08:00
// enable tlog pop on local tlog nodes
2019-07-13 01:56:27 +08:00
std : : vector < Future < Void > > enablePops ;
2021-03-04 11:36:21 +08:00
enablePops . reserve ( tlogs . size ( ) ) ;
for ( const auto & tlog : tlogs ) {
2019-07-13 01:56:27 +08:00
enablePops . push_back (
2021-03-11 02:06:03 +08:00
transformErrors ( throwErrorOr ( tlog . enablePopRequest . tryGetReply ( TLogEnablePopRequest ( snapReq . snapUID ) ) ) ,
snap_enable_tlog_pop_failed ( ) ) ) ;
2019-06-20 02:12:24 +08:00
}
wait ( waitForAll ( enablePops ) ) ;
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_AfterEnableTLogPops " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2019-06-20 02:12:24 +08:00
// snap the coordinators
2019-07-21 16:00:29 +08:00
std : : vector < WorkerInterface > coordWorkers = wait ( getCoordWorkers ( cx , db ) ) ;
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_GotCoordWorkers " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2019-07-13 01:56:27 +08:00
std : : vector < Future < Void > > coordSnapReqs ;
2021-03-04 11:36:21 +08:00
coordSnapReqs . reserve ( coordWorkers . size ( ) ) ;
for ( const auto & worker : coordWorkers ) {
2019-06-20 02:12:24 +08:00
coordSnapReqs . push_back (
2021-03-11 02:06:03 +08:00
transformErrors ( throwErrorOr ( worker . workerSnapReq . tryGetReply ( WorkerSnapRequest (
snapReq . snapPayload , snapReq . snapUID , LiteralStringRef ( " coord " ) ) ) ) ,
snap_coord_failed ( ) ) ) ;
2019-06-20 02:12:24 +08:00
}
wait ( waitForAll ( coordSnapReqs ) ) ;
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_AfterSnapCoords " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
2020-11-03 05:58:08 +08:00
tr . reset ( ) ;
loop {
try {
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
tr . setOption ( FDBTransactionOptions : : LOCK_AWARE ) ;
TraceEvent ( " SnapDataDistributor_ClearFlagAttempt " )
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID ) ;
tr . clear ( writeRecoveryKey ) ;
wait ( tr . commit ( ) ) ;
break ;
} catch ( Error & e ) {
TraceEvent ( " SnapDataDistributor_ClearFlagError " ) . error ( e ) ;
wait ( tr . onError ( e ) ) ;
}
}
2019-08-24 02:56:06 +08:00
} catch ( Error & err ) {
state Error e = err ;
2019-08-07 07:30:30 +08:00
TraceEvent ( " SnapDataDistributor_SnapReqExit " )
2021-03-11 02:06:03 +08:00
. detail ( " SnapPayload " , snapReq . snapPayload )
. detail ( " SnapUID " , snapReq . snapUID )
. error ( e , true /*includeCancelled */ ) ;
2021-02-21 10:24:21 +08:00
if ( e . code ( ) = = error_code_snap_storage_failed | | e . code ( ) = = error_code_snap_tlog_failed | |
e . code ( ) = = error_code_operation_cancelled | | e . code ( ) = = error_code_snap_disable_tlog_pop_failed ) {
2019-08-24 02:56:06 +08:00
// enable tlog pop on local tlog nodes
std : : vector < TLogInterface > tlogs = db - > get ( ) . logSystemConfig . allLocalLogs ( false ) ;
try {
std : : vector < Future < Void > > enablePops ;
2021-03-04 11:36:21 +08:00
enablePops . reserve ( tlogs . size ( ) ) ;
for ( const auto & tlog : tlogs ) {
2021-03-11 02:06:03 +08:00
enablePops . push_back ( transformErrors (
throwErrorOr ( tlog . enablePopRequest . tryGetReply ( TLogEnablePopRequest ( snapReq . snapUID ) ) ) ,
snap_enable_tlog_pop_failed ( ) ) ) ;
2019-08-24 02:56:06 +08:00
}
wait ( waitForAll ( enablePops ) ) ;
} catch ( Error & error ) {
2021-07-27 10:55:10 +08:00
TraceEvent ( SevDebug , " IgnoreEnableTLogPopFailure " ) . log ( ) ;
2019-08-24 02:56:06 +08:00
}
}
2019-07-21 16:00:29 +08:00
throw e ;
}
return Void ( ) ;
}
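// Disables data distribution while a snapshot is taken: races ddSnapCreateCore() against a dbInfo change
// (which implies a recovery) and a timeout, and re-enables data distribution on every exit path.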
ACTOR Future<Void> ddSnapCreate(DistributorSnapRequest snapReq,
                                Reference<AsyncVar<ServerDBInfo> const> db,
                                DDEnabledState* ddEnabledState) {
	state Future<Void> dbInfoChange = db->onChange();
	if (!ddEnabledState->setDDEnabled(false, snapReq.snapUID)) {
		// Disable DD before doing snapCreate; if a previous snap request has already disabled DD, this
		// operation fails here.
		TraceEvent("SnapDDSetDDEnabledFailedInMemoryCheck").log();
		snapReq.reply.sendError(operation_failed());
		return Void();
	}
	double delayTime = g_network->isSimulated() ? 70.0 : SERVER_KNOBS->SNAP_CREATE_MAX_TIMEOUT;
	try {
		choose {
			when(wait(dbInfoChange)) {
				TraceEvent("SnapDDCreateDBInfoChanged")
				    .detail("SnapPayload", snapReq.snapPayload)
				    .detail("SnapUID", snapReq.snapUID);
				snapReq.reply.sendError(snap_with_recovery_unsupported());
			}
			when(wait(ddSnapCreateCore(snapReq, db))) {
				TraceEvent("SnapDDCreateSuccess")
				    .detail("SnapPayload", snapReq.snapPayload)
				    .detail("SnapUID", snapReq.snapUID);
				snapReq.reply.send(Void());
			}
			when(wait(delay(delayTime))) {
				TraceEvent("SnapDDCreateTimedOut")
				    .detail("SnapPayload", snapReq.snapPayload)
				    .detail("SnapUID", snapReq.snapUID);
				snapReq.reply.sendError(timed_out());
			}
		}
	} catch (Error& e) {
		TraceEvent("SnapDDCreateError")
		    .detail("SnapPayload", snapReq.snapPayload)
		    .detail("SnapUID", snapReq.snapUID)
		    .error(e, true /*includeCancelled*/);
		if (e.code() != error_code_operation_cancelled) {
			snapReq.reply.sendError(e);
		} else {
			// enable DD should always succeed
			bool success = ddEnabledState->setDDEnabled(true, snapReq.snapUID);
			ASSERT(success);
			throw e;
		}
	}
	// enable DD should always succeed
	bool success = ddEnabledState->setDDEnabled(true, snapReq.snapUID);
	ASSERT(success);
	return Void();
}
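// Worked example (illustrative; assumes DD_EXCLUDE_MIN_REPLICAS is 1): for a team {A, B, C}, excluding
// {A, B} leaves 3 - 2 = 1 server, so the team below still passes the check; excluding {A, B, C} leaves 0
// servers and the whole exclusion is reported as unsafe.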
// Find size of set intersection of excludeServerIDs and serverIDs on each team and see if the leftover team is valid
bool _exclusionSafetyCheck(vector<UID>& excludeServerIDs, DDTeamCollection* teamCollection) {
	std::sort(excludeServerIDs.begin(), excludeServerIDs.end());
	for (const auto& team : teamCollection->teams) {
		vector<UID> teamServerIDs = team->getServerIDs();
		std::sort(teamServerIDs.begin(), teamServerIDs.end());
		TraceEvent(SevDebug, "DDExclusionSafetyCheck", teamCollection->distributorId)
		    .detail("Excluding", describe(excludeServerIDs))
		    .detail("Existing", team->getDesc());
		// Find size of set intersection of both vectors and see if the leftover team is valid
		vector<UID> intersectSet(teamServerIDs.size());
		auto it = std::set_intersection(excludeServerIDs.begin(),
		                                excludeServerIDs.end(),
		                                teamServerIDs.begin(),
		                                teamServerIDs.end(),
		                                intersectSet.begin());
		intersectSet.resize(it - intersectSet.begin());
		if (teamServerIDs.size() - intersectSet.size() < SERVER_KNOBS->DD_EXCLUDE_MIN_REPLICAS) {
			return false;
		}
	}
	return true;
}
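// Translates the excluded addresses in the request into storage server UIDs and replies whether excluding
// them would still leave every team with enough remaining replicas.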
ACTOR Future<Void> ddExclusionSafetyCheck(DistributorExclusionSafetyCheckRequest req,
                                          Reference<DataDistributorData> self,
                                          Database cx) {
	TraceEvent("DDExclusionSafetyCheckBegin", self->ddId).log();
	vector<StorageServerInterface> ssis = wait(getStorageServers(cx));
	DistributorExclusionSafetyCheckReply reply(true);
	if (!self->teamCollection) {
		TraceEvent("DDExclusionSafetyCheckTeamCollectionInvalid", self->ddId).log();
		reply.safe = false;
		req.reply.send(reply);
		return Void();
	}
	// If there is only 1 team, it is unsafe to mark servers failed: team building can get stuck due to the lack of
	// servers left
	if (self->teamCollection->teams.size() <= 1) {
		TraceEvent("DDExclusionSafetyCheckNotEnoughTeams", self->ddId).log();
		reply.safe = false;
		req.reply.send(reply);
		return Void();
	}
	vector<UID> excludeServerIDs;
	// Go through storage server interfaces and translate Address -> server ID (UID)
	for (const AddressExclusion& excl : req.exclusions) {
		for (const auto& ssi : ssis) {
			if (excl.excludes(ssi.address()) ||
			    (ssi.secondaryAddress().present() && excl.excludes(ssi.secondaryAddress().get()))) {
				excludeServerIDs.push_back(ssi.id());
			}
		}
	}
	reply.safe = _exclusionSafetyCheck(excludeServerIDs, self->teamCollection);
	TraceEvent("DDExclusionSafetyCheckFinish", self->ddId).log();
	req.reply.send(reply);
	return Void();
}
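// Waits until the given storage cache server is reported failed, then clears its registration key from the
// system keyspace so it is no longer advertised as a cache.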
ACTOR Future<Void> waitFailCacheServer(Database* db, StorageServerInterface ssi) {
	state Transaction tr(*db);
	state Key key = storageCacheServerKey(ssi.id());
	wait(waitFailureClient(ssi.waitFailure));
	loop {
		tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
		try {
			tr.addReadConflictRange(storageCacheServerKeys);
			tr.clear(key);
			wait(tr.commit());
			break;
		} catch (Error& e) {
			wait(tr.onError(e));
		}
	}
	return Void();
}
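// Periodically (every ~5 seconds) rescans the storage cache server registrations and spawns a
// waitFailCacheServer() watcher for each cache server it has not seen before.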
ACTOR Future<Void> cacheServerWatcher(Database* db) {
	state Transaction tr(*db);
	state ActorCollection actors(false);
	state std::set<UID> knownCaches;
	loop {
		tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
		try {
			RangeResult range = wait(tr.getRange(storageCacheServerKeys, CLIENT_KNOBS->TOO_MANY));
			ASSERT(!range.more);
			std::set<UID> caches;
			for (auto& kv : range) {
				UID id;
				BinaryReader reader{ kv.key.removePrefix(storageCacheServersPrefix), Unversioned() };
				reader >> id;
				caches.insert(id);
				if (knownCaches.find(id) == knownCaches.end()) {
					StorageServerInterface ssi;
					BinaryReader reader{ kv.value, IncludeVersion() };
					reader >> ssi;
					actors.add(waitFailCacheServer(db, ssi));
				}
			}
			knownCaches = std::move(caches);
			tr.reset();
			wait(delay(5.0) || actors.getResult());
			ASSERT(!actors.getResult().isReady());
		} catch (Error& e) {
			wait(tr.onError(e));
		}
	}
}
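// Returns the median shard size; std::nth_element only partially orders metricVec around the middle
// element, which is O(n) on average rather than the cost of a full sort.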
static int64_t getMedianShardSize(VectorRef<DDMetricsRef> metricVec) {
	std::nth_element(metricVec.begin(),
	                 metricVec.begin() + metricVec.size() / 2,
	                 metricVec.end(),
	                 [](const DDMetricsRef& d1, const DDMetricsRef& d2) { return d1.shardBytes < d2.shardBytes; });
	return metricVec[metricVec.size() / 2].shardBytes;
}
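// Serves a GetDataDistributorMetricsRequest: returns either the full per-shard metrics list or, when
// req.midOnly is set, just the median shard size.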
ACTOR Future<Void> ddGetMetrics(GetDataDistributorMetricsRequest req,
                                PromiseStream<GetMetricsListRequest> getShardMetricsList) {
	ErrorOr<Standalone<VectorRef<DDMetricsRef>>> result = wait(
	    errorOr(brokenPromiseToNever(getShardMetricsList.getReply(GetMetricsListRequest(req.keys, req.shardLimit)))));

	if (result.isError()) {
		req.reply.sendError(result.getError());
	} else {
		GetDataDistributorMetricsReply rep;
		if (!req.midOnly) {
			rep.storageMetricsList = result.get();
		} else {
			auto& metricVec = result.get();
			if (metricVec.empty())
				rep.midShardSize = 0;
			else {
				rep.midShardSize = getMedianShardSize(metricVec.contents());
			}
		}
		req.reply.send(rep);
	}
	return Void();
}
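// Main entry point for the data distributor role: starts the core dataDistribution() actor and then serves
// halt, metrics, snapshot, and exclusion safety check requests until it is halted or hits an unexpected error.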
ACTOR Future<Void> dataDistributor(DataDistributorInterface di, Reference<AsyncVar<ServerDBInfo> const> db) {
	state Reference<DataDistributorData> self(new DataDistributorData(db, di.id()));
	state Future<Void> collection = actorCollection(self->addActor.getFuture());
	state PromiseStream<GetMetricsListRequest> getShardMetricsList;
	state Database cx = openDBOnServer(db, TaskPriority::DefaultDelay, LockAware::True);
	state ActorCollection actors(false);
	state DDEnabledState ddEnabledState;
	self->addActor.send(actors.getResult());
	self->addActor.send(traceRole(Role::DATA_DISTRIBUTOR, di.id()));

	try {
		TraceEvent("DataDistributorRunning", di.id());
		self->addActor.send(waitFailureServer(di.waitFailure.getFuture()));
		self->addActor.send(cacheServerWatcher(&cx));
		state Future<Void> distributor =
		    reportErrorsExcept(dataDistribution(self, getShardMetricsList, &ddEnabledState),
		                       "DataDistribution",
		                       di.id(),
		                       &normalDataDistributorErrors());

		loop choose {
			when(wait(distributor || collection)) {
				ASSERT(false);
				throw internal_error();
			}
			when(HaltDataDistributorRequest req = waitNext(di.haltDataDistributor.getFuture())) {
				req.reply.send(Void());
				TraceEvent("DataDistributorHalted", di.id()).detail("ReqID", req.requesterID);
				break;
			}
			when(GetDataDistributorMetricsRequest req = waitNext(di.dataDistributorMetrics.getFuture())) {
				actors.add(ddGetMetrics(req, getShardMetricsList));
			}
			when(DistributorSnapRequest snapReq = waitNext(di.distributorSnapReq.getFuture())) {
				actors.add(ddSnapCreate(snapReq, db, &ddEnabledState));
			}
			when(DistributorExclusionSafetyCheckRequest exclCheckReq =
			         waitNext(di.distributorExclCheckReq.getFuture())) {
				actors.add(ddExclusionSafetyCheck(exclCheckReq, self, cx));
			}
		}
	} catch (Error& err) {
		if (normalDataDistributorErrors().count(err.code()) == 0) {
			TraceEvent("DataDistributorError", di.id()).error(err, true);
			throw err;
		}
		TraceEvent("DataDistributorDied", di.id()).error(err, true);
	}
	return Void();
}
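// Unit-test helper: builds an in-memory DDTeamCollection over synthetic storage server interfaces (no real
// cluster is contacted); zoneid is id % 5 and data_hall is id % 3, so several servers share each zone.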
std::unique_ptr<DDTeamCollection> testTeamCollection(int teamSize,
                                                     Reference<IReplicationPolicy> policy,
                                                     int processCount) {
	Database database = DatabaseContext::create(
	    makeReference<AsyncVar<ClientDBInfo>>(), Never(), LocalityData(), EnableLocalityLoadBalance::False);

	DatabaseConfiguration conf;
	conf.storageTeamSize = teamSize;
	conf.storagePolicy = policy;

	auto collection =
	    std::unique_ptr<DDTeamCollection>(new DDTeamCollection(database,
	                                                           UID(0, 0),
	                                                           MoveKeysLock(),
	                                                           PromiseStream<RelocateShard>(),
	                                                           makeReference<ShardsAffectedByTeamFailure>(),
	                                                           conf,
	                                                           {},
	                                                           {},
	                                                           Future<Void>(Void()),
	                                                           makeReference<AsyncVar<bool>>(true),
	                                                           IsPrimary::True,
	                                                           makeReference<AsyncVar<bool>>(false),
	                                                           PromiseStream<GetMetricsRequest>(),
	                                                           Promise<UID>(),
	                                                           PromiseStream<Promise<int>>()));

	for (int id = 1; id <= processCount; ++id) {
		UID uid(id, 0);
		StorageServerInterface interface;
		interface.uniqueID = uid;
		interface.locality.set(LiteralStringRef("machineid"), Standalone<StringRef>(std::to_string(id)));
		interface.locality.set(LiteralStringRef("zoneid"), Standalone<StringRef>(std::to_string(id % 5)));
		interface.locality.set(LiteralStringRef("data_hall"), Standalone<StringRef>(std::to_string(id % 3)));
		collection->server_info[uid] = makeReference<TCServerInfo>(
		    interface, collection.get(), ProcessClass(), true, collection->storageServerSet);
		collection->server_status.set(uid, ServerStatus(false, false, false, interface.locality));
		collection->checkAndCreateMachine(collection->server_info[uid]);
	}
	return collection;
}
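// Unit-test helper: like testTeamCollection(), but derives dcid, data_hall, zoneid and machineid from the
// process id (id / 1000, / 100, / 10 and / 5 respectively), so that several servers share each machine.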
std::unique_ptr<DDTeamCollection> testMachineTeamCollection(int teamSize,
                                                            Reference<IReplicationPolicy> policy,
                                                            int processCount) {
	Database database = DatabaseContext::create(
	    makeReference<AsyncVar<ClientDBInfo>>(), Never(), LocalityData(), EnableLocalityLoadBalance::False);

	DatabaseConfiguration conf;
	conf.storageTeamSize = teamSize;
	conf.storagePolicy = policy;

	auto collection =
	    std::unique_ptr<DDTeamCollection>(new DDTeamCollection(database,
	                                                           UID(0, 0),
	                                                           MoveKeysLock(),
	                                                           PromiseStream<RelocateShard>(),
	                                                           makeReference<ShardsAffectedByTeamFailure>(),
	                                                           conf,
	                                                           {},
	                                                           {},
	                                                           Future<Void>(Void()),
	                                                           makeReference<AsyncVar<bool>>(true),
	                                                           IsPrimary::True,
	                                                           makeReference<AsyncVar<bool>>(false),
	                                                           PromiseStream<GetMetricsRequest>(),
	                                                           Promise<UID>(),
	                                                           PromiseStream<Promise<int>>()));

	for (int id = 1; id <= processCount; id++) {
		UID uid(id, 0);
		StorageServerInterface interface;
		interface.uniqueID = uid;
		int process_id = id;
		int dc_id = process_id / 1000;
		int data_hall_id = process_id / 100;
		int zone_id = process_id / 10;
		int machine_id = process_id / 5;

		printf("testMachineTeamCollection: process_id:%d zone_id:%d machine_id:%d ip_addr:%s\n",
		       process_id,
		       zone_id,
		       machine_id,
		       interface.address().toString().c_str());
		interface.locality.set(LiteralStringRef("processid"), Standalone<StringRef>(std::to_string(process_id)));
		interface.locality.set(LiteralStringRef("machineid"), Standalone<StringRef>(std::to_string(machine_id)));
		interface.locality.set(LiteralStringRef("zoneid"), Standalone<StringRef>(std::to_string(zone_id)));
		interface.locality.set(LiteralStringRef("data_hall"), Standalone<StringRef>(std::to_string(data_hall_id)));
		interface.locality.set(LiteralStringRef("dcid"), Standalone<StringRef>(std::to_string(dc_id)));

		collection->server_info[uid] = makeReference<TCServerInfo>(
		    interface, collection.get(), ProcessClass(), true, collection->storageServerSet);

		collection->server_status.set(uid, ServerStatus(false, false, false, interface.locality));
	}
	int totalServerIndex = collection->constructMachinesFromServers();
	printf("testMachineTeamCollection: construct machines for %d servers\n", totalServerIndex);
	return collection;
}
TEST_CASE ( " DataDistribution/AddTeamsBestOf/UseMachineID " ) {
wait ( Future < Void > ( Void ( ) ) ) ;
2018-11-22 03:18:26 +08:00
int teamSize = 3 ; // replication size
2018-08-30 05:40:39 +08:00
int processSize = 60 ;
2019-02-05 09:35:07 +08:00
int desiredTeams = SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER * processSize ;
int maxTeams = SERVER_KNOBS - > MAX_TEAMS_PER_SERVER * processSize ;
2018-08-30 05:40:39 +08:00
2021-03-11 02:06:03 +08:00
Reference < IReplicationPolicy > policy = Reference < IReplicationPolicy > (
new PolicyAcross ( teamSize , " zoneid " , Reference < IReplicationPolicy > ( new PolicyOne ( ) ) ) ) ;
2020-12-27 11:40:54 +08:00
state std : : unique_ptr < DDTeamCollection > collection = testMachineTeamCollection ( teamSize , policy , processSize ) ;
2018-08-30 05:40:39 +08:00
2019-03-30 04:21:15 +08:00
collection - > addTeamsBestOf ( 30 , desiredTeams , maxTeams ) ;
2018-08-30 05:40:39 +08:00
2018-11-22 03:18:26 +08:00
ASSERT ( collection - > sanityCheckTeams ( ) = = true ) ;
2018-08-30 05:40:39 +08:00
return Void ( ) ;
}
TEST_CASE ( " DataDistribution/AddTeamsBestOf/NotUseMachineID " ) {
wait ( Future < Void > ( Void ( ) ) ) ;
2018-11-22 03:18:26 +08:00
int teamSize = 3 ; // replication size
2018-08-30 05:40:39 +08:00
int processSize = 60 ;
2019-02-05 09:35:07 +08:00
int desiredTeams = SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER * processSize ;
int maxTeams = SERVER_KNOBS - > MAX_TEAMS_PER_SERVER * processSize ;
2018-08-30 05:40:39 +08:00
2021-03-11 02:06:03 +08:00
Reference < IReplicationPolicy > policy = Reference < IReplicationPolicy > (
new PolicyAcross ( teamSize , " zoneid " , Reference < IReplicationPolicy > ( new PolicyOne ( ) ) ) ) ;
2020-12-27 11:40:54 +08:00
state std : : unique_ptr < DDTeamCollection > collection = testMachineTeamCollection ( teamSize , policy , processSize ) ;
2018-08-30 05:40:39 +08:00
2020-08-19 05:18:50 +08:00
if ( collection = = nullptr ) {
2018-08-30 05:40:39 +08:00
fprintf ( stderr , " collection is null \n " ) ;
return Void ( ) ;
}
2018-11-22 03:18:26 +08:00
2018-11-28 01:10:11 +08:00
collection - > addBestMachineTeams ( 30 ) ; // Create machine teams to help debug
2019-03-30 04:21:15 +08:00
collection - > addTeamsBestOf ( 30 , desiredTeams , maxTeams ) ;
2018-08-30 05:40:39 +08:00
collection - > sanityCheckTeams ( ) ; // Server team may happen to be on the same machine team, although unlikely
return Void ( ) ;
}
TEST_CASE ( " DataDistribution/AddAllTeams/isExhaustive " ) {
2021-03-11 02:06:03 +08:00
Reference < IReplicationPolicy > policy =
Reference < IReplicationPolicy > ( new PolicyAcross ( 3 , " zoneid " , Reference < IReplicationPolicy > ( new PolicyOne ( ) ) ) ) ;
2019-02-05 09:35:07 +08:00
state int processSize = 10 ;
state int desiredTeams = SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER * processSize ;
state int maxTeams = SERVER_KNOBS - > MAX_TEAMS_PER_SERVER * processSize ;
2020-12-27 11:40:54 +08:00
state std : : unique_ptr < DDTeamCollection > collection = testTeamCollection ( 3 , policy , processSize ) ;
2017-05-26 04:48:44 +08:00
2019-02-05 09:35:07 +08:00
int result = collection - > addTeamsBestOf ( 200 , desiredTeams , maxTeams ) ;
2018-08-30 05:40:39 +08:00
2018-12-06 07:03:14 +08:00
// The maximum number of available server teams without considering machine locality is 120
// The maximum number of available server teams with machine locality constraint is 120 - 40, because
// the 40 (5*4*2) server teams whose servers come from the same machine are invalid.
2017-05-26 04:48:44 +08:00
ASSERT ( result = = 80 ) ;
return Void ( ) ;
}
TEST_CASE("/DataDistribution/AddAllTeams/withLimit") {
	Reference<IReplicationPolicy> policy =
	    Reference<IReplicationPolicy>(new PolicyAcross(3, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())));
	state int processSize = 10;
	state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize;
	state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
	state std::unique_ptr<DDTeamCollection> collection = testTeamCollection(3, policy, processSize);

	int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);

	ASSERT(result >= 10);
	return Void();
}
TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") {
	wait(Future<Void>(Void()));
	Reference<IReplicationPolicy> policy =
	    Reference<IReplicationPolicy>(new PolicyAcross(3, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())));
	state int processSize = 10;
	state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize;
	state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
	state int teamSize = 3;
	// state int targetTeamsPerServer = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (teamSize + 1) / 2;
	state std::unique_ptr<DDTeamCollection> collection = testTeamCollection(teamSize, policy, processSize);

	collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
	collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);

	state int result = collection->addTeamsBestOf(8, desiredTeams, maxTeams);

	ASSERT(result >= 8);

	for (auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
		auto teamCount = process->second->teams.size();
		ASSERT(teamCount >= 1);
		// ASSERT(teamCount <= targetTeamsPerServer);
	}
	return Void();
}
// Due to the randomness in choosing the machine team and the server team from the machine team, it is possible that
// we may not find the remaining several (e.g., 1 or 2) available teams.
// It is hard to conclude what the minimum number of teams addTeamsBestOf() should create is in this situation.
TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") {
	wait(Future<Void>(Void()));

	Reference<IReplicationPolicy> policy =
	    Reference<IReplicationPolicy>(new PolicyAcross(3, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())));
	state int processSize = 5;
	state int desiredTeams = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * processSize;
	state int maxTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * processSize;
	state int teamSize = 3;
	state std::unique_ptr<DDTeamCollection> collection = testTeamCollection(teamSize, policy, processSize);

	collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
	collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);

	collection->addBestMachineTeams(10);
	int result = collection->addTeamsBestOf(10, desiredTeams, maxTeams);

	if (collection->machineTeams.size() != 10 || result != 8) {
		collection->traceAllInfo(true); // Debug message
	}
	// NOTE: Due to the pure randomness in selecting a machine for a machine team, we cannot guarantee that
	// all machine teams are created. When we change the selectReplicas function to achieve such a guarantee,
	// we can enable the following ASSERT.
	ASSERT(collection->machineTeams.size() == 10); // Should create all machine teams

	// We need to guarantee that a server always has at least one team, so that the server can participate in data
	// distribution.
	for (auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
		auto teamCount = process->second->teams.size();
		ASSERT(teamCount >= 1);
	}
	// If we find all available teams, result will be 8 because we prebuilt 2 teams
	ASSERT(result == 8);

	return Void();
}