/*
 * DataDistribution.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "flow/ActorCollection.h"
#include "fdbserver/DataDistribution.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbserver/MoveKeys.h"
#include "fdbserver/Knobs.h"
#include <set>
#include "fdbserver/WaitFailure.h"
#include "fdbserver/ServerDBInfo.h"
#include "fdbserver/IKeyValueStore.h"
#include "fdbclient/ManagementAPI.h"
#include "fdbrpc/Replication.h"
#include "flow/UnitTest.h"
#include "../flow/Trace.h"
#include "flow/actorcompiler.h" // This must be the last #include.
class TCTeamInfo;
struct TCMachineInfo;
class TCMachineTeamInfo;
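
// Per-storage-server state kept by the data distributor: the server's last known interface and
// process class, the teams it participates in, the machine it runs on, and the promises/futures
// used to signal interface changes, removal, and metrics updates.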
struct TCServerInfo : public ReferenceCounted<TCServerInfo> {
	UID id;
	StorageServerInterface lastKnownInterface;
	ProcessClass lastKnownClass;
	vector<Reference<TCTeamInfo>> teams;
	Reference<TCMachineInfo> machine;
	Future<Void> tracker;
	int64_t dataInFlightToServer;
	ErrorOr<GetPhysicalMetricsReply> serverMetrics;
	Promise<std::pair<StorageServerInterface, ProcessClass>> interfaceChanged;
	Future<std::pair<StorageServerInterface, ProcessClass>> onInterfaceChanged;
	Promise<Void> removed;
	Future<Void> onRemoved;
	Promise<Void> wakeUpTracker;
	bool inDesiredDC;
	LocalityEntry localityEntry;
	Promise<Void> updated;

	TCServerInfo(StorageServerInterface ssi, ProcessClass processClass, bool inDesiredDC,
	             Reference<LocalitySet> storageServerSet)
	  : id(ssi.id()), lastKnownInterface(ssi), lastKnownClass(processClass), dataInFlightToServer(0),
	    onInterfaceChanged(interfaceChanged.getFuture()), onRemoved(removed.getFuture()), inDesiredDC(inDesiredDC) {
		localityEntry = ((LocalityMap<UID>*)storageServerSet.getPtr())->add(ssi.locality, &id);
	}
};
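
// All storage server processes that share a machine ID (zoneId) are grouped into one
// TCMachineInfo, which also records the machine teams this machine belongs to.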
struct TCMachineInfo : public ReferenceCounted<TCMachineInfo> {
	std::vector<Reference<TCServerInfo>> serversOnMachine; // SOMEDAY: change from vector to set
	Standalone<StringRef> machineID;
	std::vector<Reference<TCMachineTeamInfo>> machineTeams; // SOMEDAY: split good and bad machine teams.
	LocalityEntry localityEntry;

	explicit TCMachineInfo(Reference<TCServerInfo> server, const LocalityEntry& entry) : localityEntry(entry) {
		ASSERT(serversOnMachine.empty());
		serversOnMachine.push_back(server);
		machineID = server->lastKnownInterface.locality.zoneId().get();
	}

	std::string getServersIDStr() {
		std::string str;
		if (serversOnMachine.empty()) return "[unset]";

		for (auto& server : serversOnMachine) {
			str += server->id.toString() + " ";
		}
		str.pop_back();
		return str;
	}
};
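
// Fetch physical metrics from a storage server. On a failed reply it waits METRIC_DELAY (or for
// the failure monitor to report the endpoint healthy again) before retrying, re-targets the
// request if the server's interface changes, and returns once metrics are stored or the server
// is removed.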
ACTOR Future<Void> updateServerMetrics(TCServerInfo* server) {
	state StorageServerInterface ssi = server->lastKnownInterface;
	state Future<ErrorOr<GetPhysicalMetricsReply>> metricsRequest = ssi.getPhysicalMetrics.tryGetReply(GetPhysicalMetricsRequest(), TaskDataDistributionLaunch);
	state Future<Void> resetRequest = Never();
	state Future<std::pair<StorageServerInterface, ProcessClass>> interfaceChanged(server->onInterfaceChanged);
	state Future<Void> serverRemoved(server->onRemoved);

	loop {
		choose {
			when(ErrorOr<GetPhysicalMetricsReply> rep = wait(metricsRequest)) {
				if (rep.present()) {
					server->serverMetrics = rep;
					if (server->updated.canBeSet()) {
						server->updated.send(Void());
					}
					return Void();
				}
				metricsRequest = Never();
				resetRequest = delay(SERVER_KNOBS->METRIC_DELAY, TaskDataDistributionLaunch);
			}
			when(std::pair<StorageServerInterface, ProcessClass> _ssi = wait(interfaceChanged)) {
				ssi = _ssi.first;
				interfaceChanged = server->onInterfaceChanged;
				resetRequest = Void();
			}
			when(wait(serverRemoved)) {
				return Void();
			}
			when(wait(resetRequest)) { // To prevent a tight spin loop
				if (IFailureMonitor::failureMonitor().getState(ssi.getPhysicalMetrics.getEndpoint()).isFailed()) {
					resetRequest = IFailureMonitor::failureMonitor().onStateEqual(ssi.getPhysicalMetrics.getEndpoint(), FailureStatus(false));
				}
				else {
					resetRequest = Never();
					metricsRequest = ssi.getPhysicalMetrics.tryGetReply(GetPhysicalMetricsRequest(), TaskDataDistributionLaunch);
				}
			}
		}
	}
}

ACTOR Future<Void> updateServerMetrics(Reference<TCServerInfo> server) {
	wait(updateServerMetrics(server.getPtr()));
	return Void();
}

// Machine team information
class TCMachineTeamInfo : public ReferenceCounted<TCMachineTeamInfo> {
public:
	vector<Reference<TCMachineInfo>> machines;
	vector<Standalone<StringRef>> machineIDs;

	explicit TCMachineTeamInfo(vector<Reference<TCMachineInfo>> const& machines) : machines(machines) {
		machineIDs.reserve(machines.size());
		for (int i = 0; i < machines.size(); i++) {
			machineIDs.push_back(machines[i]->machineID);
		}
		sort(machineIDs.begin(), machineIDs.end());
	}

	int size() {
		ASSERT(machines.size() == machineIDs.size());
		return machineIDs.size();
	}

	std::string getMachineIDsStr() {
		std::string str;
		if (this == NULL || machineIDs.empty()) return "[unset]";

		for (auto& id : machineIDs) {
			str += id.contents().toString() + " ";
		}
		str.pop_back();
		return str;
	}

	int getTotalMachineTeamNumber() {
		int count = 0;
		for (auto& machine : machines) {
			ASSERT(machine->machineTeams.size() >= 0);
			count += machine->machineTeams.size();
		}
		return count;
	}

	bool operator==(TCMachineTeamInfo& rhs) const { return this->machineIDs == rhs.machineIDs; }
};
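
// A team of storage servers that jointly replicate a set of shards. Implements
// IDataDistributionTeam so the data distribution queue can query its load, free space, and health.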
class TCTeamInfo : public ReferenceCounted<TCTeamInfo>, public IDataDistributionTeam {
public:
	vector<Reference<TCServerInfo>> servers;
	vector<UID> serverIDs;
	Reference<TCMachineTeamInfo> machineTeam;
	Future<Void> tracker;
	bool healthy;
	bool wrongConfiguration; // True if any of the servers in the team have the wrong configuration
	int priority;

	explicit TCTeamInfo(vector<Reference<TCServerInfo>> const& servers)
	  : servers(servers), healthy(true), priority(PRIORITY_TEAM_HEALTHY), wrongConfiguration(false) {
		if (servers.empty()) {
			TraceEvent(SevInfo, "ConstructTCTeamFromEmptyServers");
		}
		serverIDs.reserve(servers.size());
		for (int i = 0; i < servers.size(); i++) {
			serverIDs.push_back(servers[i]->id);
		}
	}

	virtual vector<StorageServerInterface> getLastKnownServerInterfaces() {
		vector<StorageServerInterface> v;
		v.reserve(servers.size());
		for (int i = 0; i < servers.size(); i++)
			v.push_back(servers[i]->lastKnownInterface);
		return v;
	}
	virtual int size() { return servers.size(); }
	virtual vector<UID> const& getServerIDs() { return serverIDs; }
	virtual std::string getServerIDsStr() {
		std::string str;
		if (this == NULL || serverIDs.empty()) return "[unset]";

		for (auto& id : serverIDs) {
			str += id.toString() + " ";
		}
		str.pop_back();
		return str;
	}

	virtual void addDataInFlightToTeam(int64_t delta) {
		for (int i = 0; i < servers.size(); i++)
			servers[i]->dataInFlightToServer += delta;
	}
	virtual int64_t getDataInFlightToTeam() {
		int64_t dataInFlight = 0.0;
		for (int i = 0; i < servers.size(); i++)
			dataInFlight += servers[i]->dataInFlightToServer;
		return dataInFlight;
	}
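
	// Estimated load on this team: the average physical bytes across members plus a penalized
	// share of the data currently in flight, inflated when any member is low on free disk space.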
	virtual int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0) {
		int64_t physicalBytes = getLoadAverage();
		double minFreeSpaceRatio = getMinFreeSpaceRatio(includeInFlight);
		int64_t inFlightBytes = includeInFlight ? getDataInFlightToTeam() / servers.size() : 0;
		double freeSpaceMultiplier = SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF / (std::max(std::min(SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF, minFreeSpaceRatio), 0.000001));

		if (freeSpaceMultiplier > 1 && g_random->random01() < 0.001)
			TraceEvent(SevWarn, "DiskNearCapacity").detail("FreeSpaceRatio", minFreeSpaceRatio);

		return (physicalBytes + (inflightPenalty * inFlightBytes)) * freeSpaceMultiplier;
	}

	virtual int64_t getMinFreeSpace(bool includeInFlight = true) {
		int64_t minFreeSpace = std::numeric_limits<int64_t>::max();
		for (int i = 0; i < servers.size(); i++) {
			if (servers[i]->serverMetrics.present()) {
				auto& replyValue = servers[i]->serverMetrics.get();

				ASSERT(replyValue.free.bytes >= 0);
				ASSERT(replyValue.capacity.bytes >= 0);

				int64_t bytesFree = replyValue.free.bytes;
				if (includeInFlight) {
					bytesFree -= servers[i]->dataInFlightToServer;
				}

				minFreeSpace = std::min(bytesFree, minFreeSpace);
			}
		}

		return minFreeSpace; // Could be negative
	}

	virtual double getMinFreeSpaceRatio(bool includeInFlight = true) {
		double minRatio = 1.0;
		for (int i = 0; i < servers.size(); i++) {
			if (servers[i]->serverMetrics.present()) {
				auto& replyValue = servers[i]->serverMetrics.get();

				ASSERT(replyValue.free.bytes >= 0);
				ASSERT(replyValue.capacity.bytes >= 0);

				int64_t bytesFree = replyValue.free.bytes;
				if (includeInFlight) {
					bytesFree = std::max((int64_t)0, bytesFree - servers[i]->dataInFlightToServer);
				}

				if (replyValue.capacity.bytes == 0)
					minRatio = 0;
				else
					minRatio = std::min(minRatio, ((double)bytesFree) / replyValue.capacity.bytes);
			}
		}

		return minRatio;
	}

	virtual bool hasHealthyFreeSpace() {
		return getMinFreeSpaceRatio() > SERVER_KNOBS->MIN_FREE_SPACE_RATIO && getMinFreeSpace() > SERVER_KNOBS->MIN_FREE_SPACE;
	}

	virtual Future<Void> updatePhysicalMetrics() {
		return doUpdatePhysicalMetrics(this);
	}

	virtual bool isOptimal() {
		for (int i = 0; i < servers.size(); i++) {
			if (servers[i]->lastKnownClass.machineClassFitness(ProcessClass::Storage) > ProcessClass::UnsetFit) {
				return false;
			}
		}
		return true;
	}

	virtual bool isWrongConfiguration() { return wrongConfiguration; }
	virtual void setWrongConfiguration(bool wrongConfiguration) { this->wrongConfiguration = wrongConfiguration; }
	virtual bool isHealthy() { return healthy; }
	virtual void setHealthy(bool h) { healthy = h; }
	virtual int getPriority() { return priority; }
	virtual void setPriority(int p) { priority = p; }
	virtual void addref() { ReferenceCounted<TCTeamInfo>::addref(); }
	virtual void delref() { ReferenceCounted<TCTeamInfo>::delref(); }

	virtual void addServers(const vector<UID>& servers) {
		serverIDs.reserve(servers.size());
		for (int i = 0; i < servers.size(); i++) {
			serverIDs.push_back(servers[i]);
		}
	}

private:
	// Calculate an "average" of the metrics replies that we received. Penalize teams from which we did not receive all replies.
	int64_t getLoadAverage() {
		int64_t bytesSum = 0;
		int added = 0;
		for (int i = 0; i < servers.size(); i++)
			if (servers[i]->serverMetrics.present()) {
				added++;
				bytesSum += servers[i]->serverMetrics.get().load.bytes;
			}

		if (added < servers.size())
			bytesSum *= 2;

		return added == 0 ? 0 : bytesSum / added;
	}

	// Refresh the physical metrics of every server in the team.
	ACTOR Future<Void> doUpdatePhysicalMetrics(TCTeamInfo* self) {
		std::vector<Future<Void>> updates;
		for (int i = 0; i < self->servers.size(); i++)
			updates.push_back(updateServerMetrics(self->servers[i]));
		wait(waitForAll(updates));
		return Void();
	}
};
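
// Health flags the storage server tracker maintains for each server process; kept in an AsyncMap
// (ServerStatusMap) keyed by server UID.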
struct ServerStatus {
	bool isFailed;
	bool isUndesired;
	bool isWrongConfiguration;
	bool initialized; // AsyncMap erases default constructed objects
	LocalityData locality;
	ServerStatus() : isFailed(true), isUndesired(false), isWrongConfiguration(false), initialized(false) {}
	ServerStatus(bool isFailed, bool isUndesired, LocalityData const& locality) : isFailed(isFailed), isUndesired(isUndesired), locality(locality), isWrongConfiguration(false), initialized(true) {}

	bool isUnhealthy() const { return isFailed || isUndesired; }
	const char* toString() const { return isFailed ? "Failed" : isUndesired ? "Undesired" : "Healthy"; }

	bool operator==(ServerStatus const& r) const { return isFailed == r.isFailed && isUndesired == r.isUndesired && isWrongConfiguration == r.isWrongConfiguration && locality == r.locality && initialized == r.initialized; }

	// If a process has reappeared without the storage server that was on it (isFailed == true), we don't need to exclude it
	// We also don't need to exclude processes who are in the wrong configuration (since those servers will be removed)
	bool excludeOnRecruit() { return !isFailed && !isWrongConfiguration; }
};

typedef AsyncMap<UID, ServerStatus> ServerStatusMap;
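
// Block until the given storage server can be removed: enough versions have passed since it was
// added and canRemoveStorageServer() reports that it no longer serves any shards.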
ACTOR Future<Void> waitForAllDataRemoved(Database cx, UID serverID, Version addedVersion) {
	state Transaction tr(cx);
	loop {
		try {
			tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
			Version ver = wait(tr.getReadVersion());

			// We cannot remove a server immediately after adding it, because a perfectly timed master recovery could cause us to not store the mutations sent to the short lived storage server.
			if (ver > addedVersion + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
				bool canRemove = wait(canRemoveStorageServer(&tr, serverID));
				if (canRemove) {
					return Void();
				}
			}

			// Wait for any change to the serverKeys for this server
			wait(delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskDataDistribution));
			tr.reset();
		} catch (Error& e) {
			wait(tr.onError(e));
		}
	}
}

// Read keyservers, return unique set of teams
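// First reads the data distribution mode and the server list in one pass, then scans the
// keyServers map (possibly across several transactions) to rebuild the shard-to-team mapping,
// splitting each team into primary and remote members when remote DCs are configured.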
ACTOR Future<Reference<InitialDataDistribution>> getInitialDataDistribution(Database cx, UID masterId, MoveKeysLock moveKeysLock, std::vector<Optional<Key>> remoteDcIds) {
	state Reference<InitialDataDistribution> result = Reference<InitialDataDistribution>(new InitialDataDistribution);
	state Key beginKey = allKeys.begin;

	state bool succeeded;

	state Transaction tr(cx);

	state std::map<UID, Optional<Key>> server_dc;
	state std::map<vector<UID>, std::pair<vector<UID>, vector<UID>>> team_cache;

	// Get the server list in its own try/catch block since it modifies result. We don't want a subsequent failure causing entries to be duplicated
	loop {
		server_dc.clear();
		succeeded = false;
		try {
			result->mode = 1;
			tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
			Optional<Value> mode = wait(tr.get(dataDistributionModeKey));
			if (mode.present()) {
				BinaryReader rd(mode.get(), Unversioned());
				rd >> result->mode;
			}
			if (!result->mode) // result->mode can be changed to 0 when we disable data distribution
				return result;

			state Future<vector<ProcessData>> workers = getWorkers(&tr);
			state Future<Standalone<RangeResultRef>> serverList = tr.getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY);
			wait(success(workers) && success(serverList));
			ASSERT(!serverList.get().more && serverList.get().size() < CLIENT_KNOBS->TOO_MANY);

			std::map<Optional<Standalone<StringRef>>, ProcessData> id_data;
			for (int i = 0; i < workers.get().size(); i++)
				id_data[workers.get()[i].locality.processId()] = workers.get()[i];

			succeeded = true;

			for (int i = 0; i < serverList.get().size(); i++) {
				auto ssi = decodeServerListValue(serverList.get()[i].value);
				result->allServers.push_back(std::make_pair(ssi, id_data[ssi.locality.processId()].processClass));
				server_dc[ssi.id()] = ssi.locality.dcId();
			}

			break;
		}
		catch (Error& e) {
			wait(tr.onError(e));

			ASSERT(!succeeded); // We shouldn't be retrying if we have already started modifying result in this loop
			TraceEvent("GetInitialTeamsRetry", masterId);
		}
	}

	// If keyServers is too large to read in a single transaction, then we will have to break this process up into multiple transactions.
	// In that case, each iteration should begin where the previous left off
	while (beginKey < allKeys.end) {
		TEST(beginKey > allKeys.begin); // Multi-transactional getInitialDataDistribution
		loop {
			succeeded = false;
			try {
				tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
				wait(checkMoveKeysLockReadOnly(&tr, moveKeysLock));
				Standalone<RangeResultRef> keyServers = wait(krmGetRanges(&tr, keyServersPrefix, KeyRangeRef(beginKey, allKeys.end), SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT, SERVER_KNOBS->MOVE_KEYS_KRM_LIMIT_BYTES));
				succeeded = true;

				vector<UID> src, dest, last;

				// for each range
				for (int i = 0; i < keyServers.size() - 1; i++) {
					DDShardInfo info(keyServers[i].key);
					decodeKeyServersValue(keyServers[i].value, src, dest);
					if (remoteDcIds.size()) {
						auto srcIter = team_cache.find(src);
						if (srcIter == team_cache.end()) {
							for (auto& id : src) {
								auto& dc = server_dc[id];
								if (std::find(remoteDcIds.begin(), remoteDcIds.end(), dc) != remoteDcIds.end()) {
									info.remoteSrc.push_back(id);
								} else {
									info.primarySrc.push_back(id);
								}
							}
							result->primaryTeams.insert(info.primarySrc);
							result->remoteTeams.insert(info.remoteSrc);
							team_cache[src] = std::make_pair(info.primarySrc, info.remoteSrc);
						} else {
							info.primarySrc = srcIter->second.first;
							info.remoteSrc = srcIter->second.second;
						}
						if (dest.size()) {
							info.hasDest = true;
							auto destIter = team_cache.find(dest);
							if (destIter == team_cache.end()) {
								for (auto& id : dest) {
									auto& dc = server_dc[id];
									if (std::find(remoteDcIds.begin(), remoteDcIds.end(), dc) != remoteDcIds.end()) {
										info.remoteDest.push_back(id);
									} else {
										info.primaryDest.push_back(id);
									}
								}
								result->primaryTeams.insert(info.primaryDest);
								result->remoteTeams.insert(info.remoteDest);
								team_cache[dest] = std::make_pair(info.primaryDest, info.remoteDest);
							} else {
								info.primaryDest = destIter->second.first;
								info.remoteDest = destIter->second.second;
							}
						}
					} else {
						info.primarySrc = src;
						auto srcIter = team_cache.find(src);
						if (srcIter == team_cache.end()) {
							result->primaryTeams.insert(src);
							team_cache[src] = std::pair<vector<UID>, vector<UID>>();
						}
						if (dest.size()) {
							info.hasDest = true;
							info.primaryDest = dest;
							auto destIter = team_cache.find(dest);
							if (destIter == team_cache.end()) {
								result->primaryTeams.insert(dest);
								team_cache[dest] = std::pair<vector<UID>, vector<UID>>();
							}
						}
					}
					result->shards.push_back(info);
				}

				ASSERT(keyServers.size() > 0);
				beginKey = keyServers.end()[-1].key;
				break;
			} catch (Error& e) {
				wait(tr.onError(e));

				ASSERT(!succeeded); // We shouldn't be retrying if we have already started modifying result in this loop
				TraceEvent("GetInitialTeamsKeyServersRetry", masterId);
			}
		}

		tr.reset();
	}

	// a dummy shard at the end with no keys or servers makes life easier for trackInitialShards()
	result->shards.push_back(DDShardInfo(allKeys.end));

	return result;
}

Future<Void> storageServerTracker(
	struct DDTeamCollection* const& self,
	Database const& cx,
	TCServerInfo* const& server,
	ServerStatusMap* const& statusMap,
	MoveKeysLock const& lock,
	UID const& masterId,
	std::map<UID, Reference<TCServerInfo>>* const& other_servers,
	Optional<PromiseStream<std::pair<UID, Optional<StorageServerInterface>>>> const& changes,
	Promise<Void> const& errorOut,
	Version const& addedVersion);

Future<Void> teamTracker(struct DDTeamCollection* const& self, Reference<TCTeamInfo> const& team, bool const& badTeam);
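
// Central state for building and maintaining storage teams in one region: all known servers and
// machines, the teams and machine teams built from them, exclusion and recruitment state, and the
// actors that track their health.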
struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
	enum { REQUESTING_WORKER = 0, GETTING_WORKER = 1, GETTING_STORAGE = 2 };

	PromiseStream<Future<Void>> addActor;
	Database cx;
	UID masterId;
	DatabaseConfiguration configuration;

	bool doBuildTeams;
	Future<Void> teamBuilder;
	AsyncTrigger restartTeamBuilder;

	MoveKeysLock lock;
	PromiseStream<RelocateShard> output;
	vector<UID> allServers;
	ServerStatusMap server_status;
	int64_t unhealthyServers;
	std::map<int, int> priority_teams;
	std::map<UID, Reference<TCServerInfo>> server_info;

	// All machines' info; keyed by machine ID (zoneId), which is shared by every process on the same machine
	std::map<Standalone<StringRef>, Reference<TCMachineInfo>> machine_info;
	std::vector<Reference<TCMachineTeamInfo>> machineTeams; // all machine teams
	LocalityMap<UID> machineLocalityMap; // locality info of machines

	vector<Reference<TCTeamInfo>> teams;
	vector<Reference<TCTeamInfo>> badTeams;
	Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure;
	PromiseStream<UID> removedServers;
	std::set<UID> recruitingIds; // The IDs of the SS which are being recruited
	std::set<NetworkAddress> recruitingLocalities;
	Optional<PromiseStream<std::pair<UID, Optional<StorageServerInterface>>>> serverChanges;
	Future<Void> initialFailureReactionDelay;
	Future<Void> initializationDoneActor;
	Promise<Void> serverTrackerErrorOut;
	AsyncVar<int> recruitingStream;
	Debouncer restartRecruiting;

	int healthyTeamCount;
	Reference<AsyncVar<bool>> zeroHealthyTeams;

	int optimalTeamCount;
	AsyncVar<bool> zeroOptimalTeams;

	AsyncMap<AddressExclusion, bool> excludedServers; // true if an address is in the excluded list in the database. Updated asynchronously (eventually)

	std::vector<Optional<Key>> includedDCs;
	Optional<std::vector<Optional<Key>>> otherTrackedDCs;
	bool primary;
	Reference<AsyncVar<bool>> processingUnhealthy;
	Future<Void> readyToStart;
	Future<Void> checkTeamDelay;
	Promise<Void> addSubsetComplete;
	Future<Void> badTeamRemover;

	Reference<LocalitySet> storageServerSet;
	std::vector<LocalityEntry> forcedEntries, resultEntries;

	std::vector<DDTeamCollection*> teamCollections;

	void resetLocalitySet() {
		storageServerSet = Reference<LocalitySet>(new LocalityMap<UID>());
		LocalityMap<UID>* storageServerMap = (LocalityMap<UID>*)storageServerSet.getPtr();

		for (auto& it : server_info) {
			it.second->localityEntry = storageServerMap->add(it.second->lastKnownInterface.locality, &it.second->id);
		}
	}
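
	// Check whether the first `amount` servers of the team (all of them by default) satisfy the
	// configured storage policy exactly, i.e. the policy can be met without selecting any
	// additional servers.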
	bool satisfiesPolicy(const std::vector<Reference<TCServerInfo>>& team, int amount = -1) {
		forcedEntries.clear();
		resultEntries.clear();
		if (amount == -1) {
			amount = team.size();
		}

		for (int i = 0; i < amount; i++) {
			forcedEntries.push_back(team[i]->localityEntry);
		}

		bool result = storageServerSet->selectReplicas(configuration.storagePolicy, forcedEntries, resultEntries);
		return result && resultEntries.size() == 0;
	}

	// Used for map<Reference<TCMachineTeamInfo>, int>
	// SOMEDAY: Change it to a lambda function
	struct CompareTCMachineTeamInfoRef {
		bool operator()(const Reference<TCMachineTeamInfo>& lhs, const Reference<TCMachineTeamInfo>& rhs) const {
			return lhs->machineIDs < rhs->machineIDs;
		}
	};

	DDTeamCollection(
		Database const& cx,
		UID masterId,
		MoveKeysLock const& lock,
		PromiseStream<RelocateShard> const& output,
		Reference<ShardsAffectedByTeamFailure> const& shardsAffectedByTeamFailure,
		DatabaseConfiguration configuration,
		std::vector<Optional<Key>> includedDCs,
		Optional<std::vector<Optional<Key>>> otherTrackedDCs,
		Optional<PromiseStream<std::pair<UID, Optional<StorageServerInterface>>>> const& serverChanges,
		Future<Void> readyToStart, Reference<AsyncVar<bool>> zeroHealthyTeams, bool primary,
		Reference<AsyncVar<bool>> processingUnhealthy)
	  : cx(cx), masterId(masterId), lock(lock), output(output), shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), teamBuilder(Void()), badTeamRemover(Void()),
	    configuration(configuration), serverChanges(serverChanges), readyToStart(readyToStart), checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskDataDistribution)),
	    initialFailureReactionDelay(delayed(readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskDataDistribution)), healthyTeamCount(0), storageServerSet(new LocalityMap<UID>()),
	    initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)), optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),
	    unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs), zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), processingUnhealthy(processingUnhealthy)
	{
		if (!primary || configuration.usableRegions == 1) {
			TraceEvent("DDTrackerStarting", masterId)
				.detail("State", "Inactive")
				.trackLatest("DDTrackerStarting");
		}
	}

	~DDTeamCollection() {
		// The following kills a reference cycle between the teamTracker actor and the TCTeamInfo that both holds and is held by the actor
		// It also ensures that the trackers are done fiddling with healthyTeamCount before we free this
		for (int i = 0; i < teams.size(); i++) {
			teams[i]->tracker.cancel();
		}
		for (int i = 0; i < badTeams.size(); i++) {
			badTeams[i]->tracker.cancel();
		}
		// The following makes sure that, even if a reference to a team is held in the DD Queue, the tracker will be stopped
		// before the server_status map to which it has a pointer, is destroyed.
		for (auto it = server_info.begin(); it != server_info.end(); ++it) {
			it->second->tracker.cancel();
		}
		teamBuilder.cancel();
	}

	ACTOR static Future<Void> logOnCompletion(Future<Void> signal, DDTeamCollection* self) {
		wait(signal);
		wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskDataDistribution));

		if (!self->primary || self->configuration.usableRegions == 1) {
			TraceEvent("DDTrackerStarting", self->masterId)
				.detail("State", "Active")
				.trackLatest("DDTrackerStarting");
		}

		return Void();
	}

	ACTOR static Future<Void> interruptableBuildTeams(DDTeamCollection* self) {
		if (!self->addSubsetComplete.isSet()) {
			wait(addSubsetOfEmergencyTeams(self));
			self->addSubsetComplete.send(Void());
		}

		loop {
			choose {
				when(wait(self->buildTeams(self))) {
					return Void();
				}
				when(wait(self->restartTeamBuilder.onTrigger())) {}
			}
		}
	}

	ACTOR static Future<Void> checkBuildTeams(DDTeamCollection* self) {
		wait(self->checkTeamDelay);
		while (!self->teamBuilder.isReady())
			wait(self->teamBuilder);

		if (self->doBuildTeams && self->readyToStart.isReady()) {
			self->doBuildTeams = false;
			self->teamBuilder = self->interruptableBuildTeams(self);
			wait(self->teamBuilder);
		}

		return Void();
	}

	// SOMEDAY: Make bestTeam better about deciding to leave a shard where it is (e.g. in PRIORITY_TEAM_HEALTHY case)
	//   use keys, src, dest, metrics, priority, system load, etc.. to decide...
	ACTOR static Future<Void> getTeam(DDTeamCollection* self, GetTeamRequest req) {
		try {
			wait(self->checkBuildTeams(self));

			// Select the best team
			// Currently the metric is minimum used disk space (adjusted for data in flight)
			// Only healthy teams may be selected. The team has to be healthy at the moment we update
			//   shardsAffectedByTeamFailure or we could be dropping a shard on the floor (since team
			//   tracking is "edge triggered")
			// SOMEDAY: Account for capacity, load (when shardMetrics load is high)

			// self->teams.size() can be 0 under the following test
			// -f foundationdb/tests/slow/ConfigureTest.txt -b on -s 780181629
			// The situation happens rarely. We may want to eliminate this situation someday
			if (!self->teams.size()) {
				req.reply.send(Optional<Reference<IDataDistributionTeam>>());
				return Void();
			}

			int64_t bestLoadBytes = 0;
			Optional<Reference<IDataDistributionTeam>> bestOption;
			std::vector<std::pair<int, Reference<IDataDistributionTeam>>> randomTeams;
			std::set<UID> sources;

			if (!req.wantsNewServers) {
				std::vector<Reference<IDataDistributionTeam>> similarTeams;
				bool foundExact = false;

				for (int i = 0; i < req.sources.size(); i++)
					sources.insert(req.sources[i]);

				for (int i = 0; i < req.sources.size(); i++) {
					if (!self->server_info.count(req.sources[i])) {
						TEST(true); // GetSimilarTeams source server now unknown
						TraceEvent(SevWarn, "GetTeam").detail("ReqSourceUnknown", req.sources[i]);
					}
					else {
						auto& teamList = self->server_info[req.sources[i]]->teams;
						for (int j = 0; j < teamList.size(); j++) {
							if (teamList[j]->isHealthy() && (!req.preferLowerUtilization || teamList[j]->hasHealthyFreeSpace())) {
								int sharedMembers = 0;
								for (int k = 0; k < teamList[j]->serverIDs.size(); k++)
									if (sources.count(teamList[j]->serverIDs[k]))
										sharedMembers++;

								if (!foundExact && sharedMembers == teamList[j]->serverIDs.size()) {
									foundExact = true;
									bestOption = Optional<Reference<IDataDistributionTeam>>();
									similarTeams.clear();
								}

								if ((sharedMembers == teamList[j]->serverIDs.size()) || (!foundExact && req.wantsTrueBest)) {
									int64_t loadBytes = SOME_SHARED * teamList[j]->getLoadBytes(true, req.inflightPenalty);
									if (!bestOption.present() || (req.preferLowerUtilization && loadBytes < bestLoadBytes) || (!req.preferLowerUtilization && loadBytes > bestLoadBytes)) {
										bestLoadBytes = loadBytes;
										bestOption = teamList[j];
									}
								}
								else if (!req.wantsTrueBest && !foundExact)
									similarTeams.push_back(teamList[j]);
							}
						}
					}
				}

				if (foundExact || (req.wantsTrueBest && bestOption.present())) {
					ASSERT(bestOption.present());
					// Check the team size: be sure team size is correct
					ASSERT(bestOption.get()->size() == self->configuration.storageTeamSize);
					req.reply.send(bestOption);
					return Void();
				}

				if (!req.wantsTrueBest) {
					while (similarTeams.size() && randomTeams.size() < SERVER_KNOBS->BEST_TEAM_OPTION_COUNT) {
						int randomTeam = g_random->randomInt(0, similarTeams.size());
						randomTeams.push_back(std::make_pair(SOME_SHARED, similarTeams[randomTeam]));
						swapAndPop(&similarTeams, randomTeam);
					}
				}
			}

			if (req.wantsTrueBest) {
				ASSERT(!bestOption.present());
				for (int i = 0; i < self->teams.size(); i++) {
					if (self->teams[i]->isHealthy() && (!req.preferLowerUtilization || self->teams[i]->hasHealthyFreeSpace())) {
						int64_t loadBytes = NONE_SHARED * self->teams[i]->getLoadBytes(true, req.inflightPenalty);
						if (!bestOption.present() || (req.preferLowerUtilization && loadBytes < bestLoadBytes) || (!req.preferLowerUtilization && loadBytes > bestLoadBytes)) {
							bestLoadBytes = loadBytes;
							bestOption = self->teams[i];
						}
					}
				}
			}
			else {
				int nTries = 0;
				while (randomTeams.size() < SERVER_KNOBS->BEST_TEAM_OPTION_COUNT && nTries < SERVER_KNOBS->BEST_TEAM_MAX_TEAM_TRIES) {
					Reference<IDataDistributionTeam> dest = g_random->randomChoice(self->teams);

					bool ok = dest->isHealthy() && (!req.preferLowerUtilization || dest->hasHealthyFreeSpace());
					for (int i = 0; ok && i < randomTeams.size(); i++)
						if (randomTeams[i].second->getServerIDs() == dest->getServerIDs())
							ok = false;

					if (ok)
						randomTeams.push_back(std::make_pair(NONE_SHARED, dest));
					else
						nTries++;
				}

				for (int i = 0; i < randomTeams.size(); i++) {
					int64_t loadBytes = randomTeams[i].first * randomTeams[i].second->getLoadBytes(true, req.inflightPenalty);
					if (!bestOption.present() || (req.preferLowerUtilization && loadBytes < bestLoadBytes) || (!req.preferLowerUtilization && loadBytes > bestLoadBytes)) {
						bestLoadBytes = loadBytes;
						bestOption = randomTeams[i].second;
					}
				}
			}

			// Note: req.completeSources can be empty and all servers (and server teams) can be unhealthy.
			// We will get stuck at this! This only happens when a DC fails. No need to consider it right now.
			if (!bestOption.present() && self->zeroHealthyTeams->get()) {
				// Attempt to find the unhealthy source server team and return it
				std::set<UID> completeSources;
				for (int i = 0; i < req.completeSources.size(); i++) {
					completeSources.insert(req.completeSources[i]);
				}

				int bestSize = 0;
				for (int i = 0; i < req.completeSources.size(); i++) {
					if (self->server_info.count(req.completeSources[i])) {
						auto& teamList = self->server_info[req.completeSources[i]]->teams;
						for (int j = 0; j < teamList.size(); j++) {
							bool found = true;
							for (int k = 0; k < teamList[j]->serverIDs.size(); k++) {
								if (!completeSources.count(teamList[j]->serverIDs[k])) {
									found = false;
									break;
								}
							}
							if (found && teamList[j]->serverIDs.size() > bestSize) {
								bestOption = teamList[j];
								bestSize = teamList[j]->serverIDs.size();
							}
						}
						break;
					}
				}
			}

			req.reply.send(bestOption);
			return Void();
		} catch (Error& e) {
			if (e.code() != error_code_actor_cancelled)
				req.reply.sendError(e);
			throw;
		}
	}

	int64_t getDebugTotalDataInFlight() {
		int64_t total = 0;
		for (auto itr = server_info.begin(); itr != server_info.end(); ++itr)
			total += itr->second->dataInFlightToServer;
		return total;
	}
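
	// For every bad team, try to carve out a subset of its healthy, in-DC servers that satisfies
	// the storage policy and is not already covered by an existing team, and add that subset as a
	// new (initial) team so its shards have somewhere healthy to move.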
	ACTOR static Future<Void> addSubsetOfEmergencyTeams(DDTeamCollection* self) {
		state int idx = 0;
		state std::vector<Reference<TCServerInfo>> servers;
		state std::vector<UID> serverIds;
		state Reference<LocalitySet> tempSet = Reference<LocalitySet>(new LocalityMap<UID>());
		state LocalityMap<UID>* tempMap = (LocalityMap<UID>*)tempSet.getPtr();

		for (; idx < self->badTeams.size(); idx++) {
			servers.clear();
			for (auto server : self->badTeams[idx]->servers) {
				if (server->inDesiredDC && !self->server_status.get(server->id).isUnhealthy()) {
					servers.push_back(server);
				}
			}

			if (servers.size() >= self->configuration.storageTeamSize) {
				bool foundTeam = false;
				for (int j = 0; j < servers.size() - self->configuration.storageTeamSize + 1 && !foundTeam; j++) {
					auto& serverTeams = servers[j]->teams;
					for (int k = 0; k < serverTeams.size(); k++) {
						auto& testTeam = serverTeams[k]->getServerIDs();
						bool allInTeam = true;
						for (int l = 0; l < testTeam.size(); l++) {
							bool foundServer = false;
							for (auto it : servers) {
								if (it->id == testTeam[l]) {
									foundServer = true;
									break;
								}
							}
							if (!foundServer) {
								allInTeam = false;
								break;
							}
						}
						if (allInTeam) {
							foundTeam = true;
							break;
						}
					}
				}
				if (!foundTeam) {
					if (self->satisfiesPolicy(servers)) {
						if (servers.size() == self->configuration.storageTeamSize || self->satisfiesPolicy(servers, self->configuration.storageTeamSize)) {
							servers.resize(self->configuration.storageTeamSize);
							self->addTeam(servers, true);
						} else {
							tempSet->clear();
							for (auto it : servers) {
								tempMap->add(it->lastKnownInterface.locality, &it->id);
							}

							self->resultEntries.clear();
							self->forcedEntries.clear();
							bool result = tempSet->selectReplicas(self->configuration.storagePolicy, self->forcedEntries, self->resultEntries);
							ASSERT(result && self->resultEntries.size() == self->configuration.storageTeamSize);

							serverIds.clear();
							for (auto& it : self->resultEntries) {
								serverIds.push_back(*tempMap->getObject(it));
							}
							self->addTeam(serverIds.begin(), serverIds.end(), true);
						}
					} else {
						serverIds.clear();
						for (auto it : servers) {
							serverIds.push_back(it->id);
						}
						TraceEvent(SevWarnAlways, "CannotAddSubset", self->masterId).detail("Servers", describe(serverIds));
					}
				}
			}
			wait(yield());
		}
		return Void();
	}
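
	// Seed this collection from the initial data distribution: register every server this region
	// should handle, then recreate the primary or remote teams that were read from the database.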
	ACTOR static Future<Void> init(DDTeamCollection* self, Reference<InitialDataDistribution> initTeams) {
		// SOMEDAY: If some servers have teams and not others (or some servers have more data than others) and there is an address/locality collision, should
		// we preferentially mark the least used server as undesirable?
		for (auto i = initTeams->allServers.begin(); i != initTeams->allServers.end(); ++i) {
			if (self->shouldHandleServer(i->first)) {
				self->addServer(i->first, i->second, self->serverTrackerErrorOut, 0);
			}
		}

		state std::set<std::vector<UID>>::iterator teamIter = self->primary ? initTeams->primaryTeams.begin() : initTeams->remoteTeams.begin();
		state std::set<std::vector<UID>>::iterator teamIterEnd = self->primary ? initTeams->primaryTeams.end() : initTeams->remoteTeams.end();
		for (; teamIter != teamIterEnd; ++teamIter) {
			self->addTeam(teamIter->begin(), teamIter->end(), true);
			wait(yield());
		}

		return Void();
	}
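
	// Log how evenly teams are spread across healthy servers and machines (min/max/variance of the
	// number of teams per server and per machine).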
	void evaluateTeamQuality() {
		int teamCount = teams.size(), serverCount = allServers.size();
		double teamsPerServer = (double)teamCount * configuration.storageTeamSize / serverCount;

		ASSERT(serverCount == server_info.size());

		int minTeams = std::numeric_limits<int>::max();
		int maxTeams = std::numeric_limits<int>::min();
		double varTeams = 0;

		std::map<Optional<Standalone<StringRef>>, int> machineTeams;
		for (auto s = server_info.begin(); s != server_info.end(); ++s) {
			if (!server_status.get(s->first).isUnhealthy()) {
				int stc = s->second->teams.size();
				minTeams = std::min(minTeams, stc);
				maxTeams = std::max(maxTeams, stc);
				varTeams += (stc - teamsPerServer) * (stc - teamsPerServer);
				// Use zoneId as server's machine id
				machineTeams[s->second->lastKnownInterface.locality.zoneId()] += stc;
			}
		}
		varTeams /= teamsPerServer * teamsPerServer;

		int minMachineTeams = std::numeric_limits<int>::max();
		int maxMachineTeams = std::numeric_limits<int>::min();
		for (auto m = machineTeams.begin(); m != machineTeams.end(); ++m) {
			minMachineTeams = std::min(minMachineTeams, m->second);
			maxMachineTeams = std::max(maxMachineTeams, m->second);
		}

		TraceEvent(
			minTeams > 0 ? SevInfo : SevWarn,
			"DataDistributionTeamQuality", masterId)
			.detail("Servers", serverCount)
			.detail("Teams", teamCount)
			.detail("TeamsPerServer", teamsPerServer)
			.detail("Variance", varTeams / serverCount)
			.detail("ServerMinTeams", minTeams)
			.detail("ServerMaxTeams", maxTeams)
			.detail("MachineMinTeams", minMachineTeams)
			.detail("MachineMaxTeams", maxMachineTeams);
	}
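
	// Return true if this exact set of servers already forms a team tracked by this collection.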
	bool teamExists(vector<UID>& team) {
		for (auto& serverID : team) {
			for (auto& usedTeam : server_info[serverID]->teams) {
				if (team == usedTeam->getServerIDs()) {
					return true;
				}
			}
		}

		return false;
	}

	// SOMEDAY: when machineTeams is changed from vector to set, we may check the existence faster
	bool machineTeamExists(vector<Standalone<StringRef>>& machineIDs) { return findMachineTeam(machineIDs).isValid(); }

	Reference<TCMachineTeamInfo> findMachineTeam(vector<Standalone<StringRef>>& machineIDs) {
		for (int i = 0; i < machineTeams.size(); ++i) {
			if (machineTeams[i]->machineIDs == machineIDs) {
				return machineTeams[i];
			}
		}

		return Reference<TCMachineTeamInfo>();
	}

	int countMachineTeams() { return machineTeams.size(); }

	// Assume begin to end is sorted by std::sort
	// Assume InputIt is an iterator to UID
	// Note: We must allow creating empty teams because an empty team is created when a remote DB is initialized.
	// The empty team is used as the starting point to move data to the remote DB
	// begin : the start of the team member IDs
	// end : the end of the team member IDs
	// isInitialTeam : False when the team is added by addTeamsBestOf();
	//                 True otherwise, e.g., when the team is added at init() as we recreate teams by looking up the DB
	template <class InputIt>
	void addTeam(InputIt begin, InputIt end, bool isInitialTeam) {
		vector<Reference<TCServerInfo>> newTeamServers;
		for (auto i = begin; i != end; ++i) {
			if (server_info.find(*i) != server_info.end()) {
				newTeamServers.push_back(server_info[*i]);
			}
		}

		addTeam(newTeamServers, isInitialTeam);
	}

	void addTeam(const vector<Reference<TCServerInfo>>& newTeamServers, bool isInitialTeam) {
		Reference<TCTeamInfo> teamInfo(new TCTeamInfo(newTeamServers));
		bool badTeam = !satisfiesPolicy(teamInfo->servers) || teamInfo->servers.size() != configuration.storageTeamSize;

		// Add the reference of machineTeam (with machineIDs) into the process team
		vector<Standalone<StringRef>> machineIDs;
		for (auto server = newTeamServers.begin(); server != newTeamServers.end(); ++server) {
			machineIDs.push_back((*server)->machine->machineID);
		}
		sort(machineIDs.begin(), machineIDs.end());
		Reference<TCMachineTeamInfo> machineTeamInfo = findMachineTeam(machineIDs);

		// A team is not an initial team if it is added by addTeamsBestOf(), which always creates a team with the correct size
		// A non-initial team must have its machine team created and its size must be correct
		ASSERT(isInitialTeam ||
		       (machineTeamInfo.isValid() && teamInfo->serverIDs.size() == configuration.storageTeamSize));

		// Create a machine team if it does not exist
		// Note an initial team may be added at init() even though the team size is not storageTeamSize
		if (!machineTeamInfo.isValid() && !machineIDs.empty()) {
			machineTeamInfo = addMachineTeam(machineIDs.begin(), machineIDs.end());
			teamInfo->machineTeam = machineTeamInfo;
		}

		if (!machineTeamInfo.isValid()) {
			TraceEvent(SevWarn, "AddTeamWarning")
			    .detail("NotFoundMachineTeam", "OKIfTeamIsEmpty")
			    .detail("TeamInfo", teamInfo->getDesc());
		}
		teamInfo->machineTeam = machineTeamInfo;

		teamInfo->tracker = teamTracker(this, teamInfo, badTeam);
		// ASSERT( teamInfo->serverIDs.size() > 0 ); // team can be empty at DB initialization
		if (badTeam) {
			badTeams.push_back(teamInfo);
		} else {
			teams.push_back(teamInfo);
			for (int i = 0; i < newTeamServers.size(); ++i) {
				newTeamServers[i]->teams.push_back(teamInfo);
			}
		}
	}

	void addTeam(std::set<UID> const& team, bool isInitialTeam) { addTeam(team.begin(), team.end(), isInitialTeam); }

	// Add a machine team specified by input machines
	Reference<TCMachineTeamInfo> addMachineTeam(vector<Reference<TCMachineInfo>> machines) {
		Reference<TCMachineTeamInfo> machineTeamInfo(new TCMachineTeamInfo(machines));
		machineTeams.push_back(machineTeamInfo);

		// Assign machine teams to machine
		for (auto machine : machines) {
			machine->machineTeams.push_back(machineTeamInfo);
		}

		return machineTeamInfo;
	}

	// Add a machine team by using the machineIDs from begin to end
	Reference<TCMachineTeamInfo> addMachineTeam(vector<Standalone<StringRef>>::iterator begin,
	                                            vector<Standalone<StringRef>>::iterator end) {
		vector<Reference<TCMachineInfo>> machines;

		for (auto i = begin; i != end; ++i) {
			if (machine_info.find(*i) != machine_info.end()) {
				machines.push_back(machine_info[*i]);
			} else {
				TraceEvent(SevWarn, "AddMachineTeamError").detail("MachineIDNotExist", i->contents().toString());
			}
		}

		return addMachineTeam(machines);
	}

	// Enumerate all possible teams by backtracking. Add a team, if it's valid, into the teamCollection
	// FIXME: Remove this function since it can be replaced by addTeamsBestOf()
	ACTOR Future<Void> addAllTeams(DDTeamCollection* self, int location, vector<LocalityEntry>* history,
	                               Reference<LocalityMap<UID>> processes, vector<std::vector<UID>>* output,
	                               int teamLimit, int* addedTeams) {
		wait(yield(TaskDataDistributionLaunch));

		// Add team, if valid
		if (history->size() == self->configuration.storageTeamSize) {
			auto valid = self->configuration.storagePolicy->validate(*history, processes); // Can be very slow!
			if (!valid) {
				return Void();
			}

			std::vector<UID> team;
			for (auto it = history->begin(); it != history->end(); it++) {
				team.push_back(*processes->getObject(*it));
			}

			if (!self->teamExists(team) && *addedTeams < teamLimit) {
				output->push_back(team);
				(*addedTeams)++;
			}

			return Void();
		}

		// Loop through remaining potential team members, add one and recursively call function
		for (; location < processes->size(); location++) {
			history->push_back(processes->getEntry(location));
			state int depth = history->size();
			wait(self->addAllTeams(self, location + 1, history, processes, output, teamLimit, addedTeams));
			ASSERT(history->size() == depth); // the "stack" should be unchanged by this call
			history->pop_back();

			if (*addedTeams > teamLimit)
				break;
		}
		return Void();
	}

	ACTOR Future<int> addAllTeams(DDTeamCollection* self, vector<UID> input, vector<std::vector<UID>>* output, int teamLimit) {
		state int addedTeams = 0;
		state vector<LocalityEntry> history;
		state Reference<LocalityMap<UID>> processes(new LocalityMap<UID>());

		for (auto it = input.begin(); it != input.end(); it++) {
			if (self->server_info[*it]) {
				processes->add(self->server_info[*it]->lastKnownInterface.locality, &*it);
			}
		}

		wait(self->addAllTeams(self, 0, &history, processes, output, teamLimit, &addedTeams));
		self->configuration.storagePolicy->traceLocalityRecords(processes); // Debug purpose only
		return addedTeams;
	}
int constructMachineFor1Server ( UID const & uid ) {
ASSERT ( server_info . find ( uid ) ! = server_info . end ( ) ) ;
auto & server = server_info [ uid ] ;
auto & ssi = server - > lastKnownInterface ;
auto & locality = server - > lastKnownInterface . locality ;
Standalone < StringRef > machine_id = locality . zoneId ( ) . get ( ) ; // Use the zoneId in the locality as the machine_id
2017-05-26 04:48:44 +08:00
2018-11-22 03:18:26 +08:00
if ( machine_info . find ( machine_id ) = =
machine_info . end ( ) ) { // uid is the first storage server process on the machine
// For each machine, store the first server's localityEntry into machineInfo for later use.
LocalityEntry localityEntry = machineLocalityMap . add ( locality , & uid ) ;
2018-08-30 05:40:39 +08:00
Reference < TCMachineInfo > machineInfo = Reference < TCMachineInfo > ( new TCMachineInfo ( server , localityEntry ) ) ;
server - > machine = machineInfo ;
machine_info . insert ( std : : make_pair ( machine_id , machineInfo ) ) ;
} else {
Reference < TCMachineInfo > machineInfo = machine_info . find ( machine_id ) - > second ;
machineInfo - > serversOnMachine . push_back ( server ) ;
server - > machine = machineInfo ;
}
return 0 ;
}
2018-11-01 07:46:32 +08:00
// Group storage servers (processes) by their machineId in LocalityData
// All created machines are healthy
// Return the number of healthy servers that were grouped into machines
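// Illustrative example (hypothetical zoneIds): six healthy servers whose zoneIds are z1, z1, z2, z2, z3, z3
// produce three TCMachineInfo entries with two servers each, and the function returns 6.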
2018-08-30 05:40:39 +08:00
int constructMachinesFromServers ( ) {
int totalServerIndex = 0 ;
2017-05-26 04:48:44 +08:00
for ( auto i = server_info . begin ( ) ; i ! = server_info . end ( ) ; + + i ) {
2018-11-22 03:18:26 +08:00
if ( ! server_status . get ( i - > first ) . isUnhealthy ( ) ) {
2018-08-30 05:40:39 +08:00
constructMachineFor1Server ( i - > first ) ;
totalServerIndex + + ;
2017-05-26 04:48:44 +08:00
}
}
2018-08-30 05:40:39 +08:00
return totalServerIndex ;
}
void traceServerInfo ( ) {
int i = 0 ;
TraceEvent ( " ServerInfo " ) . detail ( " Size " , server_info . size ( ) ) ;
2018-11-22 03:18:26 +08:00
for ( auto & server : server_info ) {
const UID & uid = server . first ;
TraceEvent ( " ServerInfo " )
. detail ( " ServerInfoIndex " , i + + )
. detail ( " ServerID " , server . first . toString ( ) )
. detail ( " ServerTeamOwned " , server . second - > teams . size ( ) )
. detail ( " MachineID " , server . second - > machine - > machineID . contents ( ) . toString ( ) ) ;
2018-08-30 05:40:39 +08:00
}
2018-11-22 03:18:26 +08:00
for ( auto & server : server_info ) {
const UID & uid = server . first ;
TraceEvent ( " ServerStatus " , uid )
. detail ( " Healthy " , ! server_status . get ( uid ) . isUnhealthy ( ) )
. detail ( " MachineIsValid " , server_info [ uid ] - > machine . isValid ( ) )
. detail ( " MachineTeamSize " ,
server_info [ uid ] - > machine . isValid ( ) ? server_info [ uid ] - > machine - > machineTeams . size ( ) : - 1 ) ;
2017-05-26 04:48:44 +08:00
}
2018-08-30 05:40:39 +08:00
}
void traceServerTeamInfo ( ) {
int i = 0 ;
TraceEvent ( " ServerTeamInfo " ) . detail ( " Size " , teams . size ( ) ) ;
2018-11-22 03:18:26 +08:00
for ( auto & team : teams ) {
TraceEvent ( " ServerTeamInfo " )
. detail ( " TeamIndex " , i + + )
. detail ( " Healthy " , team - > isHealthy ( ) )
. detail ( " ServerNumber " , team - > serverIDs . size ( ) )
. detail ( " MemberIDs " , team - > getServerIDsStr ( ) ) ;
2018-08-30 05:40:39 +08:00
}
}
void traceMachineInfo ( ) {
int i = 0 ;
TraceEvent ( " MachineInfo " ) . detail ( " Size " , machine_info . size ( ) ) ;
2018-11-22 03:18:26 +08:00
for ( auto & machine : machine_info ) {
TraceEvent ( " MachineInfo " )
. detail ( " MachineInfoIndex " , i + + )
. detail ( " MachineID " , machine . first . contents ( ) . toString ( ) )
. detail ( " MachineTeamOwned " , machine . second - > machineTeams . size ( ) )
. detail ( " ServerNumOnMachine " , machine . second - > serversOnMachine . size ( ) )
. detail ( " ServersID " , machine . second - > getServersIDStr ( ) ) ;
2018-08-30 05:40:39 +08:00
}
}
void traceMachineTeamInfo ( ) {
int i = 0 ;
TraceEvent ( " MachineTeamInfo " ) . detail ( " Size " , machineTeams . size ( ) ) ;
2018-11-22 03:18:26 +08:00
for ( auto & team : machineTeams ) {
TraceEvent ( " MachineTeamInfo " )
. detail ( " TeamIndex " , i + + )
. detail ( " MachineIDs " , team - > getMachineIDsStr ( ) )
. detail ( " MachineTeamScore " , calculateMachineTeamScore ( team ) ) ;
2018-08-30 05:40:39 +08:00
}
}
void traceMachineLocalityMap ( ) {
int i = 0 ;
TraceEvent ( " MachineLocalityMap " ) . detail ( " Size " , machineLocalityMap . size ( ) ) ;
2018-11-22 03:18:26 +08:00
for ( auto & uid : machineLocalityMap . getObjects ( ) ) {
2018-08-30 05:40:39 +08:00
Reference < LocalityRecord > record = machineLocalityMap . getRecord ( i ) ;
2018-11-22 03:18:26 +08:00
if ( record . isValid ( ) ) {
TraceEvent ( " MachineLocalityMap " )
. detail ( " LocalityIndex " , i + + )
. detail ( " UID " , uid - > toString ( ) )
. detail ( " LocalityRecord " , record - > toString ( ) ) ;
2018-08-30 05:40:39 +08:00
} else {
2018-11-22 03:18:26 +08:00
TraceEvent ( " MachineLocalityMap " )
. detail ( " LocalityIndex " , i + + )
. detail ( " UID " , uid - > toString ( ) )
. detail ( " LocalityRecord " , " [NotFound] " ) ;
2018-08-30 05:40:39 +08:00
}
}
}
2018-11-02 04:32:13 +08:00
// To enable verbose debug info, set shouldPrint to true
void traceAllInfo ( bool shouldPrint = false ) {
2018-11-22 03:18:26 +08:00
if ( ! shouldPrint ) return ;
2018-08-30 05:40:39 +08:00
TraceEvent ( " TraceAllInfo " ) . detail ( " Primary " , primary ) . detail ( " DesiredTeamSize " , configuration . storageTeamSize ) ;
traceServerInfo ( ) ;
traceServerTeamInfo ( ) ;
traceMachineInfo ( ) ;
traceMachineTeamInfo ( ) ;
traceMachineLocalityMap ( ) ;
}
2018-11-02 04:32:13 +08:00
// We must rebuild the machine locality map whenever an entry is inserted into or removed from the map
2018-08-30 05:40:39 +08:00
void rebuildMachineLocalityMap ( ) {
machineLocalityMap . clear ( ) ;
int numHealthyMachine = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto machine = machine_info . begin ( ) ; machine ! = machine_info . end ( ) ; + + machine ) {
if ( machine - > second - > serversOnMachine . empty ( ) ) {
TraceEvent ( SevWarn , " RebuildMachineLocalityMapError " )
. detail ( " Machine " , machine - > second - > machineID . toString ( ) )
. detail ( " NumServersOnMachine " , 0 ) ;
2018-08-30 05:40:39 +08:00
continue ;
}
2018-11-22 03:18:26 +08:00
if ( ! isMachineHealthy ( machine - > second ) ) {
2018-08-30 05:40:39 +08:00
continue ;
}
Reference < TCServerInfo > representativeServer = machine - > second - > serversOnMachine [ 0 ] ;
2018-11-22 03:18:26 +08:00
auto & locality = representativeServer - > lastKnownInterface . locality ;
const LocalityEntry & localityEntry = machineLocalityMap . add ( locality , & representativeServer - > id ) ;
2018-08-30 05:40:39 +08:00
machine - > second - > localityEntry = localityEntry ;
+ + numHealthyMachine ;
}
}
2018-11-02 04:32:13 +08:00
// Create machineTeamsToBuild number of machine teams
// No operation if machineTeamsToBuild is 0
//
// Five steps to create each machine team, which are documented in the function
// Reuse ReplicationPolicy's selectReplicas() to select a machine team
// Return the number of added machine teams
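// Worked bound (hypothetical sizes): when asked to build 1 machine team with storageTeamSize 3, the
// while loop below stops after about 2 * 1 * (3 + 1) = 8 iterations, even if no machine team could be added.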
2018-08-30 05:40:39 +08:00
int addBestMachineTeams ( int targetMachineTeamsToBuild ) {
int addedMachineTeams = 0 ;
int totalServerIndex = 0 ;
int machineTeamsToBuild = 0 ;
2018-11-22 03:18:26 +08:00
ASSERT ( targetMachineTeamsToBuild > = 0 ) ;
2018-08-30 05:40:39 +08:00
// Do not build any machine team if asked to build none
2018-11-22 03:18:26 +08:00
if ( targetMachineTeamsToBuild = = 0 ) return 0 ;
2018-08-30 05:40:39 +08:00
machineTeamsToBuild = targetMachineTeamsToBuild ;
2018-11-22 03:18:26 +08:00
if ( machine_info . size ( ) < configuration . storageTeamSize ) {
TraceEvent ( SevWarn , " DataDistributionBuildMachineTeams " , masterId )
. suppressFor ( 10 )
. detail ( " Reason " , " Not enough machines for a team. The number of machines must be at least the team size " )
. detail ( " MachineNumber " , machine_info . size ( ) )
. detail ( " TeamSize " , configuration . storageTeamSize ) ;
2018-08-30 05:40:39 +08:00
return addedMachineTeams ;
}
2018-11-22 03:18:26 +08:00
// Step 1: Create machineLocalityMap which will be used in building machine team
2018-08-30 05:40:39 +08:00
rebuildMachineLocalityMap ( ) ;
2017-05-26 04:48:44 +08:00
int loopCount = 0 ;
2018-11-22 03:18:26 +08:00
// Add a team in each iteration
while ( addedMachineTeams < machineTeamsToBuild ) {
// Step 2: Get least used machines from which we choose machines as a machine team
std : : vector < Reference < TCMachineInfo > > leastUsedMachines ; // A less used machine owns fewer machine teams
2018-08-30 05:40:39 +08:00
int minTeamCount = std : : numeric_limits < int > : : max ( ) ;
2018-11-22 03:18:26 +08:00
for ( auto & machine : machine_info ) {
// Skip invalid machine whose representative server is not in server_info
if ( server_info . find ( machine . second - > serversOnMachine [ 0 ] - > id ) = = server_info . end ( ) ) continue ;
// Skip unhealthy machines
if ( ! isMachineHealthy ( machine . second ) ) continue ;
2018-08-30 05:40:39 +08:00
int teamCount = countCorrectSizedMachineTeam ( machine . second , configuration . storageTeamSize ) ;
2018-11-22 03:18:26 +08:00
if ( teamCount < minTeamCount ) {
2018-08-30 05:40:39 +08:00
leastUsedMachines . clear ( ) ;
2017-05-26 04:48:44 +08:00
minTeamCount = teamCount ;
}
2018-11-22 03:18:26 +08:00
if ( teamCount = = minTeamCount ) {
2018-08-30 05:40:39 +08:00
leastUsedMachines . push_back ( machine . second ) ;
2017-05-26 04:48:44 +08:00
}
}
std : : vector < UID * > team ;
std : : vector < LocalityEntry > forcedAttributes ;
2018-08-30 05:40:39 +08:00
// Step 3: Create a representative process for each machine.
2018-11-22 03:18:26 +08:00
// Construct forcedAttribute from leastUsedMachines.
// We will use forcedAttribute to call existing function to form a team
if ( leastUsedMachines . size ( ) ) {
2018-08-30 05:40:39 +08:00
// Randomly choose 1 least used machine
Reference < TCMachineInfo > tcMachineInfo = g_random - > randomChoice ( leastUsedMachines ) ;
2018-11-22 03:18:26 +08:00
if ( tcMachineInfo - > serversOnMachine . empty ( ) ) { // TODO: Change to assert if it never happens
TraceEvent ( SevError , " NoServersOnMachine " )
. detail ( " Primary " , primary )
. detail ( " LeastUsedMachinesNumber " , leastUsedMachines . size ( ) )
. detail ( " NumServersOnMachine " , tcMachineInfo - > serversOnMachine . size ( ) ) ;
2018-08-30 05:40:39 +08:00
continue ;
}
LocalityEntry process = tcMachineInfo - > localityEntry ;
forcedAttributes . push_back ( process ) ;
2017-05-26 04:48:44 +08:00
}
2018-08-30 05:40:39 +08:00
// Step 4: Reuse Policy's selectReplicas() to create team for the representative process.
2017-05-26 04:48:44 +08:00
std : : vector < UID * > bestTeam ;
2018-08-30 05:40:39 +08:00
int bestScore = std : : numeric_limits < int > : : max ( ) ;
int maxAttempts = SERVER_KNOBS - > BEST_OF_AMT ; // BEST_OF_AMT = 4
2018-11-22 03:18:26 +08:00
for ( int i = 0 ; i < maxAttempts & & i < 100 ; + + i ) {
// Choose a team that balances the # of teams per server among the teams
// that have the least-utilized server
2017-05-26 04:48:44 +08:00
team . clear ( ) ;
2018-08-30 05:40:39 +08:00
auto success = machineLocalityMap . selectReplicas ( configuration . storagePolicy , forcedAttributes , team ) ;
2018-11-22 03:18:26 +08:00
if ( ! success ) {
2017-05-26 04:48:44 +08:00
break ;
}
2018-11-22 03:18:26 +08:00
if ( forcedAttributes . size ( ) > 0 ) {
2018-08-30 05:40:39 +08:00
team . push_back ( ( UID * ) machineLocalityMap . getObject ( forcedAttributes [ 0 ] ) ) ;
}
// selectReplicas() may return a server that is not in server_info. Retry if that happens
2018-11-22 03:18:26 +08:00
// Reproduce the situation with the following test case
// -r simulation -f foundationdb/tests/fast/BackupToDBCorrectnessClean.txt -b on -s 801184616
2018-11-02 04:32:13 +08:00
// TODO: Test it by triggering SevError. If no SevError is triggered, change it to assert
2018-08-30 05:40:39 +08:00
bool valid = true ;
2018-11-22 03:18:26 +08:00
for ( auto & pUID : team ) {
if ( server_info . find ( * pUID ) = = server_info . end ( ) ) {
TraceEvent ( SevError , " SelectReplicasChoseInvalidTeam " )
. detail ( " Primary " , primary )
. detail ( " AddedMachineTeams " , addedMachineTeams )
. detail ( " Attempt " , i )
. detail ( " ServerInfoSize " , server_info . size ( ) )
. detail ( " InvalidUID " , ( * pUID ) . toString ( ) )
. detail ( " ServerTeamNum " , server_info [ * pUID ] - > teams . size ( ) )
. detail ( " DesiredTeamsPerServer " , SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER ) ;
2018-11-02 04:32:13 +08:00
traceAllInfo ( true ) ;
2018-08-30 05:40:39 +08:00
valid = false ;
break ;
}
}
2018-11-22 03:18:26 +08:00
if ( valid = = false ) {
2018-08-30 05:40:39 +08:00
maxAttempts + = 1 ;
continue ;
2017-05-26 04:48:44 +08:00
}
2018-08-30 05:40:39 +08:00
2018-11-22 03:18:26 +08:00
// This should never happen: it would mean selectReplicas() did not choose a correctly sized team in the first place!
2018-11-02 04:32:13 +08:00
// TODO: Test it by triggering SevError. If no SevError is triggered, change it to assert
2018-11-22 03:18:26 +08:00
if ( team . size ( ) ! = configuration . storageTeamSize ) {
2018-11-02 04:32:13 +08:00
TraceEvent ( SevError , " SelectReplicasChooseATeamWithIncorrectSize " ) ;
traceAllInfo ( true ) ;
2017-05-26 04:48:44 +08:00
maxAttempts + = 1 ;
}
int score = 0 ;
2018-08-30 05:40:39 +08:00
vector < Standalone < StringRef > > machineIDs ;
2018-11-22 03:18:26 +08:00
for ( auto process = team . begin ( ) ; process ! = team . end ( ) ; process + + ) {
2017-05-26 04:48:44 +08:00
score + = server_info [ * * process ] - > teams . size ( ) ;
2018-11-22 03:18:26 +08:00
Standalone < StringRef > machine_id =
server_info [ * * process ] - > lastKnownInterface . locality . zoneId ( ) . get ( ) ;
2018-08-30 05:40:39 +08:00
machineIDs . push_back ( machine_id ) ;
}
2018-11-22 03:18:26 +08:00
if ( ! isMachineIDValid ( machineIDs ) ) {
2018-08-30 05:40:39 +08:00
maxAttempts + = 1 ;
continue ;
}
2018-11-22 03:18:26 +08:00
if ( ! isMachineTeamHealthy ( machineIDs ) ) { // TODO: Change to assert if it never happens
2018-08-30 05:40:39 +08:00
TraceEvent ( SevError , " MachineTeamUnhealthy_ShouldNeverHappenHere " )
2018-11-22 03:18:26 +08:00
. detail ( " Primary " , primary )
. detail ( " MachineIDSize " , machineIDs . size ( ) ) ;
2018-08-30 05:40:39 +08:00
traceAllInfo ( true ) ;
maxAttempts + = 1 ;
continue ;
}
std : : sort ( machineIDs . begin ( ) , machineIDs . end ( ) ) ;
2018-11-22 03:18:26 +08:00
if ( machineTeamExists ( machineIDs ) ) {
2018-08-30 05:40:39 +08:00
maxAttempts + = 1 ;
continue ;
2017-05-26 04:48:44 +08:00
}
2018-08-30 05:40:39 +08:00
// SOMEDAY: randomly pick one from teams with the lowest score
2018-11-22 03:18:26 +08:00
if ( score < bestScore ) {
// bestTeam is the team whose members collectively belong to the fewest existing server teams.
bestTeam = team ;
2017-05-26 04:48:44 +08:00
bestScore = score ;
}
}
2018-11-22 03:18:26 +08:00
// Step 5: Recover the machines from the representative processes in bestTeam and form the machine team
if ( bestTeam . size ( ) = = configuration . storageTeamSize ) {
// machineIDs is used to quickly check whether these machines already form an existing machine team
vector < Standalone < StringRef > > machineIDs ;
// machines keeps the machine references so we avoid looking up each machine by machineID
vector < Reference < TCMachineInfo > > machines ;
for ( auto process = bestTeam . begin ( ) ; process < bestTeam . end ( ) ; process + + ) {
2018-10-16 07:25:17 +08:00
Reference < TCMachineInfo > machine = server_info [ * * process ] - > machine ;
2018-11-22 03:18:26 +08:00
Standalone < StringRef > machine_id =
server_info [ * * process ] - > lastKnownInterface . locality . zoneId ( ) . get ( ) ;
// ASSERT( machine_info.find(machine_id) != machine_info.end() );
2018-08-30 05:40:39 +08:00
machineIDs . push_back ( machine_id ) ;
2018-10-16 07:25:17 +08:00
machines . push_back ( machine ) ;
2018-08-30 05:40:39 +08:00
}
2017-05-26 04:48:44 +08:00
2018-08-30 05:40:39 +08:00
std : : sort ( machineIDs . begin ( ) , machineIDs . end ( ) ) ;
2018-11-22 03:18:26 +08:00
if ( ! machineTeamExists ( machineIDs ) ) {
2018-10-16 07:25:17 +08:00
addMachineTeam ( machines ) ;
2018-08-30 05:40:39 +08:00
addedMachineTeams + + ;
2017-05-26 04:48:44 +08:00
}
2018-08-30 05:40:39 +08:00
} else {
2018-11-22 03:18:26 +08:00
TraceEvent ( SevWarn , " DataDistributionBuildTeams " , masterId )
. detail ( " Primary " , primary )
. detail ( " Reason " , " Unable to make desired machine Teams " ) ;
2018-08-30 05:40:39 +08:00
break ;
}
2018-11-22 03:18:26 +08:00
if ( + + loopCount > 2 * machineTeamsToBuild * ( configuration . storageTeamSize + 1 ) ) {
2018-08-30 05:40:39 +08:00
break ;
}
}
2017-05-26 04:48:44 +08:00
2018-08-30 05:40:39 +08:00
return addedMachineTeams ;
}
2017-05-26 04:48:44 +08:00
2018-11-22 03:18:26 +08:00
bool isMachineTeamHealthy ( vector < Standalone < StringRef > > const & machineIDs ) {
2018-08-30 05:40:39 +08:00
int healthyNum = 0 ;
// A healthy machine team should have the desired number of machines
2018-11-22 03:18:26 +08:00
if ( machineIDs . size ( ) ! = configuration . storageTeamSize ) return false ;
2018-08-30 05:40:39 +08:00
2018-11-22 03:18:26 +08:00
for ( auto & id : machineIDs ) {
auto & machine = machine_info [ id ] ;
if ( isMachineHealthy ( machine ) ) {
2018-08-30 05:40:39 +08:00
healthyNum + + ;
2017-05-26 04:48:44 +08:00
}
2018-08-30 05:40:39 +08:00
}
2018-11-22 03:18:26 +08:00
return ( healthyNum = = machineIDs . size ( ) ) ;
2018-08-30 05:40:39 +08:00
}
2018-11-22 03:18:26 +08:00
bool isMachineTeamHealthy ( Reference < TCMachineTeamInfo > const & machineTeam ) {
2018-08-30 05:40:39 +08:00
int healthyNum = 0 ;
// A healthy machine team should have the desired number of machines
2018-11-22 03:18:26 +08:00
if ( machineTeam - > size ( ) ! = configuration . storageTeamSize ) return false ;
2018-08-30 05:40:39 +08:00
2018-11-22 03:18:26 +08:00
for ( auto & machine : machineTeam - > machines ) {
if ( isMachineHealthy ( machine ) ) {
2018-08-30 05:40:39 +08:00
healthyNum + + ;
}
}
2018-11-22 03:18:26 +08:00
return ( healthyNum = = machineTeam - > machines . size ( ) ) ;
2018-08-30 05:40:39 +08:00
}
2018-11-22 03:18:26 +08:00
bool isMachineHealthy ( Reference < TCMachineInfo > const & machine ) {
if ( ! machine . isValid ( ) | | machine_info . find ( machine - > machineID ) = = machine_info . end ( ) | |
machine_info [ machine - > machineID ] - > serversOnMachine . empty ( ) ) {
//TODO: Remove this debug trace
// Debug trace
// if ( !machine.isValid() )
// TraceEvent(SevWarn, "InvalidMachineTeam").detail("IsValid", machine.isValid());
// else
// TraceEvent(SevWarn, "InvalidMachineTeam").detail("IsValid", machine.isValid())
// .detail("InMachineInfo", machine_info.find(machine->machineID) != machine_info.end())
// .detail("ServerNumber", machine_info[machine->machineID]->serversOnMachine.size());
2018-08-30 05:40:39 +08:00
return false ;
}
// Healthy machine has at least one healthy server
2018-11-22 03:18:26 +08:00
for ( auto & server : machine - > serversOnMachine ) {
if ( ! server_status . get ( server - > id ) . isUnhealthy ( ) ) {
2018-11-01 07:46:32 +08:00
return true ;
2017-05-26 04:48:44 +08:00
}
2018-08-30 05:40:39 +08:00
}
2018-11-01 07:46:32 +08:00
return false ;
2018-08-30 05:40:39 +08:00
}
2018-11-22 03:18:26 +08:00
bool isMachineIDValid ( vector < Standalone < StringRef > > const & machineIDs ) {
for ( auto & id : machineIDs ) {
if ( machine_info . find ( id ) = = machine_info . end ( ) | | machine_info [ id ] - > serversOnMachine . size ( ) = = 0 ) {
TraceEvent ( SevError , " InvalidMachineID " )
. detail ( " MachineID " , id . contents ( ) . printable ( ) )
. detail ( " ServerNumber " , machine_info [ id ] - > serversOnMachine . size ( ) ) ;
2018-08-30 05:40:39 +08:00
return false ;
}
}
return true ;
}
2018-11-22 03:18:26 +08:00
// Return a random machine team among those with the smallest machine-team score (including its penalty)
// The returned reference is invalid if no such machine team is found
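// Worked example (hypothetical scores): three healthy machine teams with base scores 19, 12, 12 and
// penalties 0, 3, 0 get adjusted scores 19, 15, 12, so the third team is the unique minimum and is
// returned; exact ties are broken by g_random->randomChoice().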
Reference < TCMachineTeamInfo > findOneLeastUsedMachineTeam (
std : : map < Reference < TCMachineTeamInfo > , int , CompareTCMachineTeamInfoRef > & machineTeamStats ,
std : : map < Reference < TCMachineTeamInfo > , int , CompareTCMachineTeamInfoRef > & machineTeamPenalties ) {
2018-08-30 05:40:39 +08:00
machineTeamStats . clear ( ) ;
2018-11-22 03:18:26 +08:00
for ( auto & machineTeam : machineTeams ) {
if ( ! isMachineTeamHealthy ( machineTeam ) ) {
2018-08-30 05:40:39 +08:00
TraceEvent ( SevWarn , " MachineTeamUnhealthy " ) . detail ( " MachineInfo " , machineTeam - > getMachineIDsStr ( ) ) ;
continue ;
}
int score = calculateMachineTeamScore ( machineTeam ) ;
2018-11-22 03:18:26 +08:00
ASSERT ( machineTeamPenalties . find ( machineTeam ) ! = machineTeamPenalties . end ( ) ) ;
// Penalize the team if we chose an existing server team from the machine team
score + = machineTeamPenalties [ machineTeam ] ;
2018-08-30 05:40:39 +08:00
machineTeamStats . insert ( std : : make_pair ( machineTeam , score ) ) ;
}
std : : vector < Reference < TCMachineTeamInfo > > leastUsedMachineTeams ;
int minUsedNumber = std : : numeric_limits < int > : : max ( ) ;
2018-11-22 03:18:26 +08:00
for ( auto & machineTeamStat : machineTeamStats ) {
if ( machineTeamStat . second < minUsedNumber ) {
2018-08-30 05:40:39 +08:00
leastUsedMachineTeams . clear ( ) ;
minUsedNumber = machineTeamStat . second ;
}
2018-11-22 03:18:26 +08:00
if ( machineTeamStat . second = = minUsedNumber ) {
2018-08-30 05:40:39 +08:00
leastUsedMachineTeams . push_back ( machineTeamStat . first ) ;
}
}
2018-11-22 03:18:26 +08:00
if ( leastUsedMachineTeams . size ( ) > 0 ) {
2018-11-08 06:37:10 +08:00
return g_random - > randomChoice ( leastUsedMachineTeams ) ;
2018-08-30 05:40:39 +08:00
} else {
TraceEvent ( " LeastUsedMachineTeamsNotFound " ) . detail ( " Debug " , " CheckInfoBelow " ) ;
traceAllInfo ( true ) ;
2018-11-08 06:37:10 +08:00
return Reference < TCMachineTeamInfo > ( ) ;
2018-08-30 05:40:39 +08:00
}
}
2018-11-22 03:18:26 +08:00
// A server's team may have incorrect size. We do NOT want to count those teams because they will be deleted anyway
int countCorrectSizeTeam ( Reference < TCServerInfo > & server , int expectedSize ) {
2018-08-30 05:40:39 +08:00
int count = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto & team : server - > teams ) {
if ( team - > size ( ) = = expectedSize ) {
2018-08-30 05:40:39 +08:00
+ + count ;
}
}
return count ;
}
UID findLeastUsedServerOnMachine ( Reference < TCMachineInfo > machine ) {
vector < UID > leastUsedServers ;
int minTeamNumber = std : : numeric_limits < int > : : max ( ) ;
2018-11-22 03:18:26 +08:00
for ( auto & server : machine - > serversOnMachine ) {
if ( ! server . isValid ( ) | | server_info . find ( server - > id ) = = server_info . end ( ) ) continue ;
// Only pick healthy server, which is not failed or excluded.
if ( server_status . get ( server - > id ) . isUnhealthy ( ) ) continue ;
// if ( server->teams.size() < minTeamNumber ) {
int numTeams = countCorrectSizeTeam ( server , configuration . storageTeamSize ) ; // server->teams.size();
if ( numTeams < minTeamNumber ) {
minTeamNumber = numTeams ; // server->teams.size();
2018-08-30 05:40:39 +08:00
leastUsedServers . clear ( ) ;
}
2018-11-22 03:18:26 +08:00
if ( minTeamNumber < = numTeams ) { // server->teams.size();
2018-08-30 05:40:39 +08:00
leastUsedServers . push_back ( server - > id ) ;
}
}
return g_random - > randomChoice ( leastUsedServers ) ;
}
2018-11-22 03:18:26 +08:00
// Machine team score is the total number of server teams owned by the servers on the machine team
// plus the machine score.
2018-11-01 07:46:32 +08:00
// Machine score is the max number of server teams of servers on the machine.
2018-11-22 03:18:26 +08:00
// Adding the machine score penalizes a machine team that includes a server owning a large number of teams.
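// Worked example (hypothetical team counts): for a 2-machine team where machine A hosts servers owning
// 3 and 5 server teams and machine B hosts servers owning 2 and 2, the score is
// (3 + 5 + max(3, 5)) + (2 + 2 + max(2, 2)) = 13 + 6 = 19.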
int calculateMachineTeamScore ( Reference < TCMachineTeamInfo > machineTeam ) {
2018-08-30 05:40:39 +08:00
int score = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto & machine : machineTeam - > machines ) {
2018-08-30 05:40:39 +08:00
int machineScore = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto & server : machine - > serversOnMachine ) {
2018-08-30 05:40:39 +08:00
score + = server - > teams . size ( ) ;
2018-11-22 03:18:26 +08:00
if ( server - > teams . size ( ) > machineScore ) machineScore = server - > teams . size ( ) ;
2018-08-30 05:40:39 +08:00
}
score + = machineScore ;
}
return score ;
}
2018-11-01 07:46:32 +08:00
// Check if the machines that host this server team's servers form one of the recorded machine teams.
// A server team should always be built from servers on a machine team
2018-11-22 03:18:26 +08:00
bool isOnSameMachineTeam ( Reference < TCTeamInfo > & team ) {
std : : vector < Standalone < StringRef > > machineIDs ;
for ( auto & server : team - > servers ) {
if ( ! server - > machine . isValid ( ) ) return false ;
2018-08-30 05:40:39 +08:00
machineIDs . push_back ( server - > machine - > machineID ) ;
}
std : : sort ( machineIDs . begin ( ) , machineIDs . end ( ) ) ;
int numExistance = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto & server : team - > servers ) {
for ( auto & candidateMachineTeam : server - > machine - > machineTeams ) {
2018-08-30 05:40:39 +08:00
std : : sort ( candidateMachineTeam - > machineIDs . begin ( ) , candidateMachineTeam - > machineIDs . end ( ) ) ;
2018-11-22 03:18:26 +08:00
if ( machineIDs = =
candidateMachineTeam - > machineIDs ) { // TODO: We may be able to return here for performance benefit
2018-08-30 05:40:39 +08:00
numExistance + + ;
break ;
}
}
}
2018-11-22 03:18:26 +08:00
return ( numExistance = = team - > servers . size ( ) ) ;
2018-08-30 05:40:39 +08:00
}
2018-11-01 07:46:32 +08:00
// Sanity check the property of teams in unit test
// Return true if all server teams belong to machine teams
2018-08-30 05:40:39 +08:00
bool sanityCheckTeams ( ) {
int teamIndex = 0 ;
bool alwaysOnSameMachineTeam = true ;
2018-11-22 03:18:26 +08:00
for ( auto & team : teams ) {
2018-08-30 05:40:39 +08:00
bool onSameMachineTeam = isOnSameMachineTeam ( team ) ;
2018-11-22 03:18:26 +08:00
if ( onSameMachineTeam = = false ) alwaysOnSameMachineTeam = false ;
2018-08-30 05:40:39 +08:00
int memberIndex = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto & server : team - > servers ) {
2018-08-30 05:40:39 +08:00
memberIndex + + ;
}
teamIndex + + ;
}
return alwaysOnSameMachineTeam ;
}
2018-11-01 07:46:32 +08:00
// A machine may have some invalid machine teams, which will eventually be removed
// We should only count valid machine teams for the machine so that the machine can be chosen into a new team
// expectedSize is the expected team size
// Return the number of machine teams that match the correct team size
2018-11-22 03:18:26 +08:00
int countCorrectSizedMachineTeam ( Reference < TCMachineInfo > & machine , const int expectedSize ) {
2018-08-30 05:40:39 +08:00
int count = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto & machineTeam : machine - > machineTeams ) {
if ( machineTeam - > size ( ) = = expectedSize ) + + count ;
2018-08-30 05:40:39 +08:00
}
return count ;
}
2018-11-01 07:46:32 +08:00
// Create server teams based on machine teams
// Before the number of machine teams reaches the threshold, build a machine team for each server team
// When it reaches the threshold, first try to build a server team from existing machine teams; if that fails,
// build an extra machine team and record the event in a trace
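// Worked threshold example (hypothetical knob value): with 10 machines and MAX_TEAMS_PER_SERVER = 5,
// machineTeamThreshold = 10 * 5 = 50; once more than 50 machine teams exist, machineTeamsToBuild is set
// to 0 so a server team is first sought among existing machine teams, and one extra machine team is
// built only if that search fails.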
2018-11-22 03:18:26 +08:00
int addTeamsBestOf ( int teamsToBuild ) {
2018-10-16 07:25:17 +08:00
ASSERT ( teamsToBuild > 0 ) ;
2018-11-22 03:18:26 +08:00
if ( machine_info . size ( ) = = 0 & & server_info . size ( ) ! = 0 ) {
2018-08-30 05:40:39 +08:00
constructMachinesFromServers ( ) ;
2018-11-22 03:18:26 +08:00
TraceEvent ( " AddTeamsBestOfBuildMachineInfo " )
. detail ( " ServerInfoSize " , server_info . size ( ) )
. detail ( " MachineInfoSize " , machine_info . size ( ) ) ;
2018-08-30 05:40:39 +08:00
}
int addedMachineTeams = 0 ;
int addedTeams = 0 ;
int loopCount = 0 ;
int machineTeamThreshold = machine_info . size ( ) * SERVER_KNOBS - > MAX_TEAMS_PER_SERVER ;
bool ignoreMachineTeamThreshhold = false ;
2018-11-22 03:18:26 +08:00
while ( addedTeams < teamsToBuild ) {
2018-08-30 05:40:39 +08:00
int machineTeamsToBuild = 1 ;
2018-11-22 03:18:26 +08:00
// Step 1: Create 1 best machine team
if ( machineTeams . size ( ) > machineTeamThreshold ) { // TODO: only count valid machine team number
2018-08-30 05:40:39 +08:00
TEST ( true ) ;
2018-11-22 03:18:26 +08:00
if ( ! ignoreMachineTeamThreshhold ) {
2018-08-30 05:40:39 +08:00
machineTeamsToBuild = 0 ; // First try to find a server team from existing machine teams
} else {
2018-11-22 03:18:26 +08:00
machineTeamsToBuild =
1 ; // Ignore the machine team limit and build a new one, hoping to find a server team
}
// Record the exceptional case where we build more machine teams than the threshold
TraceEvent ( " MachineTeamNumReachThreshold " )
. detail ( " IgnoreThreshold " , ignoreMachineTeamThreshhold )
. detail ( " Primary " , primary )
. detail ( " AddedMachineTeamsNumber " , addedMachineTeams )
. detail ( " AimToBuildMachineNumber " , machineTeamsToBuild )
. detail ( " MachineTeamsNumber " , machineTeams . size ( ) )
. detail ( " CurrentUniqueMachineTeamNum " , countMachineTeams ( ) )
. detail ( " StorageTeamSize " , configuration . storageTeamSize )
. detail ( " TeamsToBuild " , teamsToBuild )
. detail ( " CurrentTeamNumber " , teams . size ( ) ) ;
2018-08-30 05:40:39 +08:00
}
2018-11-01 07:46:32 +08:00
// Build 1 machine team if machineTeamsToBuild is not zero
2018-08-30 05:40:39 +08:00
addedMachineTeams = addBestMachineTeams ( machineTeamsToBuild ) ;
std : : map < Reference < TCMachineTeamInfo > , int , CompareTCMachineTeamInfoRef > machineTeamStats ;
std : : map < Reference < TCMachineTeamInfo > , int , CompareTCMachineTeamInfoRef > machineTeamPenalties ;
2018-11-22 03:18:26 +08:00
for ( auto & machineTeam : machineTeams ) {
2018-08-30 05:40:39 +08:00
machineTeamPenalties . insert ( std : : make_pair ( machineTeam , 0 ) ) ;
machineTeamStats . insert ( std : : make_pair ( machineTeam , 0 ) ) ;
}
std : : vector < UID > bestServerTeam ;
Reference < TCMachineTeamInfo > bestChosenMachineTeam ;
int bestScore = std : : numeric_limits < int > : : max ( ) ;
2018-11-22 03:18:26 +08:00
int maxAttempts = SERVER_KNOBS - > BEST_OF_AMT ; // BEST_OF_AMT = 4
for ( int i = 0 ; i < maxAttempts & & i < 100 ; + + i ) {
// Step 2: Choose 1 least used machine team
Reference < TCMachineTeamInfo > chosenMachineTeam =
findOneLeastUsedMachineTeam ( machineTeamStats , machineTeamPenalties ) ;
if ( ! chosenMachineTeam . isValid ( ) ) {
// TODO: MX: Debug: may change SevWarn to SevError to trigger error in correctness test.
// TODO: MX: Ask Evan: We may face the situation that temporarily we have no healthy machine. What
// should we do?
TraceEvent ( SevWarn , " MachineTeamNotFound " )
. detail ( " Primary " , primary )
. detail ( " MachineTeamNumber " , machineTeams . size ( ) ) ;
2018-08-30 05:40:39 +08:00
traceAllInfo ( ) ;
break ;
}
2018-11-22 03:18:26 +08:00
// Step 3: Randomly pick 1 server from each machine in the chosen machine team to form a server team
2018-08-30 05:40:39 +08:00
vector < UID > serverTeam ;
int tmpIndex = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto & machine : chosenMachineTeam - > machines ) {
UID chosenServer = findLeastUsedServerOnMachine ( machine ) ; // machine->findOneLeastUsedServer();
2018-08-30 05:40:39 +08:00
serverTeam . push_back ( chosenServer ) ;
}
2018-11-22 03:18:26 +08:00
if ( serverTeam . size ( ) ! = configuration . storageTeamSize ) {
TraceEvent ( SevWarn , " DataDistributionBuildTeams " , masterId )
. detail ( " Primary " , primary )
. detail ( " Reason " , " Unable to make desiredTeams " )
. detail ( " AddedTeams " , addedTeams )
. detail ( " TeamsToBuild " , teamsToBuild )
. detail ( " IncorrectTeamSize " , serverTeam . size ( ) ) ;
2018-08-30 05:40:39 +08:00
maxAttempts + + ;
continue ;
}
std : : sort ( serverTeam . begin ( ) , serverTeam . end ( ) ) ;
2018-11-22 03:18:26 +08:00
if ( teamExists ( serverTeam ) ) {
2018-08-30 05:40:39 +08:00
// Decrease the possibility the chosenMachineTeam will be chosen again by increasing its team score
2018-11-22 03:18:26 +08:00
// Otherwise, we may get stuck on a least used machine team that always generates an existing server team
machineTeamPenalties [ chosenMachineTeam ] + = 1 ;
2018-08-30 05:40:39 +08:00
maxAttempts + = 1 ;
continue ;
}
2018-11-22 03:18:26 +08:00
// Pick the server team with smallest score in all attempts
2018-08-30 05:40:39 +08:00
int score = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto & server : serverTeam ) {
2018-08-30 05:40:39 +08:00
score + = server_info [ server ] - > teams . size ( ) ;
}
2018-11-22 03:18:26 +08:00
if ( score < bestScore ) {
2018-08-30 05:40:39 +08:00
bestScore = score ;
2018-11-22 03:18:26 +08:00
bestServerTeam = serverTeam ; // TODO: Improve the code efficiency
2018-08-30 05:40:39 +08:00
bestChosenMachineTeam = chosenMachineTeam ;
}
}
2018-11-22 03:18:26 +08:00
if ( bestServerTeam . size ( ) ! = configuration . storageTeamSize ) { // No new server team was found
if ( ignoreMachineTeamThreshhold ) { // Has tried to build one more machine team but still failed
2018-08-30 05:40:39 +08:00
break ;
} else {
2018-11-22 03:18:26 +08:00
ignoreMachineTeamThreshhold = true ; // Try to find a server team by building more machine teams
2018-08-30 05:40:39 +08:00
continue ; // while( addedTeams < teamsToBuild )
}
}
2018-11-22 03:18:26 +08:00
// Step 4: Add the server team
2018-08-30 05:40:39 +08:00
std : : sort ( bestServerTeam . begin ( ) , bestServerTeam . end ( ) ) ;
2018-11-22 03:18:26 +08:00
if ( ! teamExists ( bestServerTeam ) ) {
2018-08-30 05:40:39 +08:00
addTeam ( bestServerTeam . begin ( ) , bestServerTeam . end ( ) , false ) ;
addedTeams + + ;
}
2018-11-22 03:18:26 +08:00
if ( + + loopCount > 2 * teamsToBuild * ( configuration . storageTeamSize + 1 ) ) {
2017-05-26 04:48:44 +08:00
break ;
}
2018-08-30 05:40:39 +08:00
}
2018-11-22 03:18:26 +08:00
TraceEvent ( " AddTeamsBestOf " )
. detail ( " Primary " , primary )
. detail ( " AddedTeamNumber " , addedTeams )
. detail ( " AimToBuildTeamNumber " , teamsToBuild )
. detail ( " CurrentTeamNumber " , teams . size ( ) )
. detail ( " StorageTeamSize " , configuration . storageTeamSize )
. detail ( " MachineTeamNum " , machineTeams . size ( ) ) ;
2018-08-30 05:40:39 +08:00
2017-05-26 04:48:44 +08:00
return addedTeams ;
}
// Use the current set of known processes (from server_info) to compute an optimized set of storage server teams.
// The following are guarantees of the process:
// - Each newly-built team will meet the replication policy
// - All newly-built teams will have exactly teamSize machines
//
// buildTeams() only ever adds teams to the list of teams. Teams are only removed from the list when all data has been removed.
//
// buildTeams will not count teams larger than teamSize against the desired teams.
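// Worked sizing example (hypothetical knob values): with DESIRED_TEAMS_PER_SERVER = 5,
// MAX_TEAMS_PER_SERVER = 10 and serverCount = 8, desiredTeams = 40 and maxTeams = 80; if 25 of 30
// correctly sized teams are healthy, teamCount = std::max(25, 40 + 30 - 80) = 25 and
// teamsToBuild = 40 - 25 = 15 is handed to addTeamsBestOf().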
2018-11-08 13:05:31 +08:00
ACTOR static Future < Void > buildTeams ( DDTeamCollection * self ) {
2017-05-26 04:48:44 +08:00
state int desiredTeams ;
int serverCount = 0 ;
int uniqueDataCenters = 0 ;
int uniqueMachines = 0 ;
std : : set < Optional < Standalone < StringRef > > > machines ;
2018-11-22 03:18:26 +08:00
for ( auto i = self - > server_info . begin ( ) ; i ! = self - > server_info . end ( ) ; + + i ) {
2018-06-08 05:05:53 +08:00
if ( ! self - > server_status . get ( i - > first ) . isUnhealthy ( ) ) {
2017-05-26 04:48:44 +08:00
+ + serverCount ;
LocalityData & serverLocation = i - > second - > lastKnownInterface . locality ;
machines . insert ( serverLocation . zoneId ( ) ) ;
}
}
uniqueMachines = machines . size ( ) ;
2018-11-22 03:18:26 +08:00
TraceEvent ( " BuildTeams " )
. detail ( " ServerNumber " , self - > server_info . size ( ) )
. detail ( " UniqueMachines " , uniqueMachines )
. detail ( " StorageTeamSize " , self - > configuration . storageTeamSize ) ;
if ( self - > teams . size ( ) = = 0 ) {
2018-08-30 05:40:39 +08:00
TraceEvent ( " BuildTeams " ) . detail ( " ZeroTeams " , " Check the server and machine info below " ) ;
self - > traceAllInfo ( true ) ;
}
2017-05-26 04:48:44 +08:00
// If there are too few machines to even build teams or there are too few represented datacenters, build no new teams
2017-09-08 06:32:08 +08:00
if ( uniqueMachines > = self - > configuration . storageTeamSize ) {
2017-05-26 04:48:44 +08:00
desiredTeams = SERVER_KNOBS - > DESIRED_TEAMS_PER_SERVER * serverCount ;
2018-07-08 07:51:16 +08:00
int maxTeams = SERVER_KNOBS - > MAX_TEAMS_PER_SERVER * serverCount ;
2017-05-26 04:48:44 +08:00
// Count only properly sized teams against the desired number of teams. This is to prevent "emergency" merged teams (see MoveKeys)
// from overwhelming the team count (since we really did not want that team in the first place). These larger teams will not be
// returned from getRandomTeam() (as used by bestTeam to find a new home for a shard).
// Also exclude teams who have members in the wrong configuration, since we don't want these teams either
int teamCount = 0 ;
2018-07-08 07:51:16 +08:00
int totalTeamCount = 0 ;
2018-11-22 03:18:26 +08:00
for ( int i = 0 ; i < self - > teams . size ( ) ; + + i ) {
2017-09-08 06:32:08 +08:00
if ( self - > teams [ i ] - > getServerIDs ( ) . size ( ) = = self - > configuration . storageTeamSize & & ! self - > teams [ i ] - > isWrongConfiguration ( ) ) {
2018-07-08 07:51:16 +08:00
if ( self - > teams [ i ] - > isHealthy ( ) ) {
teamCount + + ;
}
totalTeamCount + + ;
2017-05-26 04:48:44 +08:00
}
}
2018-08-22 12:08:15 +08:00
TraceEvent ( " BuildTeamsBegin " , self - > masterId ) . detail ( " DesiredTeams " , desiredTeams ) . detail ( " MaxTeams " , maxTeams ) . detail ( " BadTeams " , self - > badTeams . size ( ) )
2018-07-08 07:51:16 +08:00
. detail ( " UniqueMachines " , uniqueMachines ) . detail ( " TeamSize " , self - > configuration . storageTeamSize ) . detail ( " Servers " , serverCount )
. detail ( " CurrentTrackedTeams " , self - > teams . size ( ) ) . detail ( " HealthyTeamCount " , teamCount ) . detail ( " TotalTeamCount " , totalTeamCount ) ;
2018-11-22 03:18:26 +08:00
// Adjust teamCount for the situation where a large number of teams are unhealthy.
teamCount = std : : max ( teamCount , desiredTeams + totalTeamCount - maxTeams ) ;
2017-05-26 04:48:44 +08:00
if ( desiredTeams > teamCount ) {
std : : set < UID > desiredServerSet ;
2018-11-22 03:18:26 +08:00
for ( auto i = self - > server_info . begin ( ) ; i ! = self - > server_info . end ( ) ; + + i ) {
if ( ! self - > server_status . get ( i - > first ) . isUnhealthy ( ) ) {
2017-05-26 04:48:44 +08:00
desiredServerSet . insert ( i - > second - > id ) ;
2018-11-22 03:18:26 +08:00
}
}
2017-05-26 04:48:44 +08:00
vector < UID > desiredServerVector ( desiredServerSet . begin ( ) , desiredServerSet . end ( ) ) ;
state int teamsToBuild = desiredTeams - teamCount ;
state vector < std : : vector < UID > > builtTeams ;
2018-11-01 07:46:32 +08:00
int addedTeams = self - > addTeamsBestOf ( teamsToBuild ) ;
2018-11-22 03:18:26 +08:00
if ( addedTeams < = 0 & & self - > teams . size ( ) = = 0 ) {
TraceEvent ( SevWarn , " NoTeamAfterBuildTeam " )
. detail ( " TeamNum " , self - > teams . size ( ) )
. detail ( " Debug " , " Check information below " ) ;
2018-11-01 07:46:32 +08:00
self - > traceAllInfo ( true ) ;
2017-05-26 04:48:44 +08:00
}
}
}
self - > evaluateTeamQuality ( ) ;
//Building teams can cause servers to become undesired, which can make teams unhealthy.
//Let all of these changes get worked out before responding to the get team request
2018-08-11 04:57:10 +08:00
wait ( delay ( 0 , TaskDataDistributionLaunch ) ) ;
2017-05-26 04:48:44 +08:00
return Void ( ) ;
}
void noHealthyTeams ( ) {
std : : set < UID > desiredServerSet ;
std : : string desc ;
2018-11-22 03:18:26 +08:00
for ( auto i = server_info . begin ( ) ; i ! = server_info . end ( ) ; + + i ) {
2017-05-26 04:48:44 +08:00
ASSERT ( i - > first = = i - > second - > id ) ;
if ( ! server_status . get ( i - > first ) . isFailed ) {
desiredServerSet . insert ( i - > first ) ;
desc + = i - > first . shortString ( ) + " ( " + i - > second - > lastKnownInterface . toString ( ) + " ), " ;
}
}
vector < UID > desiredServerVector ( desiredServerSet . begin ( ) , desiredServerSet . end ( ) ) ;
TraceEvent ( SevWarn , " NoHealthyTeams " , masterId )
. detail ( " CurrentTeamCount " , teams . size ( ) )
. detail ( " ServerCount " , server_info . size ( ) )
. detail ( " NonFailedServerCount " , desiredServerVector . size ( ) ) ;
}
2018-08-30 05:40:39 +08:00
void countHealthyTeams ( ) {
int healthy = 0 ;
2018-11-22 03:18:26 +08:00
for ( auto it = teams . begin ( ) ; it ! = teams . end ( ) ; it + + ) {
if ( ( * it ) - > isHealthy ( ) ) {
2018-08-30 05:40:39 +08:00
healthy + + ;
}
}
TraceEvent ( healthy = = healthyTeamCount ? SevInfo : SevWarnAlways , " HealthyTeamCheck " , masterId )
2018-11-22 03:18:26 +08:00
. detail ( " ValidatedCount " , healthy )
. detail ( " ProvidedCount " , healthyTeamCount )
. detail ( " Primary " , primary ) ;
2018-08-30 05:40:39 +08:00
}
2017-10-11 01:36:33 +08:00
bool shouldHandleServer ( const StorageServerInterface & newServer ) {
2018-11-22 03:18:26 +08:00
return ( includedDCs . empty ( ) | |
std : : find ( includedDCs . begin ( ) , includedDCs . end ( ) , newServer . locality . dcId ( ) ) ! = includedDCs . end ( ) | |
( otherTrackedDCs . present ( ) & &
std : : find ( otherTrackedDCs . get ( ) . begin ( ) , otherTrackedDCs . get ( ) . end ( ) , newServer . locality . dcId ( ) ) = = otherTrackedDCs . get ( ) . end ( ) ) ) ;
2017-10-11 01:36:33 +08:00
}
2018-05-06 09:16:28 +08:00
void addServer ( StorageServerInterface newServer , ProcessClass processClass , Promise < Void > errorOut , Version addedVersion ) {
2017-10-11 01:36:33 +08:00
if ( ! shouldHandleServer ( newServer ) ) {
return ;
}
2017-05-26 04:48:44 +08:00
allServers . push_back ( newServer . id ( ) ) ;
2018-06-09 02:11:08 +08:00
TraceEvent ( " AddedStorageServer " , masterId ) . detail ( " ServerID " , newServer . id ( ) ) . detail ( " ProcessClass " , processClass . toString ( ) ) . detail ( " WaitFailureToken " , newServer . waitFailure . getEndpoint ( ) . token ) . detail ( " Address " , newServer . waitFailure . getEndpoint ( ) . address ) ;
2018-09-01 08:54:55 +08:00
auto & r = server_info [ newServer . id ( ) ] = Reference < TCServerInfo > ( new TCServerInfo ( newServer , processClass , includedDCs . empty ( ) | | std : : find ( includedDCs . begin ( ) , includedDCs . end ( ) , newServer . locality . dcId ( ) ) ! = includedDCs . end ( ) , storageServerSet ) ) ;
2018-08-30 05:40:39 +08:00
2018-11-22 03:18:26 +08:00
// Establish the relationship between the server and its machine
2018-08-30 05:40:39 +08:00
constructMachineFor1Server ( newServer . id ( ) ) ;
2018-06-08 07:14:40 +08:00
r - > tracker = storageServerTracker ( this , cx , r . getPtr ( ) , & server_status , lock , masterId , & server_info , serverChanges , errorOut , addedVersion ) ;
2018-11-22 03:18:26 +08:00
doBuildTeams = true ; // Adding a new server triggers building new teams
2017-05-26 04:48:44 +08:00
restartTeamBuilder . trigger ( ) ;
}
2018-11-08 13:05:31 +08:00
bool removeTeam ( Reference < TCTeamInfo > team ) {
TraceEvent ( " RemovedTeam " , masterId ) . detail ( " Team " , team - > getDesc ( ) ) ;
bool found = false ;
for ( int t = 0 ; t < teams . size ( ) ; t + + ) {
if ( teams [ t ] = = team ) {
teams [ t - - ] = teams . back ( ) ;
teams . pop_back ( ) ;
found = true ;
break ;
}
}
for ( auto & server : team - > servers ) {
for ( int t = 0 ; t < server - > teams . size ( ) ; t + + ) {
if ( server - > teams [ t ] = = team ) {
ASSERT ( found ) ;
server - > teams [ t - - ] = server - > teams . back ( ) ;
server - > teams . pop_back ( ) ;
break ;
}
}
}
team - > tracker . cancel ( ) ;
return found ;
}
2017-05-26 04:48:44 +08:00
void removeServer ( UID removedServer ) {
TraceEvent ( " RemovedStorageServer " , masterId ) . detail ( " ServerID " , removedServer ) ;
// ASSERT( !shardsAffectedByTeamFailure->getServersForTeam( t ) for all t in teams that contain removedServer )
2018-11-22 03:18:26 +08:00
Reference < TCServerInfo > removedServerInfo = server_info [ removedServer ] ;
2017-05-26 04:48:44 +08:00
2018-08-30 05:40:39 +08:00
// Step: Remove server teams that relate to removedServer
2017-05-26 04:48:44 +08:00
// Find all servers with which the removedServer shares teams
std : : set < UID > serversWithAjoiningTeams ;
2018-11-22 03:18:26 +08:00
auto & sharedTeams = removedServerInfo - > teams ;
for ( int i = 0 ; i < sharedTeams . size ( ) ; + + i ) {
auto & teamIds = sharedTeams [ i ] - > getServerIDs ( ) ;
2017-05-26 04:48:44 +08:00
serversWithAjoiningTeams . insert ( teamIds . begin ( ) , teamIds . end ( ) ) ;
}
serversWithAjoiningTeams . erase ( removedServer ) ;
// For each server in a team with the removedServer, erase shared teams from the list of teams in that other server
for ( auto it = serversWithAjoiningTeams . begin ( ) ; it ! = serversWithAjoiningTeams . end ( ) ; + + it ) {
2018-11-22 03:18:26 +08:00
auto & serverTeams = server_info [ * it ] - > teams ;
for ( int t = 0 ; t < serverTeams . size ( ) ; t + + ) {
auto & serverIds = serverTeams [ t ] - > getServerIDs ( ) ;
2017-05-26 04:48:44 +08:00
if ( std : : count ( serverIds . begin ( ) , serverIds . end ( ) , removedServer ) ) {
2018-08-30 05:40:39 +08:00
serverTeams [ t - - ] = serverTeams . back ( ) ;
serverTeams . pop_back ( ) ;
2017-05-26 04:48:44 +08:00
}
}
}
2018-08-30 05:40:39 +08:00
// Step: Remove machine info related to removedServer
// Remove the server from its machine
Reference < TCMachineInfo > removedMachineInfo = removedServerInfo - > machine ;
2018-11-22 03:18:26 +08:00
for ( int i = 0 ; i < removedMachineInfo - > serversOnMachine . size ( ) ; + + i ) {
if ( removedMachineInfo - > serversOnMachine [ i ] = = removedServerInfo ) {
// Safe even when removedServerInfo is the last one
2018-08-30 05:40:39 +08:00
removedMachineInfo - > serversOnMachine [ i - - ] = removedMachineInfo - > serversOnMachine . back ( ) ;
removedMachineInfo - > serversOnMachine . pop_back ( ) ;
break ;
}
}
// Remove machine if no server on it
2018-11-22 03:18:26 +08:00
if ( removedMachineInfo - > serversOnMachine . size ( ) = = 0 ) {
2018-08-30 05:40:39 +08:00
// Find machines that share teams with the removed machine
2018-11-22 03:18:26 +08:00
std : : set < Standalone < StringRef > > machinesWithAjoiningTeams ;
for ( auto & machineTeam : removedMachineInfo - > machineTeams ) {
machinesWithAjoiningTeams . insert ( machineTeam - > machineIDs . begin ( ) , machineTeam - > machineIDs . end ( ) ) ;
2018-08-30 05:40:39 +08:00
}
machinesWithAjoiningTeams . erase ( removedMachineInfo - > machineID ) ;
2018-11-22 03:18:26 +08:00
// For each machine in a machine team with the removed machine,
// erase shared machine teams from the list of teams.
for ( auto it = machinesWithAjoiningTeams . begin ( ) ; it ! = machinesWithAjoiningTeams . end ( ) ; + + it ) {
auto & machineTeams = machine_info [ * it ] - > machineTeams ;
for ( int t = 0 ; t < machineTeams . size ( ) ; t + + ) {
auto & machineTeam = machineTeams [ t ] ;
if ( std : : count ( machineTeam - > machineIDs . begin ( ) , machineTeam - > machineIDs . end ( ) ,
removedMachineInfo - > machineID ) ) {
2018-08-30 05:40:39 +08:00
machineTeams [ t - - ] = machineTeams . back ( ) ;
machineTeams . pop_back ( ) ;
}
}
}
// Remove global machine team that includes removedMachineInfo
2018-11-22 03:18:26 +08:00
for ( int t = 0 ; t < machineTeams . size ( ) ; t + + ) {
auto & machineTeam = machineTeams [ t ] ;
if ( std : : count ( machineTeam - > machineIDs . begin ( ) , machineTeam - > machineIDs . end ( ) ,
removedMachineInfo - > machineID ) ) {
2018-08-30 05:40:39 +08:00
machineTeams [ t - - ] = machineTeams . back ( ) ;
machineTeams . pop_back ( ) ;
}
}
// Remove removedMachineInfo from machine's global info
machine_info . erase ( removedMachineInfo - > machineID ) ;
2018-11-22 03:18:26 +08:00
TraceEvent ( " MachineLocalityMapUpdate " )
. detail ( " MachineUIDRemoved " , removedMachineInfo - > machineID . toString ( ) )
. detail ( " ServerID " , removedServerInfo - > id . toString ( ) ) ;
2018-08-30 05:40:39 +08:00
} else { // Update machine's locality if machine uses removed server's locality and machine still has servers
2018-11-22 03:18:26 +08:00
auto & representativeServer = removedMachineInfo - > serversOnMachine [ 0 ] ;
auto & locality = representativeServer - > lastKnownInterface . locality ;
ASSERT ( server_info . find ( representativeServer - > id ) ! = server_info . end ( ) ) ;
2018-08-30 05:40:39 +08:00
LocalityEntry localityEntry = machineLocalityMap . add ( locality , & representativeServer - > id ) ;
removedMachineInfo - > localityEntry = localityEntry ;
2018-11-22 03:18:26 +08:00
TraceEvent ( " MachineLocalityMapUpdate " )
. detail ( " ServerIDOld " , removedServerInfo - > id . toString ( ) )
. detail ( " ServerIDNew " , representativeServer - > id . toString ( ) )
. detail ( " IsNewServerIDValid " , server_info . find ( representativeServer - > id ) ! = server_info . end ( ) ) ;
2018-08-30 05:40:39 +08:00
}
// Update machineLocalityMap by rebuilding the map
2018-11-22 03:18:26 +08:00
// FIXME: add remove support to localitySet so we do not have to recreate it
2018-08-30 05:40:39 +08:00
rebuildMachineLocalityMap ( ) ;
// Step: Remove removedServer from server's global data
2018-11-22 03:18:26 +08:00
for ( int s = 0 ; s < allServers . size ( ) ; s + + ) {
2017-05-26 04:48:44 +08:00
if ( allServers [ s ] = = removedServer ) {
allServers [ s - - ] = allServers . back ( ) ;
allServers . pop_back ( ) ;
}
}
server_info . erase ( removedServer ) ;
2018-09-22 07:14:39 +08:00
if ( server_status . get ( removedServer ) . initialized & & server_status . get ( removedServer ) . isUnhealthy ( ) ) {
2018-09-01 08:40:27 +08:00
unhealthyServers - - ;
}
server_status . clear ( removedServer ) ;
2018-09-01 08:54:55 +08:00
//FIXME: add remove support to localitySet so we do not have to recreate it
resetLocalitySet ( ) ;
2017-05-26 04:48:44 +08:00
// remove all teams that contain removedServer
// SOMEDAY: can we avoid walking through all teams, since we have an index of teams in which removedServer participated
2018-08-30 05:40:39 +08:00
int removedCount = 0 ;
2018-11-22 03:18:26 +08:00
for ( int t = 0 ; t < teams . size ( ) ; t + + ) {
2017-05-26 04:48:44 +08:00
if ( std : : count ( teams [ t ] - > getServerIDs ( ) . begin ( ) , teams [ t ] - > getServerIDs ( ) . end ( ) , removedServer ) ) {
2018-11-22 03:18:26 +08:00
TraceEvent ( " TeamRemoved " )
. detail ( " Primary " , primary )
. detail ( " TeamServerIDs " , teams [ t ] - > getServerIDsStr ( ) ) ;
2017-05-26 04:48:44 +08:00
teams [ t ] - > tracker . cancel ( ) ;
teams [ t - - ] = teams . back ( ) ;
teams . pop_back ( ) ;
2018-08-30 05:40:39 +08:00
removedCount + + ;
2017-05-26 04:48:44 +08:00
}
}
2018-08-22 12:08:15 +08:00
2018-11-22 03:18:26 +08:00
if ( removedCount = = 0 ) {
TraceEvent ( SevInfo , " NoneTeamRemovedWhenServerRemoved " )
. detail ( " Primary " , primary )
. detail ( " Debug " , " ThisShouldRarelyHappen_CheckInfoBelow " ) ;
2018-08-30 05:40:39 +08:00
traceAllInfo ( ) ;
}
2017-05-26 04:48:44 +08:00
doBuildTeams = true ;
restartTeamBuilder . trigger ( ) ;
TraceEvent ( " DataDistributionTeamCollectionUpdate " , masterId )
2018-11-22 03:18:26 +08:00
. detail ( " Teams " , teams . size ( ) )
. detail ( " BadTeams " , badTeams . size ( ) )
. detail ( " Servers " , allServers . size ( ) ) ;
2017-05-26 04:48:44 +08:00
}
} ;
// Track a team and issue RelocateShards when the level of degradation changes
2018-11-10 02:07:55 +08:00
ACTOR Future < Void > teamTracker ( DDTeamCollection * self , Reference < TCTeamInfo > team , bool badTeam ) {
2017-05-26 04:48:44 +08:00
state int lastServersLeft = team - > getServerIDs ( ) . size ( ) ;
state bool lastAnyUndesired = false ;
2018-11-08 13:05:31 +08:00
state bool logTeamEvents = g_network - > isSimulated ( ) | | ! badTeam ;
2018-11-03 04:00:15 +08:00
state bool lastReady = false ;
2018-09-22 07:14:39 +08:00
state bool lastHealthy ;
state bool lastOptimal ;
2017-05-26 04:48:44 +08:00
state bool lastWrongConfiguration = team - > isWrongConfiguration ( ) ;
2018-08-30 05:40:39 +08:00
2018-02-03 03:46:04 +08:00
state bool lastZeroHealthy = self - > zeroHealthyTeams - > get ( ) ;
2018-09-13 09:29:49 +08:00
state bool firstCheck = true ;
2018-01-31 09:00:51 +08:00
2018-11-08 13:05:31 +08:00
if ( logTeamEvents ) {
2018-10-18 02:45:47 +08:00
TraceEvent ( " TeamTrackerStarting " , self - > masterId ) . detail ( " Reason " , " Initial wait complete (sc) " ) . detail ( " Team " , team - > getDesc ( ) ) ;
}
2018-08-10 04:16:09 +08:00
self - > priority_teams [ team - > getPriority ( ) ] + + ;
2017-05-26 04:48:44 +08:00
try {
loop {
2018-11-22 03:18:26 +08:00
TraceEvent ( " TeamHealthChangeDetected " , self - > masterId )
. detail ( " Primary " , self - > primary )
. detail ( " IsReady " , self - > initialFailureReactionDelay . isReady ( ) ) ;
2017-05-26 04:48:44 +08:00
// Check if the number of degraded machines has changed
state vector < Future < Void > > change ;
auto servers = team - > getServerIDs ( ) ;
bool anyUndesired = false ;
bool anyWrongConfiguration = false ;
2018-09-01 08:54:55 +08:00
int serversLeft = 0 ;
2017-05-26 04:48:44 +08:00
for ( auto s = servers . begin ( ) ; s ! = servers . end ( ) ; + + s ) {
change . push_back ( self - > server_status . onChange ( * s ) ) ;
auto & status = self - > server_status . get ( * s ) ;
2018-09-01 08:54:55 +08:00
if ( ! status . isFailed ) {
serversLeft + + ;
}
if ( status . isUndesired ) {
2017-05-26 04:48:44 +08:00
anyUndesired = true ;
2018-09-01 08:54:55 +08:00
}
if ( status . isWrongConfiguration ) {
2017-05-26 04:48:44 +08:00
anyWrongConfiguration = true ;
2018-09-01 08:54:55 +08:00
}
2017-05-26 04:48:44 +08:00
}
2018-04-09 12:24:05 +08:00
if ( ! self - > initialFailureReactionDelay . isReady ( ) ) {
2017-05-26 04:48:44 +08:00
change . push_back ( self - > initialFailureReactionDelay ) ;
2018-04-09 12:24:05 +08:00
}
2018-02-03 03:46:04 +08:00
change . push_back ( self - > zeroHealthyTeams - > onChange ( ) ) ;
2017-05-26 04:48:44 +08:00
2018-11-08 13:05:31 +08:00
bool healthy = ! badTeam & & ! anyUndesired & & serversLeft = = self - > configuration . storageTeamSize ;
2018-09-22 07:14:39 +08:00
team - > setHealthy ( healthy ) ; // Unhealthy teams won't be chosen by bestTeam
2018-09-13 09:29:49 +08:00
bool optimal = team - > isOptimal ( ) & & healthy ;
2018-09-01 08:54:55 +08:00
bool recheck = ! healthy & & ( lastReady ! = self - > initialFailureReactionDelay . isReady ( ) | | ( lastZeroHealthy & & ! self - > zeroHealthyTeams - > get ( ) ) ) ;
2018-08-30 05:40:39 +08:00
2017-05-26 04:48:44 +08:00
lastReady = self - > initialFailureReactionDelay . isReady ( ) ;
2018-02-03 03:46:04 +08:00
lastZeroHealthy = self - > zeroHealthyTeams - > get ( ) ;
2017-05-26 04:48:44 +08:00
2018-09-13 09:29:49 +08:00
if ( firstCheck ) {
2018-09-22 07:14:39 +08:00
firstCheck = false ;
2018-09-13 09:29:49 +08:00
if ( healthy ) {
self - > healthyTeamCount + + ;
self - > zeroHealthyTeams - > set ( false ) ;
}
2018-09-22 07:14:39 +08:00
lastHealthy = healthy ;
2018-09-13 09:29:49 +08:00
if ( optimal ) {
self - > optimalTeamCount + + ;
self - > zeroOptimalTeams . set ( false ) ;
}
2018-09-22 07:14:39 +08:00
lastOptimal = optimal ;
2018-09-13 09:29:49 +08:00
}
2018-11-22 03:18:26 +08:00
if ( serversLeft ! = lastServersLeft | | anyUndesired ! = lastAnyUndesired | |
anyWrongConfiguration ! = lastWrongConfiguration | | recheck ) { // NOTE: do not check wrongSize
2018-11-08 13:05:31 +08:00
if ( logTeamEvents ) {
2018-10-18 02:45:47 +08:00
TraceEvent ( " TeamHealthChanged " , self - > masterId )
. detail ( " Team " , team - > getDesc ( ) ) . detail ( " ServersLeft " , serversLeft )
. detail ( " LastServersLeft " , lastServersLeft ) . detail ( " ContainsUndesiredServer " , anyUndesired )
. detail ( " HealthyTeamsCount " , self - > healthyTeamCount ) . detail ( " IsWrongConfiguration " , anyWrongConfiguration ) ;
}
2017-05-26 04:48:44 +08:00
team - > setWrongConfiguration ( anyWrongConfiguration ) ;
2018-09-22 07:14:39 +08:00
if ( optimal ! = lastOptimal ) {
lastOptimal = optimal ;
2018-01-31 09:00:51 +08:00
self - > optimalTeamCount + = optimal ? 1 : - 1 ;
ASSERT ( self - > optimalTeamCount > = 0 ) ;
self - > zeroOptimalTeams . set ( self - > optimalTeamCount = = 0 ) ;
2017-05-26 04:48:44 +08:00
}
2018-09-22 07:14:39 +08:00
if ( lastHealthy ! = healthy ) {
lastHealthy = healthy ;
2018-11-22 03:18:26 +08:00
// Update the healthy team count when the team's health changes
self - > healthyTeamCount + = healthy ? 1 : - 1 ;
2017-05-26 04:48:44 +08:00
ASSERT ( self - > healthyTeamCount > = 0 ) ;
2018-02-03 03:46:04 +08:00
self - > zeroHealthyTeams - > set ( self - > healthyTeamCount = = 0 ) ;
2017-05-26 04:48:44 +08:00
if ( self - > healthyTeamCount = = 0 ) {
2018-11-22 03:18:26 +08:00
TraceEvent ( SevWarn , " ZeroTeamsHealthySignalling " , self - > masterId )
. detail ( " SignallingTeam " , team - > getDesc ( ) )
. detail ( " Primary " , self - > primary ) ;
2017-05-26 04:48:44 +08:00
}
TraceEvent ( " TeamHealthDifference " , self - > masterId )
. detail ( " LastOptimal " , lastOptimal )
. detail ( " LastHealthy " , lastHealthy )
. detail ( " Optimal " , optimal )
. detail ( " OptimalTeamCount " , self - > optimalTeamCount ) ;
}
lastServersLeft = serversLeft ;
lastAnyUndesired = anyUndesired ;
lastWrongConfiguration = anyWrongConfiguration ;
state int lastPriority = team - > getPriority ( ) ;
2017-09-08 06:32:08 +08:00
if ( serversLeft < self - > configuration . storageTeamSize ) {
2017-05-26 04:48:44 +08:00
if ( serversLeft = = 0 )
team - > setPriority ( PRIORITY_TEAM_0_LEFT ) ;
else if ( serversLeft = = 1 )
team - > setPriority ( PRIORITY_TEAM_1_LEFT ) ;
else if ( serversLeft = = 2 )
team - > setPriority ( PRIORITY_TEAM_2_LEFT ) ;
else
team - > setPriority ( PRIORITY_TEAM_UNHEALTHY ) ;
}
2018-11-08 13:05:31 +08:00
else if ( badTeam | | anyWrongConfiguration )
2017-05-26 04:48:44 +08:00
team - > setPriority ( PRIORITY_TEAM_UNHEALTHY ) ;
else if ( anyUndesired )
team - > setPriority ( PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER ) ;
else
team - > setPriority ( PRIORITY_TEAM_HEALTHY ) ;
2018-08-10 04:16:09 +08:00
if ( lastPriority ! = team - > getPriority ( ) ) {
self - > priority_teams [ lastPriority ] - - ;
self - > priority_teams [ team - > getPriority ( ) ] + + ;
}
2018-11-08 13:05:31 +08:00
if ( logTeamEvents ) {
2018-10-18 02:45:47 +08:00
TraceEvent ( " TeamPriorityChange " , self - > masterId ) . detail ( " Priority " , team - > getPriority ( ) ) ;
}
2017-05-26 04:48:44 +08:00
2018-02-03 03:46:04 +08:00
lastZeroHealthy = self - > zeroHealthyTeams - > get ( ) ; // Set this again in case it changed as a result of this team's health changing
if ( self - > initialFailureReactionDelay . isReady ( ) & & ! self - > zeroHealthyTeams - > get ( ) ) {
2018-03-09 02:50:05 +08:00
vector < KeyRange > shards = self - > shardsAffectedByTeamFailure - > getShardsFor ( ShardsAffectedByTeamFailure : : Team ( team - > getServerIDs ( ) , self - > primary ) ) ;
2017-05-26 04:48:44 +08:00
for ( int i = 0 ; i < shards . size ( ) ; i + + ) {
int maxPriority = team - > getPriority ( ) ;
2018-08-10 03:37:46 +08:00
if ( maxPriority < PRIORITY_TEAM_0_LEFT ) {
auto teams = self - > shardsAffectedByTeamFailure - > getTeamsFor ( shards [ i ] ) ;
2018-11-12 04:33:31 +08:00
for ( int j = 0 ; j < teams . first . size ( ) + teams . second . size ( ) ; j + + ) {
auto & t = j < teams . first . size ( ) ? teams . first [ j ] : teams . second [ j - teams . first . size ( ) ] ;
if ( ! t . servers . size ( ) ) {
maxPriority = PRIORITY_TEAM_0_LEFT ;
break ;
}
auto tc = self - > teamCollections [ t . primary ? 0 : 1 ] ;
ASSERT ( tc - > primary = = t . primary ) ;
if ( tc - > server_info . count ( t . servers [ 0 ] ) ) {
auto & info = tc - > server_info [ t . servers [ 0 ] ] ;
bool found = false ;
for ( int k = 0 ; k < info - > teams . size ( ) ; k + + ) {
if ( info - > teams [ k ] - > serverIDs = = t . servers ) {
maxPriority = std : : max ( maxPriority , info - > teams [ k ] - > getPriority ( ) ) ;
found = true ;
break ;
2018-08-10 03:37:46 +08:00
}
2017-05-26 04:48:44 +08:00
}
2018-11-12 04:33:31 +08:00
//If we cannot find the team, it could be a bad team so assume unhealthy priority
if ( ! found ) {
maxPriority = std : : max < int > ( maxPriority , PRIORITY_TEAM_UNHEALTHY ) ;
}
} else {
TEST ( true ) ; // A removed server is still associated with a team in SABTF
2018-08-10 03:37:46 +08:00
}
2017-05-26 04:48:44 +08:00
}
}
2018-11-12 04:33:31 +08:00
RelocateShard rs ;
rs . keys = shards [ i ] ;
rs . priority = maxPriority ;
self - > output . send ( rs ) ;
if ( g_random - > random01 ( ) < 0.01 ) {
TraceEvent ( " SendRelocateToDDQx100 " , self - > masterId )
. detail ( " Team " , team - > getDesc ( ) )
. detail ( " KeyBegin " , printable ( rs . keys . begin ) )
. detail ( " KeyEnd " , printable ( rs . keys . end ) )
. detail ( " Priority " , rs . priority )
. detail ( " TeamFailedMachines " , team - > getServerIDs ( ) . size ( ) - serversLeft )
. detail ( " TeamOKMachines " , serversLeft ) ;
2017-05-26 04:48:44 +08:00
}
}
} else {
2018-11-08 13:05:31 +08:00
if ( logTeamEvents ) {
2018-10-18 02:45:47 +08:00
TraceEvent ( " TeamHealthNotReady " , self - > masterId ) . detail ( " HealthyTeamCount " , self - > healthyTeamCount ) ;
}
2017-05-26 04:48:44 +08:00
}
}
// Wait for any of the servers in the team to change status
2018-08-11 04:57:10 +08:00
wait ( quorum ( change , 1 ) ) ;
wait ( yield ( ) ) ;
2017-05-26 04:48:44 +08:00
}
} catch ( Error & e ) {
2018-08-10 04:16:09 +08:00
self - > priority_teams [ team - > getPriority ( ) ] - - ;
2017-05-26 04:48:44 +08:00
if ( team - > isHealthy ( ) ) {
self - > healthyTeamCount - - ;
ASSERT ( self - > healthyTeamCount > = 0 ) ;
if ( self - > healthyTeamCount = = 0 ) {
TraceEvent ( SevWarn , " ZeroTeamsHealthySignalling " , self - > masterId ) . detail ( " SignallingTeam " , team - > getDesc ( ) ) ;
2018-02-03 03:46:04 +08:00
self - > zeroHealthyTeams - > set ( true ) ;
2017-05-26 04:48:44 +08:00
}
}
throw ;
}
}
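// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): the priority decision in teamTracker
// above, restated as a pure function so the ordering is easy to see. The SKETCH_PRIORITY_*
// values are placeholders that only preserve the relative ordering of the real PRIORITY_*
// constants from DataDistribution.h, and TeamObservation is a hypothetical stand-in type.
// ---------------------------------------------------------------------------------------------
namespace dd_sketch {
    // Placeholder priorities; in the real code a larger value means a more urgent relocation.
    constexpr int SKETCH_PRIORITY_TEAM_HEALTHY = 0;
    constexpr int SKETCH_PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER = 1;
    constexpr int SKETCH_PRIORITY_TEAM_UNHEALTHY = 2;
    constexpr int SKETCH_PRIORITY_TEAM_2_LEFT = 3;
    constexpr int SKETCH_PRIORITY_TEAM_1_LEFT = 4;
    constexpr int SKETCH_PRIORITY_TEAM_0_LEFT = 5;

    struct TeamObservation {
        int serversLeft;            // non-failed servers remaining in the team
        int storageTeamSize;        // configured replication factor
        bool badTeam;               // team no longer satisfies the replication policy
        bool anyWrongConfiguration; // some member has the wrong store type, DC, or is excluded
        bool anyUndesired;          // some member is undesired (duplicate address, exclusion, ...)
    };

    // Mirrors the if/else chain above: missing replicas dominate, then misconfiguration,
    // then undesired members; otherwise the team is healthy.
    inline int teamPriority(const TeamObservation& o) {
        if (o.serversLeft < o.storageTeamSize) {
            if (o.serversLeft == 0) return SKETCH_PRIORITY_TEAM_0_LEFT;
            if (o.serversLeft == 1) return SKETCH_PRIORITY_TEAM_1_LEFT;
            if (o.serversLeft == 2) return SKETCH_PRIORITY_TEAM_2_LEFT;
            return SKETCH_PRIORITY_TEAM_UNHEALTHY;
        }
        if (o.badTeam || o.anyWrongConfiguration) return SKETCH_PRIORITY_TEAM_UNHEALTHY;
        if (o.anyUndesired) return SKETCH_PRIORITY_TEAM_CONTAINS_UNDESIRED_SERVER;
        return SKETCH_PRIORITY_TEAM_HEALTHY;
    }
}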
2018-11-10 02:07:55 +08:00
ACTOR Future < Void > trackExcludedServers ( DDTeamCollection * self ) {
2017-05-26 04:48:44 +08:00
loop {
// Fetch the list of excluded servers
2018-11-10 02:07:55 +08:00
state Transaction tr ( self - > cx ) ;
2017-05-26 04:48:44 +08:00
state Optional < Value > lastChangeID ;
loop {
try {
state Future < Standalone < RangeResultRef > > fresults = tr . getRange ( excludedServersKeys , CLIENT_KNOBS - > TOO_MANY ) ;
state Future < Optional < Value > > fchid = tr . get ( excludedServersVersionKey ) ;
2018-08-11 04:57:10 +08:00
wait ( success ( fresults ) & & success ( fchid ) ) ;
2017-05-26 04:48:44 +08:00
Standalone < RangeResultRef > results = fresults . get ( ) ;
lastChangeID = fchid . get ( ) ;
ASSERT ( ! results . more & & results . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
std : : set < AddressExclusion > excluded ;
for ( auto r = results . begin ( ) ; r ! = results . end ( ) ; + + r ) {
AddressExclusion addr = decodeExcludedServersKey ( r - > key ) ;
if ( addr . isValid ( ) )
excluded . insert ( addr ) ;
}
TraceEvent ( " DDExcludedServersChanged " , self - > masterId ) . detail ( " Rows " , results . size ( ) ) . detail ( " Exclusions " , excluded . size ( ) ) ;
// Reset and reassign self->excludedServers based on excluded, but only
// trigger entries whose exclusion state actually changed
auto old = self - > excludedServers . getKeys ( ) ;
for ( auto & o : old )
if ( ! excluded . count ( o ) )
self - > excludedServers . set ( o , false ) ;
for ( auto & n : excluded )
self - > excludedServers . set ( n , true ) ;
self - > restartRecruiting . trigger ( ) ;
break ;
} catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
// Wait for a change in the list of excluded servers
loop {
try {
Optional < Value > nchid = wait ( tr . get ( excludedServersVersionKey ) ) ;
if ( nchid ! = lastChangeID )
break ;
2018-08-11 04:57:10 +08:00
wait ( delay ( SERVER_KNOBS - > SERVER_LIST_DELAY , TaskDataDistribution ) ) ; // FIXME: make this tr.watch( excludedServersVersionKey ) instead
2018-11-10 02:07:55 +08:00
tr = Transaction ( self - > cx ) ;
2017-05-26 04:48:44 +08:00
} catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
}
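// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): the reconciliation step performed by
// trackExcludedServers above, which only flips entries whose exclusion state actually changed
// so that watchers are not woken spuriously. std::map and std::set of plain strings stand in
// for the AsyncMap of AddressExclusion and the decoded exclusion set used by the real code.
// ---------------------------------------------------------------------------------------------
#include <map>
#include <set>
#include <string>

namespace dd_sketch {
    // Applies a freshly read exclusion list to the previously known state and returns the
    // addresses whose state changed (e.g. to decide whether recruiting should be re-triggered).
    inline std::set<std::string> applyExclusions(std::map<std::string, bool>& known,
                                                 const std::set<std::string>& excluded) {
        std::set<std::string> changed;
        for (auto& kv : known) {
            if (kv.second && !excluded.count(kv.first)) { // previously excluded, no longer listed
                kv.second = false;
                changed.insert(kv.first);
            }
        }
        for (const auto& addr : excluded) {
            auto it = known.find(addr);
            if (it == known.end() || !it->second) { // newly excluded
                known[addr] = true;
                changed.insert(addr);
            }
        }
        return changed;
    }
}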
ACTOR Future < vector < std : : pair < StorageServerInterface , ProcessClass > > > getServerListAndProcessClasses ( Transaction * tr ) {
state Future < vector < ProcessData > > workers = getWorkers ( tr ) ;
state Future < Standalone < RangeResultRef > > serverList = tr - > getRange ( serverListKeys , CLIENT_KNOBS - > TOO_MANY ) ;
2018-08-11 04:57:10 +08:00
wait ( success ( workers ) & & success ( serverList ) ) ;
2017-05-26 04:48:44 +08:00
ASSERT ( ! serverList . get ( ) . more & & serverList . get ( ) . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
std : : map < Optional < Standalone < StringRef > > , ProcessData > id_data ;
for ( int i = 0 ; i < workers . get ( ) . size ( ) ; i + + )
id_data [ workers . get ( ) [ i ] . locality . processId ( ) ] = workers . get ( ) [ i ] ;
vector < std : : pair < StorageServerInterface , ProcessClass > > results ;
for ( int i = 0 ; i < serverList . get ( ) . size ( ) ; i + + ) {
auto ssi = decodeServerListValue ( serverList . get ( ) [ i ] . value ) ;
results . push_back ( std : : make_pair ( ssi , id_data [ ssi . locality . processId ( ) ] . processClass ) ) ;
}
return results ;
}
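// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): the join performed by
// getServerListAndProcessClasses above -- workers are indexed by their processId and every
// storage server is paired with the process class of the worker it runs on. Plain strings
// stand in for the FDB locality, interface, and ProcessClass types.
// ---------------------------------------------------------------------------------------------
#include <map>
#include <string>
#include <utility>
#include <vector>

namespace dd_sketch {
    struct WorkerData { std::string processId; std::string processClass; };
    struct ServerData { std::string id; std::string processId; };

    inline std::vector<std::pair<ServerData, std::string>> joinServersWithClasses(
            const std::vector<WorkerData>& workers, const std::vector<ServerData>& servers) {
        std::map<std::string, WorkerData> byProcess;
        for (const auto& w : workers)
            byProcess[w.processId] = w;

        std::vector<std::pair<ServerData, std::string>> results;
        for (const auto& s : servers) {
            // A missing worker yields a default-constructed (empty) class, matching the
            // operator[] lookup in the code above.
            results.push_back({ s, byProcess[s.processId].processClass });
        }
        return results;
    }
}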
2018-11-10 02:07:55 +08:00
ACTOR Future < Void > waitServerListChange ( DDTeamCollection * self , FutureStream < Void > serverRemoved ) {
2018-01-13 04:51:07 +08:00
state Future < Void > checkSignal = delay ( SERVER_KNOBS - > SERVER_LIST_DELAY ) ;
2017-05-26 04:48:44 +08:00
state Future < vector < std : : pair < StorageServerInterface , ProcessClass > > > serverListAndProcessClasses = Never ( ) ;
state bool isFetchingResults = false ;
2018-11-10 02:07:55 +08:00
state Transaction tr ( self - > cx ) ;
2017-05-26 04:48:44 +08:00
loop {
try {
choose {
2018-08-11 04:57:10 +08:00
when ( wait ( checkSignal ) ) {
2017-05-26 04:48:44 +08:00
checkSignal = Never ( ) ;
isFetchingResults = true ;
serverListAndProcessClasses = getServerListAndProcessClasses ( & tr ) ;
}
when ( vector < std : : pair < StorageServerInterface , ProcessClass > > results = wait ( serverListAndProcessClasses ) ) {
serverListAndProcessClasses = Never ( ) ;
isFetchingResults = false ;
for ( int i = 0 ; i < results . size ( ) ; i + + ) {
UID serverId = results [ i ] . first . id ( ) ;
StorageServerInterface const & ssi = results [ i ] . first ;
ProcessClass const & processClass = results [ i ] . second ;
2017-10-11 01:36:33 +08:00
if ( ! self - > shouldHandleServer ( ssi ) ) {
continue ;
}
else if ( self - > server_info . count ( serverId ) ) {
2017-05-26 04:48:44 +08:00
auto & serverInfo = self - > server_info [ serverId ] ;
if ( ssi . getValue . getEndpoint ( ) ! = serverInfo - > lastKnownInterface . getValue . getEndpoint ( ) | | processClass ! = serverInfo - > lastKnownClass . classType ( ) ) {
Promise < std : : pair < StorageServerInterface , ProcessClass > > currentInterfaceChanged = serverInfo - > interfaceChanged ;
serverInfo - > interfaceChanged = Promise < std : : pair < StorageServerInterface , ProcessClass > > ( ) ;
serverInfo - > onInterfaceChanged = Future < std : : pair < StorageServerInterface , ProcessClass > > ( serverInfo - > interfaceChanged . getFuture ( ) ) ;
currentInterfaceChanged . send ( std : : make_pair ( ssi , processClass ) ) ;
}
} else if ( ! self - > recruitingIds . count ( ssi . id ( ) ) ) {
2018-05-06 09:16:28 +08:00
self - > addServer ( ssi , processClass , self - > serverTrackerErrorOut , tr . getReadVersion ( ) . get ( ) ) ;
2017-05-26 04:48:44 +08:00
self - > doBuildTeams = true ;
}
}
2018-11-10 02:07:55 +08:00
tr = Transaction ( self - > cx ) ;
2018-01-13 04:51:07 +08:00
checkSignal = delay ( SERVER_KNOBS - > SERVER_LIST_DELAY ) ;
2017-05-26 04:48:44 +08:00
}
2018-08-11 04:57:10 +08:00
when ( waitNext ( serverRemoved ) ) {
2017-05-26 04:48:44 +08:00
if ( isFetchingResults ) {
2018-11-10 02:07:55 +08:00
tr = Transaction ( self - > cx ) ;
2017-05-26 04:48:44 +08:00
serverListAndProcessClasses = getServerListAndProcessClasses ( & tr ) ;
}
}
}
} catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
serverListAndProcessClasses = Never ( ) ;
isFetchingResults = false ;
checkSignal = Void ( ) ;
}
}
}
ACTOR Future < Void > serverMetricsPolling ( TCServerInfo * server ) {
state double lastUpdate = now ( ) ;
loop {
2018-08-11 04:57:10 +08:00
wait ( updateServerMetrics ( server ) ) ;
wait ( delayUntil ( lastUpdate + SERVER_KNOBS - > STORAGE_METRICS_POLLING_DELAY + SERVER_KNOBS - > STORAGE_METRICS_RANDOM_DELAY * g_random - > random01 ( ) , TaskDataDistributionLaunch ) ) ;
2017-05-26 04:48:44 +08:00
lastUpdate = now ( ) ;
}
}
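// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): the jittered polling interval used by
// serverMetricsPolling above -- a fixed base delay plus a uniformly random component so the
// per-server pollers do not all fire in lockstep. The default arguments are hypothetical
// stand-ins for the STORAGE_METRICS_POLLING_DELAY and STORAGE_METRICS_RANDOM_DELAY knobs.
// ---------------------------------------------------------------------------------------------
#include <random>

namespace dd_sketch {
    inline double nextMetricsPollDelay(std::mt19937& rng,
                                       double baseDelay = 1.0,
                                       double randomDelay = 0.5) {
        std::uniform_real_distribution<double> uniform01(0.0, 1.0);
        return baseDelay + randomDelay * uniform01(rng); // seconds until the next poll
    }
}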
// Returns the KeyValueStoreType of the server if it differs from the configured storageServerStoreType or the server is not in an included DC; otherwise this actor never returns
2018-11-10 02:07:55 +08:00
ACTOR Future < KeyValueStoreType > keyValueStoreTypeTracker ( DDTeamCollection * self , TCServerInfo * server ) {
2017-05-26 04:48:44 +08:00
state KeyValueStoreType type = wait ( brokenPromiseToNever ( server - > lastKnownInterface . getKeyValueStoreType . getReplyWithTaskID < KeyValueStoreType > ( TaskDataDistribution ) ) ) ;
2018-02-14 09:01:34 +08:00
if ( type = = self - > configuration . storageServerStoreType & & ( self - > includedDCs . empty ( ) | | std : : find ( self - > includedDCs . begin ( ) , self - > includedDCs . end ( ) , server - > lastKnownInterface . locality . dcId ( ) ) ! = self - > includedDCs . end ( ) ) )
2018-08-11 04:57:10 +08:00
wait ( Future < Void > ( Never ( ) ) ) ;
2017-05-26 04:48:44 +08:00
return type ;
}
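// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): the predicate keyValueStoreTypeTracker
// above is built around -- a server is acceptable only when its store type matches the
// configured storageServerStoreType and, when a DC whitelist is present, its datacenter is on
// that list. Plain strings stand in for KeyValueStoreType and the locality dcId.
// ---------------------------------------------------------------------------------------------
#include <algorithm>
#include <string>
#include <vector>

namespace dd_sketch {
    inline bool hasWrongStoreTypeOrDC(const std::string& serverStoreType,
                                      const std::string& serverDcId,
                                      const std::string& configuredStoreType,
                                      const std::vector<std::string>& includedDCs) {
        if (serverStoreType != configuredStoreType)
            return true; // wrong key-value store type
        if (!includedDCs.empty() &&
            std::find(includedDCs.begin(), includedDCs.end(), serverDcId) == includedDCs.end())
            return true; // not in any DC this team collection is allowed to use
        return false;
    }
}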
2018-11-08 13:05:31 +08:00
ACTOR Future < Void > removeBadTeams ( DDTeamCollection * self ) {
2018-11-11 05:04:24 +08:00
wait ( self - > initialFailureReactionDelay ) ;
2018-11-08 13:05:31 +08:00
loop {
while ( self - > zeroHealthyTeams - > get ( ) | | self - > processingUnhealthy - > get ( ) ) {
2018-11-11 05:04:24 +08:00
wait ( self - > zeroHealthyTeams - > onChange ( ) | | self - > processingUnhealthy - > onChange ( ) ) ;
2018-11-08 13:05:31 +08:00
}
2018-11-11 05:04:24 +08:00
wait ( delay ( FLOW_KNOBS - > PREVENT_FAST_SPIN_DELAY , TaskLowPriority ) ) ; //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
2018-11-08 13:05:31 +08:00
if ( ! self - > zeroHealthyTeams - > get ( ) & & ! self - > processingUnhealthy - > get ( ) ) {
break ;
}
}
2018-11-11 05:04:24 +08:00
wait ( self - > addSubsetComplete . getFuture ( ) ) ;
2018-11-12 04:33:31 +08:00
TraceEvent ( " DDRemovingBadTeams " , self - > masterId ) . detail ( " Primary " , self - > primary ) ;
2018-11-08 13:05:31 +08:00
for ( auto it : self - > badTeams ) {
it - > tracker . cancel ( ) ;
}
self - > badTeams . clear ( ) ;
return Void ( ) ;
}
2018-11-10 10:06:00 +08:00
ACTOR Future < Void > storageServerFailureTracker (
DDTeamCollection * self ,
TCServerInfo * server ,
Database cx ,
ServerStatusMap * statusMap ,
ServerStatus * status ,
Version addedVersion )
{
state StorageServerInterface interf = server - > lastKnownInterface ;
loop {
if ( statusMap - > get ( interf . id ( ) ) . initialized ) {
bool unhealthy = statusMap - > get ( interf . id ( ) ) . isUnhealthy ( ) ;
if ( unhealthy & & ! status - > isUnhealthy ( ) ) {
self - > unhealthyServers - - ;
}
if ( ! unhealthy & & status - > isUnhealthy ( ) ) {
self - > unhealthyServers + + ;
}
} else if ( status - > isUnhealthy ( ) ) {
self - > unhealthyServers + + ;
}
statusMap - > set ( interf . id ( ) , * status ) ;
if ( status - > isFailed )
self - > restartRecruiting . trigger ( ) ;
state double startTime = now ( ) ;
choose {
2018-11-11 05:04:24 +08:00
when ( wait ( status - > isFailed
2018-11-10 10:06:00 +08:00
? IFailureMonitor : : failureMonitor ( ) . onStateEqual ( interf . waitFailure . getEndpoint ( ) , FailureStatus ( false ) )
: waitFailureClient ( interf . waitFailure , SERVER_KNOBS - > DATA_DISTRIBUTION_FAILURE_REACTION_TIME , 0 , TaskDataDistribution ) ) )
{
double elapsed = now ( ) - startTime ;
if ( ! status - > isFailed & & elapsed < SERVER_KNOBS - > DATA_DISTRIBUTION_FAILURE_REACTION_TIME ) {
2018-11-11 05:04:24 +08:00
wait ( delay ( SERVER_KNOBS - > DATA_DISTRIBUTION_FAILURE_REACTION_TIME - elapsed ) ) ;
2018-11-10 10:06:00 +08:00
}
status - > isFailed = ! status - > isFailed ;
if ( ! status - > isFailed & & ! server - > teams . size ( ) ) {
self - > doBuildTeams = true ;
}
TraceEvent ( " StatusMapChange " , self - > masterId ) . detail ( " ServerID " , interf . id ( ) ) . detail ( " Status " , status - > toString ( ) )
. detail ( " Available " , IFailureMonitor : : failureMonitor ( ) . getState ( interf . waitFailure . getEndpoint ( ) ) . isAvailable ( ) ) ;
}
2018-11-11 05:04:24 +08:00
when ( wait ( status - > isUnhealthy ( ) ? waitForAllDataRemoved ( cx , interf . id ( ) , addedVersion ) : Never ( ) ) ) { break ; }
2018-11-10 10:06:00 +08:00
}
}
return Void ( ) ;
}
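// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): the counter bookkeeping at the top of
// storageServerFailureTracker above, which keeps unhealthyServers consistent across status
// transitions. SketchStatus is a simplified stand-in for ServerStatus.
// ---------------------------------------------------------------------------------------------
namespace dd_sketch {
    struct SketchStatus { bool initialized = false; bool unhealthy = false; };

    // Called just before a new status replaces the one currently stored for a server.
    inline void updateUnhealthyCount(int& unhealthyServers, const SketchStatus& previous, const SketchStatus& next) {
        if (previous.initialized) {
            if (previous.unhealthy && !next.unhealthy) unhealthyServers--; // recovered
            if (!previous.unhealthy && next.unhealthy) unhealthyServers++; // became unhealthy
        } else if (next.unhealthy) {
            unhealthyServers++; // first observation of this server, and it is already unhealthy
        }
    }
}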
2018-08-30 05:40:39 +08:00
// Check the status of a storage server.
// Apply all requirements to the server and mark it as undesired if it fails to satisfy them
2017-05-26 04:48:44 +08:00
ACTOR Future < Void > storageServerTracker (
2018-11-10 02:07:55 +08:00
DDTeamCollection * self ,
2017-05-26 04:48:44 +08:00
Database cx ,
TCServerInfo * server , //This actor is owned by this TCServerInfo
ServerStatusMap * statusMap ,
MoveKeysLock lock ,
UID masterId ,
std : : map < UID , Reference < TCServerInfo > > * other_servers ,
2018-06-19 08:23:55 +08:00
Optional < PromiseStream < std : : pair < UID , Optional < StorageServerInterface > > > > changes ,
2018-05-06 09:16:28 +08:00
Promise < Void > errorOut ,
Version addedVersion )
2017-05-26 04:48:44 +08:00
{
state Future < Void > failureTracker ;
state ServerStatus status ( false , false , server - > lastKnownInterface . locality ) ;
2018-06-08 05:05:53 +08:00
state bool lastIsUnhealthy = false ;
2017-05-26 04:48:44 +08:00
state Future < Void > metricsTracker = serverMetricsPolling ( server ) ;
state Future < std : : pair < StorageServerInterface , ProcessClass > > interfaceChanged = server - > onInterfaceChanged ;
state Future < KeyValueStoreType > storeTracker = keyValueStoreTypeTracker ( self , server ) ;
2017-09-08 06:32:08 +08:00
state bool hasWrongStoreTypeOrDC = false ;
2017-05-26 04:48:44 +08:00
2018-06-19 08:23:55 +08:00
if ( changes . present ( ) ) {
changes . get ( ) . send ( std : : make_pair ( server - > id , server - > lastKnownInterface ) ) ;
}
2017-05-26 04:48:44 +08:00
try {
loop {
status . isUndesired = false ;
status . isWrongConfiguration = false ;
// If there is any other server on this exact NetworkAddress, this server is undesired and will eventually be eliminated
state std : : vector < Future < Void > > otherChanges ;
std : : vector < Promise < Void > > wakeUpTrackers ;
for ( auto i = other_servers - > begin ( ) ; i ! = other_servers - > end ( ) ; + + i ) {
if ( i - > second . getPtr ( ) ! = server & & i - > second - > lastKnownInterface . address ( ) = = server - > lastKnownInterface . address ( ) ) {
auto & statusInfo = statusMap - > get ( i - > first ) ;
TraceEvent ( " SameAddress " , masterId )
. detail ( " Failed " , statusInfo . isFailed )
. detail ( " Undesired " , statusInfo . isUndesired )
. detail ( " Server " , server - > id ) . detail ( " OtherServer " , i - > second - > id )
. detail ( " Address " , server - > lastKnownInterface . address ( ) )
. detail ( " NumShards " , self - > shardsAffectedByTeamFailure - > getNumberOfShards ( server - > id ) )
. detail ( " OtherNumShards " , self - > shardsAffectedByTeamFailure - > getNumberOfShards ( i - > second - > id ) )
. detail ( " OtherHealthy " , ! statusMap - > get ( i - > second - > id ) . isUnhealthy ( ) ) ;
2018-11-22 03:18:26 +08:00
// Wait for the other server's status to change (e.g. when its address changes)
otherChanges . push_back ( statusMap - > onChange ( i - > second - > id ) ) ;
2017-05-26 04:48:44 +08:00
if ( ! statusMap - > get ( i - > second - > id ) . isUnhealthy ( ) ) {
if ( self - > shardsAffectedByTeamFailure - > getNumberOfShards ( i - > second - > id ) > = self - > shardsAffectedByTeamFailure - > getNumberOfShards ( server - > id ) )
{
TraceEvent ( SevWarn , " UndesiredStorageServer " , masterId )
. detail ( " Server " , server - > id )
. detail ( " Address " , server - > lastKnownInterface . address ( ) )
. detail ( " OtherServer " , i - > second - > id )
. detail ( " NumShards " , self - > shardsAffectedByTeamFailure - > getNumberOfShards ( server - > id ) )
. detail ( " OtherNumShards " , self - > shardsAffectedByTeamFailure - > getNumberOfShards ( i - > second - > id ) ) ;
status . isUndesired = true ;
}
else
wakeUpTrackers . push_back ( i - > second - > wakeUpTracker ) ;
}
}
}
for ( auto & p : wakeUpTrackers ) {
if ( ! p . isSet ( ) )
p . send ( Void ( ) ) ;
}
if ( server - > lastKnownClass . machineClassFitness ( ProcessClass : : Storage ) > ProcessClass : : UnsetFit ) {
if ( self - > optimalTeamCount > 0 ) {
2018-11-22 03:18:26 +08:00
TraceEvent ( SevWarn , " UndesiredStorageServer " , masterId )
. detail ( " Server " , server - > id )
. detail ( " OptimalTeamCount " , self - > optimalTeamCount )
. detail ( " Fitness " , server - > lastKnownClass . machineClassFitness ( ProcessClass : : Storage ) ) ;
2017-05-26 04:48:44 +08:00
status . isUndesired = true ;
}
2018-01-31 09:00:51 +08:00
otherChanges . push_back ( self - > zeroOptimalTeams . onChange ( ) ) ;
2017-05-26 04:48:44 +08:00
}
// If this storage server has the wrong key-value store type or is not in an included DC, mark it undesired so it will be replaced with a suitable server
2017-09-08 06:32:08 +08:00
if ( hasWrongStoreTypeOrDC ) {
2017-05-26 04:48:44 +08:00
TraceEvent ( SevWarn , " UndesiredStorageServer " , masterId ) . detail ( " Server " , server - > id ) . detail ( " StoreType " , " ? " ) ;
status . isUndesired = true ;
status . isWrongConfiguration = true ;
}
// If the storage server is in the excluded servers list, it is undesired
NetworkAddress a = server - > lastKnownInterface . address ( ) ;
AddressExclusion addr ( a . ip , a . port ) ;
AddressExclusion ipaddr ( a . ip ) ;
if ( self - > excludedServers . get ( addr ) | | self - > excludedServers . get ( ipaddr ) ) {
TraceEvent ( SevWarn , " UndesiredStorageServer " , masterId ) . detail ( " Server " , server - > id )
. detail ( " Excluded " , self - > excludedServers . get ( addr ) ? addr . toString ( ) : ipaddr . toString ( ) ) ;
status . isUndesired = true ;
status . isWrongConfiguration = true ;
}
otherChanges . push_back ( self - > excludedServers . onChange ( addr ) ) ;
otherChanges . push_back ( self - > excludedServers . onChange ( ipaddr ) ) ;
2018-11-10 10:06:00 +08:00
failureTracker = storageServerFailureTracker ( self , server , cx , statusMap , & status , addedVersion ) ;
2017-05-26 04:48:44 +08:00
// We need to recruit new storage servers if the key-value store type has changed or the server is in the wrong DC
2017-09-08 06:32:08 +08:00
if ( hasWrongStoreTypeOrDC )
2017-05-26 04:48:44 +08:00
self - > restartRecruiting . trigger ( ) ;
2018-06-08 05:05:53 +08:00
if ( lastIsUnhealthy & & ! status . isUnhealthy ( ) & & ! server - > teams . size ( ) )
2017-05-26 04:48:44 +08:00
self - > doBuildTeams = true ;
2018-06-08 05:05:53 +08:00
lastIsUnhealthy = status . isUnhealthy ( ) ;
2017-05-26 04:48:44 +08:00
choose {
2018-08-11 04:57:10 +08:00
when ( wait ( failureTracker ) ) {
2017-05-26 04:48:44 +08:00
// The server is failed AND all data has been removed from it, so permanently remove it.
TraceEvent ( " StatusMapChange " , masterId ) . detail ( " ServerID " , server - > id ) . detail ( " Status " , " Removing " ) ;
2018-06-19 08:23:55 +08:00
if ( changes . present ( ) ) {
changes . get ( ) . send ( std : : make_pair ( server - > id , Optional < StorageServerInterface > ( ) ) ) ;
}
2017-05-26 04:48:44 +08:00
2018-11-13 09:39:40 +08:00
if ( server - > updated . canBeSet ( ) ) {
server - > updated . send ( Void ( ) ) ;
}
2017-05-26 04:48:44 +08:00
// Remove server from FF/serverList
2018-08-11 04:57:10 +08:00
wait ( removeStorageServer ( cx , server - > id , lock ) ) ;
2017-05-26 04:48:44 +08:00
TraceEvent ( " StatusMapChange " , masterId ) . detail ( " ServerID " , server - > id ) . detail ( " Status " , " Removed " ) ;
// Sets removeSignal (alerting dataDistributionTeamCollection to remove the storage server from its own data structures)
server - > removed . send ( Void ( ) ) ;
self - > removedServers . send ( server - > id ) ;
return Void ( ) ;
}
when ( std : : pair < StorageServerInterface , ProcessClass > newInterface = wait ( interfaceChanged ) ) {
bool restartRecruiting = newInterface . first . waitFailure . getEndpoint ( ) . address ! = server - > lastKnownInterface . waitFailure . getEndpoint ( ) . address ;
2018-09-01 08:54:55 +08:00
bool localityChanged = server - > lastKnownInterface . locality ! = newInterface . first . locality ;
2017-05-26 04:48:44 +08:00
TraceEvent ( " StorageServerInterfaceChanged " , masterId ) . detail ( " ServerID " , server - > id )
. detail ( " NewWaitFailureToken " , newInterface . first . waitFailure . getEndpoint ( ) . token )
2018-09-01 08:54:55 +08:00
. detail ( " OldWaitFailureToken " , server - > lastKnownInterface . waitFailure . getEndpoint ( ) . token )
. detail ( " LocalityChanged " , localityChanged ) ;
2017-05-26 04:48:44 +08:00
server - > lastKnownInterface = newInterface . first ;
server - > lastKnownClass = newInterface . second ;
2018-09-01 08:54:55 +08:00
if ( localityChanged ) {
server - > inDesiredDC = ( self - > includedDCs . empty ( ) | | std : : find ( self - > includedDCs . begin ( ) , self - > includedDCs . end ( ) , server - > lastKnownInterface . locality . dcId ( ) ) ! = self - > includedDCs . end ( ) ) ;
self - > resetLocalitySet ( ) ;
2018-11-08 13:05:31 +08:00
vector < Reference < TCTeamInfo > > newBadTeams ;
bool addedNewBadTeam = false ;
for ( auto it : server - > teams ) {
if ( ! self - > satisfiesPolicy ( it - > servers ) ) {
newBadTeams . push_back ( it ) ;
}
}
for ( auto it : newBadTeams ) {
if ( self - > removeTeam ( it ) ) {
2018-11-08 06:37:10 +08:00
self - > addTeam ( it - > servers , true ) ;
2018-11-08 13:05:31 +08:00
addedNewBadTeam = true ;
}
}
if ( addedNewBadTeam & & self - > badTeamRemover . isReady ( ) ) {
self - > badTeamRemover = removeBadTeams ( self ) ;
self - > addActor . send ( self - > badTeamRemover ) ;
}
2018-09-01 08:54:55 +08:00
}
2017-05-26 04:48:44 +08:00
interfaceChanged = server - > onInterfaceChanged ;
2018-06-19 08:23:55 +08:00
if ( changes . present ( ) ) {
changes . get ( ) . send ( std : : make_pair ( server - > id , server - > lastKnownInterface ) ) ;
}
2017-05-26 04:48:44 +08:00
// We rely on the old failureTracker being actorCancelled since the old actor now has a pointer to an invalid location
status = ServerStatus ( status . isFailed , status . isUndesired , server - > lastKnownInterface . locality ) ;
//Restart the storeTracker for the new interface
storeTracker = keyValueStoreTypeTracker ( self , server ) ;
2017-09-08 06:32:08 +08:00
hasWrongStoreTypeOrDC = false ;
2017-05-26 04:48:44 +08:00
self - > restartTeamBuilder . trigger ( ) ;
if ( restartRecruiting )
self - > restartRecruiting . trigger ( ) ;
}
2018-08-11 04:57:10 +08:00
when ( wait ( otherChanges . empty ( ) ? Never ( ) : quorum ( otherChanges , 1 ) ) ) {
2017-05-26 04:48:44 +08:00
TraceEvent ( " SameAddressChangedStatus " , masterId ) . detail ( " ServerID " , server - > id ) ;
}
when ( KeyValueStoreType type = wait ( storeTracker ) ) {
TraceEvent ( " KeyValueStoreTypeChanged " , masterId )
. detail ( " ServerID " , server - > id )
. detail ( " StoreType " , type . toString ( ) )
2017-09-08 06:32:08 +08:00
. detail ( " DesiredType " , self - > configuration . storageServerStoreType . toString ( ) ) ;
2017-05-26 04:48:44 +08:00
TEST ( true ) ; //KeyValueStore type changed
storeTracker = Never ( ) ;
2017-09-08 06:32:08 +08:00
hasWrongStoreTypeOrDC = true ;
2017-05-26 04:48:44 +08:00
}
2018-08-11 04:57:10 +08:00
when ( wait ( server - > wakeUpTracker . getFuture ( ) ) ) {
2017-05-26 04:48:44 +08:00
server - > wakeUpTracker = Promise < Void > ( ) ;
}
}
}
} catch ( Error & e ) {
if ( e . code ( ) ! = error_code_actor_cancelled )
errorOut . sendError ( e ) ;
throw ;
}
}
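// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): the exclusion test used by
// storageServerTracker above -- a server is treated as excluded if either its exact ip:port or
// its bare IP appears in the exclusion list. Strings stand in for NetworkAddress and
// AddressExclusion.
// ---------------------------------------------------------------------------------------------
#include <set>
#include <string>

namespace dd_sketch {
    inline bool isExcluded(const std::set<std::string>& exclusions, const std::string& ip, int port) {
        return exclusions.count(ip + ":" + std::to_string(port)) > 0 || exclusions.count(ip) > 0;
    }
}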
// Monitor whether storage servers are being recruited; while they are, the database cannot be considered quiet
2018-11-10 02:07:55 +08:00
ACTOR Future < Void > monitorStorageServerRecruitment ( DDTeamCollection * self ) {
2017-05-26 04:48:44 +08:00
state bool recruiting = false ;
TraceEvent ( " StorageServerRecruitment " , self - > masterId )
2018-11-22 03:18:26 +08:00
. detail ( " State " , " Idle " )
. trackLatest ( ( " StorageServerRecruitment_ " + self - > masterId . toString ( ) ) . c_str ( ) ) ;
2017-05-26 04:48:44 +08:00
loop {
if ( ! recruiting ) {
while ( self - > recruitingStream . get ( ) = = 0 ) {
2018-08-11 04:57:10 +08:00
wait ( self - > recruitingStream . onChange ( ) ) ;
2017-05-26 04:48:44 +08:00
}
TraceEvent ( " StorageServerRecruitment " , self - > masterId )
. detail ( " State " , " Recruiting " )
2018-08-17 01:24:12 +08:00
. trackLatest ( ( " StorageServerRecruitment_ " + self - > masterId . toString ( ) ) . c_str ( ) ) ;
2017-05-26 04:48:44 +08:00
recruiting = true ;
} else {
loop {
choose {
2018-08-11 04:57:10 +08:00
when ( wait ( self - > recruitingStream . onChange ( ) ) ) { }
when ( wait ( self - > recruitingStream . get ( ) = = 0 ? delay ( SERVER_KNOBS - > RECRUITMENT_IDLE_DELAY , TaskDataDistribution ) : Future < Void > ( Never ( ) ) ) ) { break ; }
2017-05-26 04:48:44 +08:00
}
}
TraceEvent ( " StorageServerRecruitment " , self - > masterId )
. detail ( " State " , " Idle " )
2018-08-17 01:24:12 +08:00
. trackLatest ( ( " StorageServerRecruitment_ " + self - > masterId . toString ( ) ) . c_str ( ) ) ;
2017-05-26 04:48:44 +08:00
recruiting = false ;
}
}
}
2018-11-10 02:07:55 +08:00
ACTOR Future < Void > initializeStorage ( DDTeamCollection * self , RecruitStorageReply candidateWorker ) {
2017-05-26 04:48:44 +08:00
// SOMEDAY: Cluster controller waits for availability, retry quickly if a server's Locality changes
self - > recruitingStream . set ( self - > recruitingStream . get ( ) + 1 ) ;
state UID interfaceId = g_random - > randomUniqueID ( ) ;
InitializeStorageRequest isr ;
2017-09-08 06:32:08 +08:00
isr . storeType = self - > configuration . storageServerStoreType ;
2017-05-26 04:48:44 +08:00
isr . seedTag = invalidTag ;
isr . reqId = g_random - > randomUniqueID ( ) ;
isr . interfaceId = interfaceId ;
TraceEvent ( " DDRecruiting " ) . detail ( " State " , " Sending request to worker " ) . detail ( " WorkerID " , candidateWorker . worker . id ( ) )
2018-06-09 02:11:08 +08:00
. detail ( " WorkerLocality " , candidateWorker . worker . locality . toString ( ) ) . detail ( " Interf " , interfaceId ) . detail ( " Addr " , candidateWorker . worker . address ( ) ) ;
2017-05-26 04:48:44 +08:00
self - > recruitingIds . insert ( interfaceId ) ;
self - > recruitingLocalities . insert ( candidateWorker . worker . address ( ) ) ;
2018-10-03 08:31:07 +08:00
state ErrorOr < InitializeStorageReply > newServer = wait ( candidateWorker . worker . storage . tryGetReply ( isr , TaskDataDistribution ) ) ;
if ( newServer . isError ( ) ) {
TraceEvent ( SevWarn , " DDRecruitmentError " ) . error ( newServer . getError ( ) ) ;
if ( ! newServer . isError ( error_code_recruitment_failed ) & & ! newServer . isError ( error_code_request_maybe_delivered ) )
throw newServer . getError ( ) ;
2018-10-04 07:57:18 +08:00
wait ( delay ( SERVER_KNOBS - > STORAGE_RECRUITMENT_DELAY , TaskDataDistribution ) ) ;
2018-10-03 08:31:07 +08:00
}
2017-05-26 04:48:44 +08:00
self - > recruitingIds . erase ( interfaceId ) ;
self - > recruitingLocalities . erase ( candidateWorker . worker . address ( ) ) ;
self - > recruitingStream . set ( self - > recruitingStream . get ( ) - 1 ) ;
TraceEvent ( " DDRecruiting " ) . detail ( " State " , " Finished request " ) . detail ( " WorkerID " , candidateWorker . worker . id ( ) )
2018-06-09 02:11:08 +08:00
. detail ( " WorkerLocality " , candidateWorker . worker . locality . toString ( ) ) . detail ( " Interf " , interfaceId ) . detail ( " Addr " , candidateWorker . worker . address ( ) ) ;
2017-05-26 04:48:44 +08:00
2018-10-03 08:31:07 +08:00
if ( newServer . present ( ) ) {
2018-05-06 09:16:28 +08:00
if ( ! self - > server_info . count ( newServer . get ( ) . interf . id ( ) ) )
self - > addServer ( newServer . get ( ) . interf , candidateWorker . processClass , self - > serverTrackerErrorOut , newServer . get ( ) . addedVersion ) ;
2017-05-26 04:48:44 +08:00
else
TraceEvent ( SevWarn , " DDRecruitmentError " ) . detail ( " Reason " , " Server ID already recruited " ) ;
self - > doBuildTeams = true ;
if ( self - > healthyTeamCount = = 0 ) {
2018-08-11 04:57:10 +08:00
wait ( self - > checkBuildTeams ( self ) ) ;
2017-05-26 04:48:44 +08:00
}
}
self - > restartRecruiting . trigger ( ) ;
return Void ( ) ;
}
2018-11-10 02:07:55 +08:00
ACTOR Future < Void > storageRecruiter ( DDTeamCollection * self , Reference < AsyncVar < struct ServerDBInfo > > db ) {
2017-05-26 04:48:44 +08:00
state Future < RecruitStorageReply > fCandidateWorker ;
state RecruitStorageRequest lastRequest ;
loop {
try {
RecruitStorageRequest rsr ;
std : : set < AddressExclusion > exclusions ;
for ( auto s = self - > server_info . begin ( ) ; s ! = self - > server_info . end ( ) ; + + s ) {
auto serverStatus = self - > server_status . get ( s - > second - > lastKnownInterface . id ( ) ) ;
if ( serverStatus . excludeOnRecruit ( ) ) {
2018-02-17 08:01:19 +08:00
TraceEvent ( SevDebug , " DDRecruitExcl1 " ) . detail ( " Excluding " , s - > second - > lastKnownInterface . address ( ) ) ;
2017-05-26 04:48:44 +08:00
auto addr = s - > second - > lastKnownInterface . address ( ) ;
exclusions . insert ( AddressExclusion ( addr . ip , addr . port ) ) ;
}
}
for ( auto addr : self - > recruitingLocalities ) {
exclusions . insert ( AddressExclusion ( addr . ip , addr . port ) ) ;
}
auto excl = self - > excludedServers . getKeys ( ) ;
for ( auto & s : excl )
if ( self - > excludedServers . get ( s ) ) {
2018-02-17 08:01:19 +08:00
TraceEvent ( SevDebug , " DDRecruitExcl2 " ) . detail ( " Excluding " , s . toString ( ) ) ;
2017-05-26 04:48:44 +08:00
exclusions . insert ( s ) ;
}
rsr . criticalRecruitment = self - > healthyTeamCount = = 0 ;
for ( auto it : exclusions ) {
rsr . excludeAddresses . push_back ( it ) ;
}
2017-10-11 01:36:33 +08:00
rsr . includeDCs = self - > includedDCs ;
2017-09-08 06:32:08 +08:00
2017-05-26 04:48:44 +08:00
TraceEvent ( rsr . criticalRecruitment ? SevWarn : SevInfo , " DDRecruiting " ) . detail ( " State " , " Sending request to CC " )
. detail ( " Exclusions " , rsr . excludeAddresses . size ( ) ) . detail ( " Critical " , rsr . criticalRecruitment ) ;
if ( rsr . criticalRecruitment ) {
TraceEvent ( SevWarn , " DDRecruitingEmergency " , self - > masterId ) ;
}
if ( ! fCandidateWorker . isValid ( ) | | fCandidateWorker . isReady ( ) | | rsr . excludeAddresses ! = lastRequest . excludeAddresses | | rsr . criticalRecruitment ! = lastRequest . criticalRecruitment ) {
lastRequest = rsr ;
fCandidateWorker = brokenPromiseToNever ( db - > get ( ) . clusterInterface . recruitStorage . getReply ( rsr , TaskDataDistribution ) ) ;
}
choose {
when ( RecruitStorageReply candidateWorker = wait ( fCandidateWorker ) ) {
self - > addActor . send ( initializeStorage ( self , candidateWorker ) ) ;
}
2018-08-11 04:57:10 +08:00
when ( wait ( db - > onChange ( ) ) ) { // SOMEDAY: only if clusterInterface changes?
2017-05-26 04:48:44 +08:00
fCandidateWorker = Future < RecruitStorageReply > ( ) ;
}
2018-08-11 04:57:10 +08:00
when ( wait ( self - > restartRecruiting . onTrigger ( ) ) ) { }
2017-05-26 04:48:44 +08:00
}
2018-08-11 04:57:10 +08:00
wait ( delay ( FLOW_KNOBS - > PREVENT_FAST_SPIN_DELAY ) ) ;
2017-05-26 04:48:44 +08:00
} catch ( Error & e ) {
if ( e . code ( ) ! = error_code_timed_out ) {
throw ;
}
TEST ( true ) ; //Storage recruitment timed out
}
}
}
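// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): how storageRecruiter above assembles
// the exclusion list for a RecruitStorageRequest -- addresses of servers whose status says to
// exclude them on recruit, localities with a recruitment already in flight, and operator
// exclusions are merged into one set so the cluster controller never offers them as candidates.
// Strings stand in for AddressExclusion.
// ---------------------------------------------------------------------------------------------
#include <set>
#include <string>
#include <vector>

namespace dd_sketch {
    inline std::vector<std::string> buildRecruitmentExclusions(const std::set<std::string>& excludeOnRecruit,
                                                               const std::set<std::string>& recruitingLocalities,
                                                               const std::set<std::string>& operatorExcluded) {
        std::set<std::string> exclusions;
        exclusions.insert(excludeOnRecruit.begin(), excludeOnRecruit.end());
        exclusions.insert(recruitingLocalities.begin(), recruitingLocalities.end());
        exclusions.insert(operatorExcluded.begin(), operatorExcluded.end());
        return std::vector<std::string>(exclusions.begin(), exclusions.end());
    }
}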
2018-04-09 12:24:05 +08:00
ACTOR Future < Void > updateReplicasKey ( DDTeamCollection * self , Optional < Key > dcId ) {
2018-11-13 09:39:40 +08:00
std : : vector < Future < Void > > serverUpdates ;
for ( auto & it : self - > server_info ) {
serverUpdates . push_back ( it . second - > updated . getFuture ( ) ) ;
}
2018-11-13 12:26:58 +08:00
wait ( self - > initialFailureReactionDelay & & waitForAll ( serverUpdates ) ) ;
2018-07-09 08:44:19 +08:00
loop {
while ( self - > zeroHealthyTeams - > get ( ) | | self - > processingUnhealthy - > get ( ) ) {
TraceEvent ( " DDUpdatingStalled " , self - > masterId ) . detail ( " DcId " , printable ( dcId ) ) . detail ( " ZeroHealthy " , self - > zeroHealthyTeams - > get ( ) ) . detail ( " ProcessingUnhealthy " , self - > processingUnhealthy - > get ( ) ) ;
2018-08-11 04:57:10 +08:00
wait ( self - > zeroHealthyTeams - > onChange ( ) | | self - > processingUnhealthy - > onChange ( ) ) ;
2018-07-09 08:44:19 +08:00
}
2018-08-11 04:57:10 +08:00
wait ( delay ( FLOW_KNOBS - > PREVENT_FAST_SPIN_DELAY , TaskLowPriority ) ) ; //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
2018-07-09 08:44:19 +08:00
if ( ! self - > zeroHealthyTeams - > get ( ) & & ! self - > processingUnhealthy - > get ( ) ) {
break ;
}
2018-04-09 12:24:05 +08:00
}
2018-06-09 02:11:08 +08:00
TraceEvent ( " DDUpdatingReplicas " , self - > masterId ) . detail ( " DcId " , printable ( dcId ) ) . detail ( " Replicas " , self - > configuration . storageTeamSize ) ;
2018-04-09 12:24:05 +08:00
state Transaction tr ( self - > cx ) ;
loop {
try {
2018-11-09 07:44:03 +08:00
Optional < Value > val = wait ( tr . get ( datacenterReplicasKeyFor ( dcId ) ) ) ;
state int oldReplicas = val . present ( ) ? decodeDatacenterReplicasValue ( val . get ( ) ) : 0 ;
if ( oldReplicas = = self - > configuration . storageTeamSize ) {
TraceEvent ( " DDUpdatedAlready " , self - > masterId ) . detail ( " DcId " , printable ( dcId ) ) . detail ( " Replicas " , self - > configuration . storageTeamSize ) ;
return Void ( ) ;
}
if ( oldReplicas < self - > configuration . storageTeamSize ) {
tr . set ( rebootWhenDurableKey , StringRef ( ) ) ;
}
2018-04-09 12:24:05 +08:00
tr . set ( datacenterReplicasKeyFor ( dcId ) , datacenterReplicasValue ( self - > configuration . storageTeamSize ) ) ;
2018-08-11 04:57:10 +08:00
wait ( tr . commit ( ) ) ;
2018-11-09 07:44:03 +08:00
TraceEvent ( " DDUpdatedReplicas " , self - > masterId ) . detail ( " DcId " , printable ( dcId ) ) . detail ( " Replicas " , self - > configuration . storageTeamSize ) . detail ( " OldReplicas " , oldReplicas ) ;
2018-04-09 12:24:05 +08:00
return Void ( ) ;
} catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2018-04-09 12:24:05 +08:00
}
}
}
2018-06-08 07:14:40 +08:00
ACTOR Future < Void > serverGetTeamRequests ( TeamCollectionInterface tci , DDTeamCollection * self ) {
loop {
GetTeamRequest req = waitNext ( tci . getTeam . getFuture ( ) ) ;
self - > addActor . send ( self - > getTeam ( self , req ) ) ;
}
}
2017-05-26 04:48:44 +08:00
// Keep track of servers and teams -- serves requests for getRandomTeam
ACTOR Future < Void > dataDistributionTeamCollection (
2018-11-10 02:07:55 +08:00
Reference < DDTeamCollection > teamCollection ,
2017-05-26 04:48:44 +08:00
Reference < InitialDataDistribution > initData ,
TeamCollectionInterface tci ,
2018-11-10 02:07:55 +08:00
Reference < AsyncVar < struct ServerDBInfo > > db )
2017-05-26 04:48:44 +08:00
{
2018-11-10 02:07:55 +08:00
state DDTeamCollection * self = teamCollection . getPtr ( ) ;
2017-05-26 04:48:44 +08:00
state Future < Void > loggingTrigger = Void ( ) ;
state PromiseStream < Void > serverRemoved ;
2018-11-10 02:07:55 +08:00
state Future < Void > error = actorCollection ( self - > addActor . getFuture ( ) ) ;
2017-05-26 04:48:44 +08:00
try {
2018-11-11 05:04:24 +08:00
wait ( DDTeamCollection : : init ( self , initData ) ) ;
2017-05-26 04:48:44 +08:00
initData = Reference < InitialDataDistribution > ( ) ;
2018-11-10 02:07:55 +08:00
self - > addActor . send ( serverGetTeamRequests ( tci , self ) ) ;
2018-06-08 07:14:40 +08:00
2018-11-10 02:07:55 +08:00
TraceEvent ( " DDTeamCollectionBegin " , self - > masterId ) . detail ( " Primary " , self - > primary ) ;
2018-11-11 05:04:24 +08:00
wait ( self - > readyToStart | | error ) ;
2018-11-10 02:07:55 +08:00
TraceEvent ( " DDTeamCollectionReadyToStart " , self - > masterId ) . detail ( " Primary " , self - > primary ) ;
2018-11-08 13:05:31 +08:00
2018-11-10 02:07:55 +08:00
if ( self - > badTeamRemover . isReady ( ) ) {
self - > badTeamRemover = removeBadTeams ( self ) ;
self - > addActor . send ( self - > badTeamRemover ) ;
2018-11-08 13:05:31 +08:00
}
2018-11-10 02:07:55 +08:00
if ( self - > includedDCs . size ( ) ) {
2018-11-13 09:39:40 +08:00
//start this actor before any potential recruitments can happen
2018-11-10 02:07:55 +08:00
self - > addActor . send ( updateReplicasKey ( self , self - > includedDCs [ 0 ] ) ) ;
2018-04-09 12:24:05 +08:00
}
2018-11-13 09:39:40 +08:00
self - > addActor . send ( storageRecruiter ( self , db ) ) ;
self - > addActor . send ( monitorStorageServerRecruitment ( self ) ) ;
self - > addActor . send ( waitServerListChange ( self , serverRemoved . getFuture ( ) ) ) ;
self - > addActor . send ( trackExcludedServers ( self ) ) ;
2017-05-26 04:48:44 +08:00
// SOMEDAY: Monitor FF/serverList for (new) servers that aren't in allServers and add or remove them
loop choose {
2018-11-10 02:07:55 +08:00
when ( UID removedServer = waitNext ( self - > removedServers . getFuture ( ) ) ) {
2017-05-26 04:48:44 +08:00
TEST ( true ) ; // Storage server removed from database
2018-11-10 02:07:55 +08:00
self - > removeServer ( removedServer ) ;
2017-05-26 04:48:44 +08:00
serverRemoved . send ( Void ( ) ) ;
2018-11-10 02:07:55 +08:00
self - > restartRecruiting . trigger ( ) ;
2017-05-26 04:48:44 +08:00
}
2018-11-11 05:04:24 +08:00
when ( wait ( self - > zeroHealthyTeams - > onChange ( ) ) ) {
2018-11-10 02:07:55 +08:00
if ( self - > zeroHealthyTeams - > get ( ) ) {
self - > restartRecruiting . trigger ( ) ;
self - > noHealthyTeams ( ) ;
2018-01-31 09:00:51 +08:00
}
2017-05-26 04:48:44 +08:00
}
2018-08-11 04:57:10 +08:00
when ( wait ( loggingTrigger ) ) {
2018-08-10 04:16:09 +08:00
int highestPriority = 0 ;
2018-11-10 02:07:55 +08:00
for ( auto it : self - > priority_teams ) {
2018-08-10 04:16:09 +08:00
if ( it . second > 0 ) {
highestPriority = std : : max ( highestPriority , it . first ) ;
}
}
2018-08-30 05:40:39 +08:00
2018-11-22 03:18:26 +08:00
TraceEvent ( " TotalDataInFlight " , self - > masterId )
. detail ( " Primary " , self - > primary )
. detail ( " TotalBytes " , self - > getDebugTotalDataInFlight ( ) )
. detail ( " UnhealthyServers " , self - > unhealthyServers )
. detail ( " ServerNumber " , self - > server_info . size ( ) )
. detail ( " StorageTeamSize " , self - > configuration . storageTeamSize )
. detail ( " HighestPriority " , highestPriority )
. trackLatest ( self - > primary ? " TotalDataInFlight " : " TotalDataInFlightRemote " ) ;
2017-05-26 04:48:44 +08:00
loggingTrigger = delay ( SERVER_KNOBS - > DATA_DISTRIBUTION_LOGGING_INTERVAL ) ;
}
2018-11-11 05:04:24 +08:00
when ( wait ( self - > serverTrackerErrorOut . getFuture ( ) ) ) { } // Propagate errors from storageServerTracker
2018-08-11 04:57:10 +08:00
when ( wait ( error ) ) { }
2017-05-26 04:48:44 +08:00
}
} catch ( Error & e ) {
if ( e . code ( ) ! = error_code_movekeys_conflict )
2018-11-10 02:07:55 +08:00
TraceEvent ( SevError , " DataDistributionTeamCollectionError " , self - > masterId ) . error ( e ) ;
2017-05-26 04:48:44 +08:00
throw e ;
}
}
ACTOR Future < Void > waitForDataDistributionEnabled ( Database cx ) {
state Transaction tr ( cx ) ;
loop {
2018-08-11 04:57:10 +08:00
wait ( delay ( SERVER_KNOBS - > DD_ENABLED_CHECK_DELAY , TaskDataDistribution ) ) ;
2017-05-26 04:48:44 +08:00
try {
Optional < Value > mode = wait ( tr . get ( dataDistributionModeKey ) ) ;
if ( ! mode . present ( ) ) return Void ( ) ;
if ( mode . present ( ) ) {
BinaryReader rd ( mode . get ( ) , Unversioned ( ) ) ;
int m ;
rd > > m ;
if ( m ) return Void ( ) ;
}
tr . reset ( ) ;
} catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
ACTOR Future < bool > isDataDistributionEnabled ( Database cx ) {
state Transaction tr ( cx ) ;
loop {
try {
Optional < Value > mode = wait ( tr . get ( dataDistributionModeKey ) ) ;
if ( ! mode . present ( ) ) return true ;
if ( mode . present ( ) ) {
BinaryReader rd ( mode . get ( ) , Unversioned ( ) ) ;
int m ;
rd > > m ;
if ( m ) return true ;
}
// SOMEDAY: Write a wrapper in MoveKeys.h
Optional < Value > readVal = wait ( tr . get ( moveKeysLockOwnerKey ) ) ;
UID currentOwner = readVal . present ( ) ? BinaryReader : : fromStringRef < UID > ( readVal . get ( ) , Unversioned ( ) ) : UID ( ) ;
if ( currentOwner ! = dataDistributionModeLock )
return true ;
return false ;
} catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
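// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): decoding the dataDistributionModeKey
// value read by the two actors above. The real code uses BinaryReader with Unversioned();
// assuming the mode is stored as a raw 32-bit integer (and a little-endian host), a plain
// memcpy-based decode looks like this. It mirrors the "absent or non-zero means enabled" checks.
// ---------------------------------------------------------------------------------------------
#include <cstdint>
#include <cstring>
#include <string>

namespace dd_sketch {
    // value == nullptr models "key not present".
    inline bool modeValueSaysEnabled(const std::string* value) {
        if (value == nullptr) return true; // no mode key: data distribution is enabled
        int32_t mode = 0;
        if (value->size() >= sizeof(mode)) std::memcpy(&mode, value->data(), sizeof(mode));
        return mode != 0;
    }
}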
//Ensures that the serverKeys key space is properly coalesced
//This method is only used for testing and is not implemented in a manner that is safe for large databases
ACTOR Future < Void > debugCheckCoalescing ( Database cx ) {
state Transaction tr ( cx ) ;
loop {
try {
state Standalone < RangeResultRef > serverList = wait ( tr . getRange ( serverListKeys , CLIENT_KNOBS - > TOO_MANY ) ) ;
ASSERT ( ! serverList . more & & serverList . size ( ) < CLIENT_KNOBS - > TOO_MANY ) ;
state int i ;
for ( i = 0 ; i < serverList . size ( ) ; i + + ) {
state UID id = decodeServerListValue ( serverList [ i ] . value ) . id ( ) ;
Standalone < RangeResultRef > ranges = wait ( krmGetRanges ( & tr , serverKeysPrefixFor ( id ) , allKeys ) ) ;
ASSERT ( ranges . end ( ) [ - 1 ] . key = = allKeys . end ) ;
for ( int j = 0 ; j < ranges . size ( ) - 2 ; j + + )
if ( ranges [ j ] . value = = ranges [ j + 1 ] . value )
TraceEvent ( SevError , " UncoalescedValues " , id ) . detail ( " Key1 " , printable ( ranges [ j ] . key ) ) . detail ( " Key2 " , printable ( ranges [ j + 1 ] . key ) ) . detail ( " Value " , printable ( ranges [ j ] . value ) ) ;
}
TraceEvent ( " DoneCheckingCoalescing " ) ;
return Void ( ) ;
}
catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
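// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): the coalescing property that
// debugCheckCoalescing above verifies -- in a properly coalesced krm range map no two adjacent
// ranges carry the same value. The sketch scans a flattened (key, value) list and reports
// offending neighbours; unlike the loop above it does not special-case the final boundary.
// ---------------------------------------------------------------------------------------------
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

namespace dd_sketch {
    inline std::vector<std::size_t> findUncoalescedRanges(
            const std::vector<std::pair<std::string, std::string>>& ranges) {
        std::vector<std::size_t> offenders;
        for (std::size_t j = 0; j + 1 < ranges.size(); j++) {
            if (ranges[j].second == ranges[j + 1].second)
                offenders.push_back(j); // ranges[j] and ranges[j+1] should have been merged
        }
        return offenders;
    }
}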
static std : : set < int > const & normalDDQueueErrors ( ) {
static std : : set < int > s ;
if ( s . empty ( ) ) {
s . insert ( error_code_movekeys_conflict ) ;
s . insert ( error_code_broken_promise ) ;
}
return s ;
}
ACTOR Future < Void > pollMoveKeysLock ( Database cx , MoveKeysLock lock ) {
loop {
2018-08-11 04:57:10 +08:00
wait ( delay ( SERVER_KNOBS - > MOVEKEYS_LOCK_POLLING_DELAY ) ) ;
2017-05-26 04:48:44 +08:00
state Transaction tr ( cx ) ;
loop {
try {
2018-08-11 04:57:10 +08:00
wait ( checkMoveKeysLockReadOnly ( & tr , lock ) ) ;
2017-05-26 04:48:44 +08:00
break ;
} catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
}
}
ACTOR Future < Void > dataDistribution (
Reference < AsyncVar < struct ServerDBInfo > > db ,
MasterInterface mi , DatabaseConfiguration configuration ,
PromiseStream < std : : pair < UID , Optional < StorageServerInterface > > > serverChanges ,
Reference < ILogSystem > logSystem ,
Version recoveryCommitVersion ,
2018-01-09 04:04:19 +08:00
std : : vector < Optional < Key > > primaryDcId ,
2018-02-27 09:09:09 +08:00
std : : vector < Optional < Key > > remoteDcIds ,
2018-04-09 12:24:05 +08:00
double * lastLimited ,
Future < Void > remoteRecovered )
2017-05-26 04:48:44 +08:00
{
state Database cx = openDBOnServer ( db , TaskDataDistributionLaunch , true , true ) ;
cx - > locationCacheSize = SERVER_KNOBS - > DD_LOCATION_CACHE_SIZE ;
2018-04-09 12:24:05 +08:00
state Transaction tr ( cx ) ;
2017-05-26 04:48:44 +08:00
loop {
try {
2018-04-09 12:24:05 +08:00
tr . setOption ( FDBTransactionOptions : : ACCESS_SYSTEM_KEYS ) ;
tr . setOption ( FDBTransactionOptions : : PRIORITY_SYSTEM_IMMEDIATE ) ;
2017-05-26 04:48:44 +08:00
2018-04-09 12:24:05 +08:00
Standalone < RangeResultRef > replicaKeys = wait ( tr . getRange ( datacenterReplicasKeys , CLIENT_KNOBS - > TOO_MANY ) ) ;
2017-05-26 04:48:44 +08:00
2018-04-09 12:24:05 +08:00
for ( auto & kv : replicaKeys ) {
auto dcId = decodeDatacenterReplicasKey ( kv . key ) ;
auto replicas = decodeDatacenterReplicasValue ( kv . value ) ;
2018-11-05 14:05:37 +08:00
if ( ( primaryDcId . size ( ) & & primaryDcId [ 0 ] = = dcId ) | | ( remoteDcIds . size ( ) & & remoteDcIds [ 0 ] = = dcId & & configuration . usableRegions > 1 ) ) {
2018-04-09 12:24:05 +08:00
if ( replicas > configuration . storageTeamSize ) {
tr . set ( kv . key , datacenterReplicasValue ( configuration . storageTeamSize ) ) ;
}
} else {
tr . clear ( kv . key ) ;
}
2017-05-26 04:48:44 +08:00
}
2018-08-11 04:57:10 +08:00
wait ( tr . commit ( ) ) ;
2017-05-26 04:48:44 +08:00
break ;
}
catch ( Error & e ) {
2018-08-11 04:57:10 +08:00
wait ( tr . onError ( e ) ) ;
2017-05-26 04:48:44 +08:00
}
}
//cx->setOption( FDBDatabaseOptions::LOCATION_CACHE_SIZE, StringRef((uint8_t*) &SERVER_KNOBS->DD_LOCATION_CACHE_SIZE, 8) );
//ASSERT( cx->locationCacheSize == SERVER_KNOBS->DD_LOCATION_CACHE_SIZE );
2018-08-11 04:57:10 +08:00
//wait(debugCheckCoalescing(cx));
2017-05-26 04:48:44 +08:00
loop {
try {
loop {
TraceEvent ( " DDInitTakingMoveKeysLock " , mi . id ( ) ) ;
state MoveKeysLock lock = wait ( takeMoveKeysLock ( cx , mi . id ( ) ) ) ;
TraceEvent ( " DDInitTookMoveKeysLock " , mi . id ( ) ) ;
2018-06-18 10:31:15 +08:00
state Reference < InitialDataDistribution > initData = wait ( getInitialDataDistribution ( cx , mi . id ( ) , lock , configuration . usableRegions > 1 ? remoteDcIds : std : : vector < Optional < Key > > ( ) ) ) ;
2017-05-26 04:48:44 +08:00
if ( initData - > shards . size ( ) > 1 ) {
2018-11-22 03:18:26 +08:00
TraceEvent ( " DDInitGotInitialDD " , mi . id ( ) )
. detail ( " B " , printable ( initData - > shards . end ( ) [ - 2 ] . key ) )
. detail ( " E " , printable ( initData - > shards . end ( ) [ - 1 ] . key ) )
. detail ( " Src " , describe ( initData - > shards . end ( ) [ - 2 ] . primarySrc ) )
. detail ( " Dest " , describe ( initData - > shards . end ( ) [ - 2 ] . primaryDest ) )
. trackLatest ( " InitialDD " ) ;
2017-05-26 04:48:44 +08:00
} else {
2018-06-09 02:11:08 +08:00
TraceEvent ( " DDInitGotInitialDD " , mi . id ( ) ) . detail ( " B " , " " ) . detail ( " E " , " " ) . detail ( " Src " , " [no items] " ) . detail ( " Dest " , " [no items] " ) . trackLatest ( " InitialDD " ) ;
2017-05-26 04:48:44 +08:00
}
2018-11-01 07:46:32 +08:00
if ( initData - > mode ) break ; // mode may be set true by system operator using fdbcli
2017-05-26 04:48:44 +08:00
TraceEvent ( " DataDistributionDisabled " , mi . id ( ) ) ;
TraceEvent ( " MovingData " , mi . id ( ) )
. detail ( " InFlight " , 0 )
. detail ( " InQueue " , 0 )
. detail ( " AverageShardSize " , - 1 )
. detail ( " LowPriorityRelocations " , 0 )
. detail ( " HighPriorityRelocations " , 0 )
. detail ( " HighestPriority " , 0 )
2018-08-17 01:24:12 +08:00
. trackLatest ( " MovingData " ) ;
2017-05-26 04:48:44 +08:00
2018-08-10 04:16:09 +08:00
TraceEvent ( " TotalDataInFlight " , mi . id ( ) ) . detail ( " Primary " , true ) . detail ( " TotalBytes " , 0 ) . detail ( " UnhealthyServers " , 0 ) . detail ( " HighestPriority " , 0 ) . trackLatest ( " TotalDataInFlight " ) ;
TraceEvent ( " TotalDataInFlight " , mi . id ( ) ) . detail ( " Primary " , false ) . detail ( " TotalBytes " , 0 ) . detail ( " UnhealthyServers " , 0 ) . detail ( " HighestPriority " , configuration . usableRegions > 1 ? 0 : - 1 ) . trackLatest ( " TotalDataInFlightRemote " ) ;
2017-05-26 04:48:44 +08:00
2018-08-11 04:57:10 +08:00
wait ( waitForDataDistributionEnabled ( cx ) ) ;
2017-05-26 04:48:44 +08:00
TraceEvent ( " DataDistributionEnabled " ) ;
}
// When/If this assertion fails, Evan owes Ben a pat on the back for his foresight
ASSERT ( configuration . storageTeamSize > 0 ) ;
state PromiseStream < RelocateShard > output ;
2018-08-10 03:37:46 +08:00
state PromiseStream < RelocateShard > input ;
2017-05-26 04:48:44 +08:00
state PromiseStream < Promise < int64_t > > getAverageShardBytes ;
state PromiseStream < GetMetricsRequest > getShardMetrics ;
2018-04-09 12:24:05 +08:00
state Reference < AsyncVar < bool > > processingUnhealthy ( new AsyncVar < bool > ( false ) ) ;
2017-05-26 04:48:44 +08:00
state Promise < Void > readyToStart ;
2018-08-14 10:46:47 +08:00
state Reference < ShardsAffectedByTeamFailure > shardsAffectedByTeamFailure ( new ShardsAffectedByTeamFailure ) ;
state int shard = 0 ;
for ( ; shard < initData - > shards . size ( ) - 1 ; shard + + ) {
KeyRangeRef keys = KeyRangeRef ( initData - > shards [ shard ] . key , initData - > shards [ shard + 1 ] . key ) ;
shardsAffectedByTeamFailure - > defineShard ( keys ) ;
std : : vector < ShardsAffectedByTeamFailure : : Team > teams ;
teams . push_back ( ShardsAffectedByTeamFailure : : Team ( initData - > shards [ shard ] . primarySrc , true ) ) ;
if ( configuration . usableRegions > 1 ) {
teams . push_back ( ShardsAffectedByTeamFailure : : Team ( initData - > shards [ shard ] . remoteSrc , false ) ) ;
}
2018-11-12 04:33:31 +08:00
if ( g_network - > isSimulated ( ) ) {
TraceEvent ( " DDInitShard " ) . detail ( " Keys " , printable ( keys ) ) . detail ( " PrimarySrc " , describe ( initData - > shards [ shard ] . primarySrc ) ) . detail ( " RemoteSrc " , describe ( initData - > shards [ shard ] . remoteSrc ) )
. detail ( " PrimaryDest " , describe ( initData - > shards [ shard ] . primaryDest ) ) . detail ( " RemoteDest " , describe ( initData - > shards [ shard ] . remoteDest ) ) ;
}
2018-08-14 10:46:47 +08:00
shardsAffectedByTeamFailure - > moveShard ( keys , teams ) ;
if ( initData - > shards [ shard ] . hasDest ) {
// This shard is already in flight. Ideally we should use dest in sABTF and generate a dataDistributionRelocator directly in
// DataDistributionQueue to track it, but it's easier to just (with low priority) schedule it for movement.
2018-09-29 03:15:23 +08:00
bool unhealthy = initData - > shards [ shard ] . primarySrc . size ( ) ! = configuration . storageTeamSize ;
if ( ! unhealthy & & configuration . usableRegions > 1 ) {
unhealthy = initData - > shards [ shard ] . remoteSrc . size ( ) ! = configuration . storageTeamSize ;
}
output . send ( RelocateShard ( keys , unhealthy ? PRIORITY_TEAM_UNHEALTHY : PRIORITY_RECOVER_MOVE ) ) ;
2018-08-14 10:46:47 +08:00
}
2018-08-15 07:00:31 +08:00
wait ( yield ( TaskDataDistribution ) ) ;
2018-08-14 10:46:47 +08:00
}
2017-10-11 01:36:33 +08:00
vector < TeamCollectionInterface > tcis ;
2018-11-10 02:07:55 +08:00
2018-02-03 03:46:04 +08:00
Reference < AsyncVar < bool > > anyZeroHealthyTeams ;
vector < Reference < AsyncVar < bool > > > zeroHealthyTeams ;
2017-10-11 01:36:33 +08:00
tcis . push_back ( TeamCollectionInterface ( ) ) ;
2018-02-03 03:46:04 +08:00
zeroHealthyTeams . push_back ( Reference < AsyncVar < bool > > ( new AsyncVar < bool > ( true ) ) ) ;
int storageTeamSize = configuration . storageTeamSize ;
vector < Future < Void > > actors ;
2018-06-18 10:31:15 +08:00
if ( configuration . usableRegions > 1 ) {
2017-10-11 01:36:33 +08:00
tcis . push_back ( TeamCollectionInterface ( ) ) ;
2018-02-03 03:46:04 +08:00
storageTeamSize = 2 * configuration . storageTeamSize ;
zeroHealthyTeams . push_back ( Reference < AsyncVar < bool > > ( new AsyncVar < bool > ( true ) ) ) ;
anyZeroHealthyTeams = Reference < AsyncVar < bool > > ( new AsyncVar < bool > ( true ) ) ;
actors . push_back ( anyTrue ( zeroHealthyTeams , anyZeroHealthyTeams ) ) ;
} else {
anyZeroHealthyTeams = zeroHealthyTeams [ 0 ] ;
2017-10-11 01:36:33 +08:00
}
actors.push_back( pollMoveKeysLock(cx, lock) );
actors.push_back( reportErrorsExcept( dataDistributionTracker( initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics, getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, mi.id() ), "DDTracker", mi.id(), &normalDDQueueErrors() ) );
actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, mi, storageTeamSize, lastLimited, recoveryCommitVersion ), "DDQueue", mi.id(), &normalDDQueueErrors() ) );
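
// Build the team collections: the primary region always gets one, and a second collection is added
// for the remote region when more than one region is usable. Each collection is handed the full
// pointer list so it can coordinate with its counterpart.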
vector<DDTeamCollection*> teamCollectionsPtrs;
Reference<DDTeamCollection> primaryTeamCollection( new DDTeamCollection(cx, mi.id(), lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId, configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>(), serverChanges, readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy) );
teamCollectionsPtrs.push_back(primaryTeamCollection.getPtr());
if (configuration.usableRegions > 1) {
    Reference<DDTeamCollection> remoteTeamCollection( new DDTeamCollection(cx, mi.id(), lock, output, shardsAffectedByTeamFailure, configuration, remoteDcIds, Optional<std::vector<Optional<Key>>>(), serverChanges, readyToStart.getFuture() && remoteRecovered, zeroHealthyTeams[1], false, processingUnhealthy) );
    teamCollectionsPtrs.push_back(remoteTeamCollection.getPtr());
    remoteTeamCollection->teamCollections = teamCollectionsPtrs;
    actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( remoteTeamCollection, initData, tcis[1], db ), "DDTeamCollectionSecondary", mi.id(), &normalDDQueueErrors() ) );
}
primaryTeamCollection->teamCollections = teamCollectionsPtrs;
actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( primaryTeamCollection, initData, tcis[0], db ), "DDTeamCollectionPrimary", mi.id(), &normalDDQueueErrors() ) );
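
// Relocation requests produced on 'output' (by the tracker and team collections) are forwarded into
// 'input', which the queue consumes, yielding periodically so forwarding cannot starve other actors.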
actors.push_back(yieldPromiseStream(output.getFuture(), input));

wait( waitForAll(actors) );
return Void();
}
catch( Error &e ) {
    state Error err = e;
    if( e.code() != error_code_movekeys_conflict )
        throw err;
    bool ddEnabled = wait( isDataDistributionEnabled(cx) );
    TraceEvent("DataDistributionMoveKeysConflict").detail("DataDistributionEnabled", ddEnabled);
    if( ddEnabled )
        throw err;
}
}
}
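
// Test helper: builds an in-memory DDTeamCollection over 'processCount' synthetic storage servers,
// without starting any of the data distribution actors, so team-building logic can be unit tested.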
DDTeamCollection* testTeamCollection(int teamSize, IRepPolicyRef policy, int processCount) {
    Database database = DatabaseContext::create(
        Reference<AsyncVar<ClientDBInfo>>( new AsyncVar<ClientDBInfo>() ),
        Never(),
        LocalityData(),
        false
    );

    DatabaseConfiguration conf;
    conf.storageTeamSize = teamSize;
    conf.storagePolicy = policy;

    DDTeamCollection* collection = new DDTeamCollection(
        database,
        UID(0, 0),
        MoveKeysLock(),
        PromiseStream<RelocateShard>(),
        Reference<ShardsAffectedByTeamFailure>( new ShardsAffectedByTeamFailure() ),
        conf,
        {},
        {},
        PromiseStream<std::pair<UID, Optional<StorageServerInterface>>>(),
        Future<Void>( Void() ),
        Reference<AsyncVar<bool>>( new AsyncVar<bool>(true) ),
        true,
        Reference<AsyncVar<bool>>( new AsyncVar<bool>(false) )
    );
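
    // Give each synthetic server a simple locality: servers share a zone when id % 5 matches and a
    // data hall when id % 3 matches, so the zone-aware replication policies in the tests below have
    // something to distinguish.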
    for (int id = 1; id <= processCount; ++id) {
        UID uid(id, 0);
        StorageServerInterface interface;
        interface.uniqueID = uid;
        interface.locality.set(LiteralStringRef("machineid"), Standalone<StringRef>(std::to_string(id)));
        interface.locality.set(LiteralStringRef("zoneid"), Standalone<StringRef>(std::to_string(id % 5)));
        interface.locality.set(LiteralStringRef("data_hall"), Standalone<StringRef>(std::to_string(id % 3)));
        collection->server_info[uid] = Reference<TCServerInfo>( new TCServerInfo(interface, ProcessClass(), true, collection->storageServerSet) );
        collection->server_status.set(uid, ServerStatus(false, false, interface.locality));
        collection->constructMachineFor1Server(uid);
    }

    return collection;
}
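
// Test helper: like testTeamCollection, but derives a full locality hierarchy from the process id
// (dcid = id/1000, data_hall = id/100, zoneid = id/10, machineid = id/5) so that machine-team logic
// can be exercised as well.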
DDTeamCollection* testMachineTeamCollection(int teamSize, IRepPolicyRef policy, int processCount) {
    Database database = DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>>( new AsyncVar<ClientDBInfo>() ),
                                                Never(), LocalityData(), false);

    DatabaseConfiguration conf;
    conf.storageTeamSize = teamSize;
    conf.storagePolicy = policy;

    DDTeamCollection* collection =
        new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
                             Reference<ShardsAffectedByTeamFailure>( new ShardsAffectedByTeamFailure() ), conf, {}, {},
                             PromiseStream<std::pair<UID, Optional<StorageServerInterface>>>(), Future<Void>( Void() ),
                             Reference<AsyncVar<bool>>( new AsyncVar<bool>(true) ), true,
                             Reference<AsyncVar<bool>>( new AsyncVar<bool>(false) ));

    for (int id = 1; id <= processCount; id++) {
        UID uid(id, 0);
        StorageServerInterface interface;
        interface.uniqueID = uid;
        int process_id = id;
        int dc_id = process_id / 1000;
        int data_hall_id = process_id / 100;
        int zone_id = process_id / 10;
        int machine_id = process_id / 5;
        printf("testMachineTeamCollection: process_id:%d zone_id:%d machine_id:%d ip_addr:%s\n",
               process_id, zone_id, machine_id, interface.address().toString().c_str());
        interface.locality.set(LiteralStringRef("processid"), Standalone<StringRef>(std::to_string(process_id)));
        interface.locality.set(LiteralStringRef("machineid"), Standalone<StringRef>(std::to_string(machine_id)));
        interface.locality.set(LiteralStringRef("zoneid"), Standalone<StringRef>(std::to_string(zone_id)));
        interface.locality.set(LiteralStringRef("data_hall"), Standalone<StringRef>(std::to_string(data_hall_id)));
        interface.locality.set(LiteralStringRef("dcid"), Standalone<StringRef>(std::to_string(dc_id)));

        collection->server_info[uid] =
            Reference<TCServerInfo>( new TCServerInfo(interface, ProcessClass(), true, collection->storageServerSet) );
        collection->server_status.set(uid, ServerStatus(false, false, interface.locality));
    }

    int totalServerIndex = collection->constructMachinesFromServers();
    printf("testMachineTeamCollection: construct machines for %d servers\n", totalServerIndex);

    return collection;
}
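
// The two tests below build a 60-process machine-team collection and ask addTeamsBestOf() to create
// server teams; the PolicyAcross policy requires each team to span distinct zoneid values.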
TEST_CASE ( " DataDistribution/AddTeamsBestOf/UseMachineID " ) {
wait ( Future < Void > ( Void ( ) ) ) ;
2018-11-22 03:18:26 +08:00
int teamSize = 3 ; // replication size
2018-08-30 05:40:39 +08:00
int processSize = 60 ;
IRepPolicyRef policy = IRepPolicyRef ( new PolicyAcross ( teamSize , " zoneid " , IRepPolicyRef ( new PolicyOne ( ) ) ) ) ;
state DDTeamCollection * collection = testMachineTeamCollection ( teamSize , policy , processSize ) ;
int result = collection - > addTeamsBestOf ( 30 ) ;
2018-11-22 03:18:26 +08:00
ASSERT ( collection - > sanityCheckTeams ( ) = = true ) ;
2018-08-30 05:40:39 +08:00
2018-11-22 03:18:26 +08:00
delete ( collection ) ;
2018-08-30 05:40:39 +08:00
return Void ( ) ;
}
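
// Same setup as above, but the machine teams are built explicitly first (addBestMachineTeams) before
// the server teams are added, to make machine-team state easier to inspect when debugging.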
TEST_CASE ( " DataDistribution/AddTeamsBestOf/NotUseMachineID " ) {
wait ( Future < Void > ( Void ( ) ) ) ;
2018-11-22 03:18:26 +08:00
int teamSize = 3 ; // replication size
2018-08-30 05:40:39 +08:00
int processSize = 60 ;
IRepPolicyRef policy = IRepPolicyRef ( new PolicyAcross ( teamSize , " zoneid " , IRepPolicyRef ( new PolicyOne ( ) ) ) ) ;
state DDTeamCollection * collection = testMachineTeamCollection ( teamSize , policy , processSize ) ;
2018-11-22 03:18:26 +08:00
if ( collection = = NULL ) {
2018-08-30 05:40:39 +08:00
fprintf ( stderr , " collection is null \n " ) ;
return Void ( ) ;
}
2018-11-22 03:18:26 +08:00
2018-08-30 05:40:39 +08:00
collection - > addBestMachineTeams ( 30 ) ; // Creat machine teams to help debug
int result = collection - > addTeamsBestOf ( 30 ) ;
collection - > sanityCheckTeams ( ) ; // Server team may happen to be on the same machine team, although unlikely
2018-11-22 03:18:26 +08:00
if ( collection ) delete ( collection ) ;
2018-08-30 05:40:39 +08:00
return Void ( ) ;
}
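
// With 10 servers the zoneid is id % 5, so the 5 zones hold two servers each. Of the C(10,3) = 120
// possible 3-server teams, the 5 * 8 = 40 teams that place two servers in the same zone violate the
// policy, leaving the 80 teams asserted below.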
TEST_CASE ( " DataDistribution/AddAllTeams/isExhaustive " ) {
2017-05-26 04:48:44 +08:00
IRepPolicyRef policy = IRepPolicyRef ( new PolicyAcross ( 3 , " zoneid " , IRepPolicyRef ( new PolicyOne ( ) ) ) ) ;
state DDTeamCollection * collection = testTeamCollection ( 3 , policy , 10 ) ;
vector < UID > processes ;
for ( auto process = collection - > server_info . begin ( ) ; process ! = collection - > server_info . end ( ) ; process + + ) {
processes . push_back ( process - > first ) ;
}
state vector < vector < UID > > teams ;
int result = wait ( collection - > addAllTeams ( collection , processes , & teams , 200 ) ) ;
2018-08-30 05:40:39 +08:00
2017-05-26 04:48:44 +08:00
delete ( collection ) ;
for ( int i = 0 ; i < teams . size ( ) ; i + + ) {
auto team = teams [ i ] ;
}
ASSERT ( result = = 80 ) ;
ASSERT ( teams [ 0 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 2 , 0 ) , UID ( 3 , 0 ) } ) ) ;
ASSERT ( teams [ 1 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 2 , 0 ) , UID ( 4 , 0 ) } ) ) ;
ASSERT ( teams [ 2 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 2 , 0 ) , UID ( 5 , 0 ) } ) ) ;
ASSERT ( teams [ 3 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 2 , 0 ) , UID ( 8 , 0 ) } ) ) ;
ASSERT ( teams [ 4 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 2 , 0 ) , UID ( 9 , 0 ) } ) ) ;
ASSERT ( teams [ 5 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 2 , 0 ) , UID ( 10 , 0 ) } ) ) ;
ASSERT ( teams [ 6 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 3 , 0 ) , UID ( 4 , 0 ) } ) ) ;
ASSERT ( teams [ 7 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 3 , 0 ) , UID ( 5 , 0 ) } ) ) ;
ASSERT ( teams [ 8 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 3 , 0 ) , UID ( 7 , 0 ) } ) ) ;
ASSERT ( teams [ 9 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 3 , 0 ) , UID ( 9 , 0 ) } ) ) ;
ASSERT ( teams [ 10 ] = = std : : vector < UID > ( { UID ( 1 , 0 ) , UID ( 3 , 0 ) , UID ( 10 , 0 ) } ) ) ;
ASSERT ( teams [ 79 ] = = std : : vector < UID > ( { UID ( 8 , 0 ) , UID ( 9 , 0 ) , UID ( 10 , 0 ) } ) ) ;
return Void ( ) ;
}
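
// Same enumeration as above, but the limit argument caps the result at the first 10 teams in the
// exhaustive ordering.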
TEST_CASE("/DataDistribution/AddAllTeams/withLimit") {
    IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne())));
    state DDTeamCollection* collection = testTeamCollection(3, policy, 10);

    vector<UID> processes;
    for (auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
        processes.push_back(process->first);
    }

    state vector<vector<UID>> teams;
    int result = wait(collection->addAllTeams(collection, processes, &teams, 10));
    delete (collection);

    for (int i = 0; i < teams.size(); i++) {
        auto team = teams[i];
    }

    ASSERT(result == 10);
    ASSERT(teams[0] == std::vector<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }));
    ASSERT(teams[1] == std::vector<UID>({ UID(1, 0), UID(2, 0), UID(4, 0) }));
    ASSERT(teams[2] == std::vector<UID>({ UID(1, 0), UID(2, 0), UID(5, 0) }));
    ASSERT(teams[3] == std::vector<UID>({ UID(1, 0), UID(2, 0), UID(8, 0) }));
    ASSERT(teams[4] == std::vector<UID>({ UID(1, 0), UID(2, 0), UID(9, 0) }));
    ASSERT(teams[5] == std::vector<UID>({ UID(1, 0), UID(2, 0), UID(10, 0) }));
    ASSERT(teams[6] == std::vector<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }));
    ASSERT(teams[7] == std::vector<UID>({ UID(1, 0), UID(3, 0), UID(5, 0) }));
    ASSERT(teams[8] == std::vector<UID>({ UID(1, 0), UID(3, 0), UID(7, 0) }));
    ASSERT(teams[9] == std::vector<UID>({ UID(1, 0), UID(3, 0), UID(9, 0) }));

    return Void();
}
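
// addTeamsBestOf() should spread new teams away from servers that already belong to many teams:
// after pre-adding two teams that both include server 1, the 8 generated teams must leave every
// server with between 1 and 5 teams.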
TEST_CASE("/DataDistribution/AddTeamsBestOf/SkippingBusyServers") {
    wait(Future<Void>(Void()));

    IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne())));
    state DDTeamCollection* collection = testTeamCollection(3, policy, 10);

    collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
    collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);

    int result = collection->addTeamsBestOf(8);
    ASSERT(result == 8);

    for (auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
        auto teamCount = process->second->teams.size();
        ASSERT(teamCount >= 1);
        ASSERT(teamCount <= 5);
    }

    delete (collection);
    return Void();
}
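
// With only 5 servers (each in its own zone under id % 5) there are just C(5,3) = 10 possible teams;
// two are pre-added, so addTeamsBestOf(10) can only produce the remaining 8.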
TEST_CASE("/DataDistribution/AddTeamsBestOf/NotEnoughServers") {
    wait(Future<Void>(Void()));

    IRepPolicyRef policy = IRepPolicyRef(new PolicyAcross(3, "zoneid", IRepPolicyRef(new PolicyOne())));
    state DDTeamCollection* collection = testTeamCollection(3, policy, 5);

    collection->addTeam(std::set<UID>({ UID(1, 0), UID(2, 0), UID(3, 0) }), true);
    collection->addTeam(std::set<UID>({ UID(1, 0), UID(3, 0), UID(4, 0) }), true);

    int result = collection->addTeamsBestOf(10);
    delete (collection);
    ASSERT(result == 8);
    return Void();
}