/*
 * CoordinatedState.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
2018-10-20 01:30:13 +08:00
# include "fdbserver/CoordinatedState.h"
# include "fdbserver/CoordinationInterface.h"
# include "fdbserver/Knobs.h"
2017-05-26 04:48:44 +08:00
# include "flow/ActorCollection.h"
2018-10-20 01:30:13 +08:00
# include "fdbserver/LeaderElection.h"
2019-02-18 10:46:59 +08:00
# include "flow/actorcompiler.h" // has to be last include
2017-05-26 04:48:44 +08:00
// Issues `req` on the read stream `to` through retryBrokenPromise() and returns the reply.
// When SERVER_KNOBS->BUGGIFY_ALL_COORDINATION is set, or BUGGIFY fires at the given site, an extra
// delay of SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY scaled by random01() is inserted before the
// request is sent and/or after the reply arrives.
ACTOR Future<GenerationRegReadReply> waitAndSendRead(RequestStream<GenerationRegReadRequest> to,
                                                     GenerationRegReadRequest req) {
	// Optional artificial latency ahead of the request.
	if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) {
		const double jitter = SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01();
		wait(delay(jitter));
	}

	state GenerationRegReadReply response = wait(retryBrokenPromise(to, req));

	// Optional artificial latency before the reply is handed back.
	if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) {
		const double jitter = SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01();
		wait(delay(jitter));
	}

	return response;
}
// Issues `req` on the write stream `to` through retryBrokenPromise() and returns the generation the
// replica answered with.
// When SERVER_KNOBS->BUGGIFY_ALL_COORDINATION is set, or BUGGIFY fires at the given site, an extra
// delay of SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY scaled by random01() is inserted before the
// request is sent and/or after the reply arrives.
ACTOR Future<UniqueGeneration> waitAndSendWrite(RequestStream<GenerationRegWriteRequest> to,
                                                GenerationRegWriteRequest req) {
	// Optional artificial latency ahead of the request.
	if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) {
		const double jitter = SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01();
		wait(delay(jitter));
	}

	state UniqueGeneration response = wait(retryBrokenPromise(to, req));

	// Optional artificial latency before the reply is handed back.
	if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) {
		const double jitter = SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01();
		wait(delay(jitter));
	}

	return response;
}
// Passes through the reply of `f` only when its generation is nonzero. A reply whose generation is 0
// makes this actor wait on Never(), so the returned future does not become ready for such replies.
ACTOR Future<GenerationRegReadReply> emptyToNever(Future<GenerationRegReadReply> f) {
	state GenerationRegReadReply reply = wait(f);

	if (reply.gen.generation != 0) {
		return reply;
	}

	// Generation 0: block forever instead of delivering the reply.
	wait(Future<Void>(Never()));
	return reply;
}
// Passes through the reply of `f` only when its generation is 0. A reply with a nonzero generation
// makes this actor wait on Never(), so the returned future does not become ready for such replies.
ACTOR Future<GenerationRegReadReply> nonemptyToNever(Future<GenerationRegReadReply> f) {
	state GenerationRegReadReply reply = wait(f);

	if (reply.gen.generation == 0) {
		return reply;
	}

	// Nonzero generation: block forever instead of delivering the reply.
	wait(Future<Void>(Never()));
	return reply;
}
// Implementation behind CoordinatedState: drives reads and writes of the generation register held by
// the state servers in `coordinators`.
//
// An instance supports one read() followed by at most one setExclusive(); `stage` tracks progress
// through that sequence and each entry point ASSERTs the stage it requires.
struct CoordinatedStateImpl {
	ServerCoordinators coordinators;

	// Progress marker:
	//   0 = nothing started            1 = first replicated read issued
	//   2 = second replicated read issued   3 = second read returned
	//   4 = read() complete            5 = setExclusive() write issued
	//   6 = setExclusive() write returned
	int stage;

	// Generation picked by read() and used for the second read and for the write in setExclusive().
	UniqueGeneration gen;

	// Running maximum of the generation numbers seen in replies; read() additionally bumps it by one
	// when choosing `gen`. Exposed through getConflict().
	uint64_t conflictGen;

	// Set by read() when its second read already reports a generation above `gen`.
	bool doomed;

	ActorCollection ac; // Errors are not reported

	// True when read()'s second reply carried generation 0; replicatedWrite() then requires a reply
	// from every replica instead of a majority.
	bool initial;

	CoordinatedStateImpl(ServerCoordinators const& c)
	  : coordinators(c), stage(0), conflictGen(0), doomed(false), ac(false), initial(false) {}

	// Returns the current value of conflictGen.
	uint64_t getConflict() { return conflictGen; }

	// True when `rep` shows a generation greater than the one this instance chose in read().
	bool isDoomed(GenerationRegReadReply const& rep) {
		return rep.gen > gen // setExclusive is doomed, because there was a write at least started at a higher
		                     // generation, which means a read completed at that higher generation
		    // || rep.rgen > gen // setExclusive isn't absolutely doomed, but it may/probably will fail
		    ;
	}

	// Performs the two-step read. Requires stage == 0 and leaves stage == 4.
	//   Step 1 reads with a default UniqueGeneration to learn the largest generation in use, then picks
	//          `gen` as (that maximum + 1, fresh random UID).
	//   Step 2 reads again with `gen`, records `doomed` / `initial`, and returns the stored value
	//          (an empty Value when the reply has none).
	ACTOR static Future<Value> read(CoordinatedStateImpl* self) {
		ASSERT(self->stage == 0);

		{
			self->stage = 1;
			GenerationRegReadReply rep = wait(self->replicatedRead(
			    self, GenerationRegReadRequest(self->coordinators.clusterKey, UniqueGeneration())));
			self->conflictGen =
			    std::max(self->conflictGen, std::max(rep.gen.generation, rep.rgen.generation)) + 1;
			self->gen = UniqueGeneration(self->conflictGen, deterministicRandom()->randomUniqueID());
		}

		{
			self->stage = 2;
			GenerationRegReadReply rep = wait(
			    self->replicatedRead(self, GenerationRegReadRequest(self->coordinators.clusterKey, self->gen)));
			self->stage = 3;
			self->conflictGen =
			    std::max(self->conflictGen, std::max(rep.gen.generation, rep.rgen.generation));
			if (self->isDoomed(rep))
				self->doomed = true;
			self->initial = rep.gen.generation == 0;

			self->stage = 4;
			return rep.value.present() ? rep.value.get() : Value();
		}
	}

	// Becomes ready once a later setExclusive() is known to be doomed (see isDoomed()).
	// Requires stage == 4. Returns immediately if read() already set `doomed`; otherwise polls the
	// replicas every COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL. Once stage has moved past 4 the polling
	// stops and the returned future never becomes ready.
	ACTOR static Future<Void> onConflict(CoordinatedStateImpl* self) {
		ASSERT(self->stage == 4);
		if (self->doomed)
			return Void();
		loop {
			wait(delay(SERVER_KNOBS->COORDINATED_STATE_ONCONFLICT_POLL_INTERVAL));
			GenerationRegReadReply rep = wait(self->replicatedRead(
			    self, GenerationRegReadRequest(self->coordinators.clusterKey, UniqueGeneration())));
			if (self->stage > 4)
				break;
			self->conflictGen =
			    std::max(self->conflictGen, std::max(rep.gen.generation, rep.rgen.generation));
			if (self->isDoomed(rep))
				return Void();
		}
		wait(Future<Void>(Never()));
		return Void();
	}

	// Writes `v` under the generation chosen by read(). Requires stage == 4 and leaves stage == 6.
	// Returns normally when the largest generation reported by the replicas equals `gen`; otherwise
	// folds that generation into conflictGen and throws coordinated_state_conflict.
	ACTOR static Future<Void> setExclusive(CoordinatedStateImpl* self, Value v) {
		ASSERT(self->stage == 4);
		self->stage = 5;

		UniqueGeneration wgen = wait(self->replicatedWrite(
		    self, GenerationRegWriteRequest(KeyValueRef(self->coordinators.clusterKey, v), self->gen)));
		self->stage = 6;

		TraceEvent(" CoordinatedStateSet ")
		    .detail(" Gen ", self->gen.generation)
		    .detail(" Wgen ", wgen.generation)
		    .detail(" Genu ", self->gen.uid)
		    .detail(" Wgenu ", wgen.uid)
		    .detail(" Cgen ", self->conflictGen);

		if (wgen == self->gen)
			return Void();
		else {
			self->conflictGen = std::max(self->conflictGen, wgen.generation);
			throw coordinated_state_conflict();
		}
	}

	// Sends `req` to every state server and returns one representative reply.
	// Each reply feeds two filtered futures: one that only completes for generation-0 ("empty") replies
	// and one that only completes for nonzero ("non-empty") replies. The actor waits until either
	//   - size/2 + 1 non-empty replies have arrived, or
	//   - (size + 1)/2 empty replies have arrived,
	// and then returns, respectively, the non-empty reply with the largest gen (ties broken by the larger
	// rgen) or the empty reply with the largest rgen. The empty case is checked first.
	ACTOR static Future<GenerationRegReadReply> replicatedRead(CoordinatedStateImpl* self,
	                                                           GenerationRegReadRequest req) {
		state std::vector<GenerationRegInterface>& replicas = self->coordinators.stateServers;
		state vector<Future<GenerationRegReadReply>> rep_empty_reply;
		state vector<Future<GenerationRegReadReply>> rep_reply;
		for (int i = 0; i < replicas.size(); i++) {
			Future<GenerationRegReadReply> reply =
			    waitAndSendRead(replicas[i].read, GenerationRegReadRequest(req.key, req.gen));
			rep_empty_reply.push_back(nonemptyToNever(reply));
			rep_reply.push_back(emptyToNever(reply));
			// The request is also handed to the actor collection in addition to the two filters above.
			self->ac.add(success(reply));
		}

		state Future<Void> majorityEmpty = quorum(
		    rep_empty_reply,
		    (replicas.size() + 1) / 2); // enough empty to ensure we cannot achieve a majority non-empty
		wait(quorum(rep_reply, replicas.size() / 2 + 1) || majorityEmpty);

		if (majorityEmpty.isReady()) {
			int best = -1;
			for (int i = 0; i < rep_empty_reply.size(); i++)
				if (rep_empty_reply[i].isReady() && !rep_empty_reply[i].isError()) {
					if (best < 0 || rep_empty_reply[i].get().rgen > rep_empty_reply[best].get().rgen)
						best = i;
				}
			ASSERT(best >= 0);
			auto result = rep_empty_reply[best].get();
			return result;
		} else {
			int best = -1;
			for (int i = 0; i < rep_reply.size(); i++)
				if (rep_reply[i].isReady() && !rep_reply[i].isError()) {
					if (best < 0 || rep_reply[i].get().gen > rep_reply[best].get().gen ||
					    (rep_reply[i].get().gen == rep_reply[best].get().gen &&
					     rep_reply[i].get().rgen > rep_reply[best].get().rgen))
						best = i;
				}
			ASSERT(best >= 0);
			auto result = rep_reply[best].get();
			return result;
		}
	}

	// Sends `req` to every state server and returns the largest generation among the replies received.
	// Waits for all replicas when `initial` is set, otherwise for a majority (size/2 + 1).
	ACTOR static Future<UniqueGeneration> replicatedWrite(CoordinatedStateImpl* self,
	                                                      GenerationRegWriteRequest req) {
		state std::vector<GenerationRegInterface>& replicas = self->coordinators.stateServers;
		state vector<Future<UniqueGeneration>> wrep_reply;
		for (int i = 0; i < replicas.size(); i++) {
			Future<UniqueGeneration> reply =
			    waitAndSendWrite(replicas[i].write, GenerationRegWriteRequest(req.kv, req.gen));
			wrep_reply.push_back(reply);
			self->ac.add(success(reply));
		}

		wait(quorum(wrep_reply, self->initial ? replicas.size() : replicas.size() / 2 + 1));

		UniqueGeneration maxGen;
		for (int i = 0; i < wrep_reply.size(); i++) {
			// Skip replies that completed with an error, as replicatedRead() does: get() on such a
			// future would rethrow that error here even though the quorum wait above already succeeded.
			if (wrep_reply[i].isReady() && !wrep_reply[i].isError())
				maxGen = std::max(maxGen, wrep_reply[i].get());
		}
		return maxGen;
	}
};
2020-11-24 14:25:51 +08:00
CoordinatedState : : CoordinatedState ( ServerCoordinators const & coord )
: impl ( std : : make_unique < CoordinatedStateImpl > ( coord ) ) { }
CoordinatedState : : ~ CoordinatedState ( ) = default ;
// Forwards to CoordinatedStateImpl::read() on the owned implementation.
Future<Value> CoordinatedState::read() {
	CoordinatedStateImpl* const pimpl = impl.get();
	return CoordinatedStateImpl::read(pimpl);
}
// Forwards to CoordinatedStateImpl::onConflict() on the owned implementation.
Future<Void> CoordinatedState::onConflict() {
	CoordinatedStateImpl* const pimpl = impl.get();
	return CoordinatedStateImpl::onConflict(pimpl);
}
// Forwards `v` to CoordinatedStateImpl::setExclusive() on the owned implementation.
Future<Void> CoordinatedState::setExclusive(Value v) {
	CoordinatedStateImpl* const pimpl = impl.get();
	return CoordinatedStateImpl::setExclusive(pimpl, v);
}
2017-05-26 04:48:44 +08:00
uint64_t CoordinatedState : : getConflict ( ) { return impl - > getConflict ( ) ; }
struct MovableValue {
enum MoveState {
MaybeTo = 1 ,
Active = 2 ,
MovingFrom = 3
} ;
Value value ;
int32_t mode ;
Optional < Value > other ; // a cluster connection string
MovableValue ( ) : mode ( Active ) { }
MovableValue ( Value const & v , int mode , Optional < Value > other = Optional < Value > ( ) ) : value ( v ) , mode ( mode ) , other ( other ) { }
2020-05-23 07:35:01 +08:00
//To change this serialization, ProtocolVersion::MovableCoordinatedStateV2 must be updated, and downgrades need to be considered
2017-05-26 04:48:44 +08:00
template < class Ar >
void serialize ( Ar & ar ) {
2019-06-19 08:55:27 +08:00
ASSERT ( ar . protocolVersion ( ) . hasMovableCoordinatedState ( ) ) ;
2018-12-29 02:49:26 +08:00
serializer ( ar , value , mode , other ) ;
2017-05-26 04:48:44 +08:00
}
} ;
// Implementation behind MovableCoordinatedState: wraps a CoordinatedState whose stored value is a
// serialized MovableValue, and adds the ability to move that state to a new set of coordinators.
struct MovableCoordinatedStateImpl {
	ServerCoordinators coordinators;
	CoordinatedState cs;
	Optional<Value> lastValue; // The value passed to setExclusive()
	Optional<Value> lastCSValue; // The value passed to cs.setExclusive()

	MovableCoordinatedStateImpl(ServerCoordinators const& c) : coordinators(c), cs(c) {}

	// Reads the coordinated state and returns the caller-level value inside it.
	// An empty stored value yields a default MovableValue. A stored value whose protocol version lacks
	// MovableCoordinatedState is taken verbatim as the value. If the decoded mode is MaybeTo, the move
	// to the recorded connection string is carried on via moveTo().
	ACTOR static Future<Value> read(MovableCoordinatedStateImpl* self) {
		state MovableValue moveState;
		Value stored = wait(self->cs.read());

		if (stored.size() > 0) {
			BinaryReader reader(stored, IncludeVersion());
			if (reader.protocolVersion().hasMovableCoordinatedState()) {
				reader >> moveState;
			} else {
				// Old coordinated state, not a MovableValue
				moveState.value = stored;
			}
		}

		// SOMEDAY: If moveState.mode == MovingFrom, read (without locking) old state and assert that it
		// corresponds with our state and is ReallyTo(coordinators)
		if (moveState.mode == MovableValue::MaybeTo) {
			TEST(true); // Maybe moveto state
			ASSERT(moveState.other.present());
			ClusterConnectionString target(moveState.other.get().toString());
			wait(self->moveTo(self, &self->cs, target, moveState.value));
		}

		return moveState.value;
	}

	// Forwards to the wrapped CoordinatedState.
	Future<Void> onConflict() {
		Future<Void> conflict = cs.onConflict();
		return conflict;
	}

	// Remembers `v`, wraps it in an Active MovableValue, remembers the encoded form as well, and writes
	// the encoded form through the wrapped CoordinatedState.
	Future<Void> setExclusive(Value v) {
		lastValue = v;
		Value encoded = BinaryWriter::toValue(MovableValue(v, MovableValue::Active),
		                                      IncludeVersion(ProtocolVersion::withMovableCoordinatedStateV2()));
		lastCSValue = encoded;
		return cs.setExclusive(lastCSValue.get());
	}

	// Call only after setExclusive returns. Attempts to move the coordinated state permanently to the
	// new ServerCoordinators, which must be uninitialized. Returns when the process has reached the
	// point where a leader elected by the new coordinators should be doing the rest of the work (and
	// therefore the caller should die).
	//
	// Throws new_coordinators_timed_out if reading or initializing the new coordinators does not
	// finish before the 30 second timer fires, and coordinated_state_conflict if the old coordinators'
	// state no longer matches the last value written through setExclusive().
	ACTOR static Future<Void> move(MovableCoordinatedStateImpl* self, ClusterConnectionString nc) {
		state CoordinatedState oldCs(self->coordinators);
		state CoordinatedState newCs(
		    ServerCoordinators(Reference<ClusterConnectionFile>(new ClusterConnectionFile(nc))));
		state Future<Void> creationTimeout = delay(30);
		ASSERT(self->lastValue.present() && self->lastCSValue.present());
		TraceEvent(" StartMove ").detail(" ConnectionString ", nc.toString());

		// Step 1: the new coordinators must currently hold nothing.
		choose {
			when(wait(creationTimeout)) {
				throw new_coordinators_timed_out();
			}
			when(Value ncInitialValue = wait(newCs.read())) {
				ASSERT(!ncInitialValue.size()); // The new coordinators must be uninitialized!
			}
		}
		TraceEvent(" FinishedRead ").detail(" ConnectionString ", nc.toString());

		// Step 2: seed the new coordinators with the last value, marked MovingFrom the current ones.
		choose {
			when(wait(creationTimeout)) {
				throw new_coordinators_timed_out();
			}
			when(wait(newCs.setExclusive(BinaryWriter::toValue(
			    MovableValue(self->lastValue.get(),
			                 MovableValue::MovingFrom,
			                 self->coordinators.ccf->getConnectionString().toString()),
			    IncludeVersion(ProtocolVersion::withMovableCoordinatedStateV2()))))) {}
		}

		if (BUGGIFY) {
			wait(delay(5));
		}

		// Step 3: make sure nobody wrote to the old coordinators since our last setExclusive().
		Value oldQuorumState = wait(oldCs.read());
		if (oldQuorumState != self->lastCSValue.get()) {
			TEST(true); // Quorum change aborted by concurrent write to old coordination state
			TraceEvent(" QuorumChangeAbortedByConcurrency ");
			throw coordinated_state_conflict();
		}

		// Step 4: record the move in the old coordinators and switch leaders.
		wait(self->moveTo(self, &oldCs, nc, self->lastValue.get()));

		throw coordinators_changed();
	}

	// Writes a MaybeTo record naming `nc` through `coordinatedState`, asks changeLeaderCoordinators() to
	// switch to `nc`, and then always throws coordinators_changed.
	ACTOR static Future<Void> moveTo(MovableCoordinatedStateImpl* self,
	                                 CoordinatedState* coordinatedState,
	                                 ClusterConnectionString nc,
	                                 Value value) {
		Value marker = BinaryWriter::toValue(MovableValue(value, MovableValue::MaybeTo, nc.toString()),
		                                     IncludeVersion(ProtocolVersion::withMovableCoordinatedStateV2()));
		wait(coordinatedState->setExclusive(marker));

		if (BUGGIFY) {
			wait(delay(5));
		}

		// SOMEDAY: If we are worried about someone magically getting the new cluster ID and interfering, do a
		// second cs.setExclusive( encode( ReallyTo, ... ) )
		TraceEvent(" ChangingQuorum ").detail(" ConnectionString ", nc.toString());
		wait(changeLeaderCoordinators(self->coordinators, StringRef(nc.toString())));
		TraceEvent(" ChangedQuorum ").detail(" ConnectionString ", nc.toString());
		throw coordinators_changed();
	}
};
2020-11-24 14:25:51 +08:00
MovableCoordinatedState & MovableCoordinatedState : : operator = ( MovableCoordinatedState & & ) = default ;
MovableCoordinatedState : : MovableCoordinatedState ( class ServerCoordinators const & coord )
: impl ( std : : make_unique < MovableCoordinatedStateImpl > ( coord ) ) { }
MovableCoordinatedState : : ~ MovableCoordinatedState ( ) = default ;
// Forwards to MovableCoordinatedStateImpl::read() on the owned implementation.
Future<Value> MovableCoordinatedState::read() {
	MovableCoordinatedStateImpl* const pimpl = impl.get();
	return MovableCoordinatedStateImpl::read(pimpl);
}
2017-05-26 04:48:44 +08:00
// Forwards to the implementation's onConflict().
Future<Void> MovableCoordinatedState::onConflict() {
	Future<Void> conflict = impl->onConflict();
	return conflict;
}
// Forwards `v` to the implementation's setExclusive().
Future<Void> MovableCoordinatedState::setExclusive(Value v) {
	Future<Void> written = impl->setExclusive(v);
	return written;
}
2020-11-24 14:25:51 +08:00
// Forwards to MovableCoordinatedStateImpl::move() on the owned implementation, targeting `nc`.
Future<Void> MovableCoordinatedState::move(ClusterConnectionString const& nc) {
	MovableCoordinatedStateImpl* const pimpl = impl.get();
	return MovableCoordinatedStateImpl::move(pimpl, nc);
}