Occasionally throw wrong_shard_server when waitMetrics times out, so that the waitMetrics request can get the correct number of shards if two shards have been merged or split and the same storage server owns all the chunks.

This commit is contained in:
Evan Tschannen 2020-01-15 13:22:18 -08:00
parent fd5705a451
commit 4b90487b90
3 changed files with 14 additions and 3 deletions

View File

@ -476,6 +476,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( BEHIND_CHECK_DELAY, 2.0 );
init( BEHIND_CHECK_COUNT, 2 );
init( BEHIND_CHECK_VERSIONS, 5 * VERSIONS_PER_SECOND );
init( WAIT_METRICS_WRONG_SHARD_CHANCE, 0.1 );
//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;

View File

@ -419,6 +419,7 @@ public:
double BEHIND_CHECK_DELAY;
int BEHIND_CHECK_COUNT;
int64_t BEHIND_CHECK_VERSIONS;
double WAIT_METRICS_WRONG_SHARD_CHANCE;
//Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;

View File

@ -3359,9 +3359,18 @@ ACTOR Future<Void> waitMetrics( StorageServerMetrics* self, WaitMetricsRequest r
break;
}
if ( timedout || !req.min.allLessOrEqual( metrics ) || !metrics.allLessOrEqual( req.max ) ) {
TEST( !timedout ); // ShardWaitMetrics return case 2 (delayed)
TEST( timedout ); // ShardWaitMetrics return on timeout
if( timedout ) {
TEST( true ); // ShardWaitMetrics return on timeout
if(deterministicRandom()->random01() < SERVER_KNOBS->WAIT_METRICS_WRONG_SHARD_CHANCE) {
req.reply.sendError( wrong_shard_server() );
} else {
req.reply.send( metrics );
}
break;
}
if ( !req.min.allLessOrEqual( metrics ) || !metrics.allLessOrEqual( req.max ) ) {
TEST( true ); // ShardWaitMetrics return case 2 (delayed)
req.reply.send( metrics );
break;
}