Prevent the byte sample recovery from interfering with storage server recovery

This commit is contained in:
Evan Tschannen 2019-06-13 15:55:25 -07:00
parent 2f78b08c1c
commit 924f92e5aa
3 changed files with 9 additions and 4 deletions

View File

@ -414,8 +414,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( MAX_STORAGE_SERVER_WATCH_BYTES, 100e6 ); if( randomize && BUGGIFY ) MAX_STORAGE_SERVER_WATCH_BYTES = 10e3;
init( MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE, 1e9 ); if( randomize && BUGGIFY ) MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE = 1e3;
init( LONG_BYTE_SAMPLE_RECOVERY_DELAY, 60.0 );
init( BYTE_SAMPLE_LOAD_PARALLELISM, 32 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_PARALLELISM = 1;
init( BYTE_SAMPLE_LOAD_PARALLELISM, 8 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_PARALLELISM = 1;
init( BYTE_SAMPLE_LOAD_DELAY, 0.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.1;
init( BYTE_SAMPLE_START_DELAY, 1.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.0;
init( UPDATE_STORAGE_PROCESS_STATS_INTERVAL, 5.0 );
//Wait Failure

View File

@ -355,6 +355,7 @@ public:
double LONG_BYTE_SAMPLE_RECOVERY_DELAY;
int BYTE_SAMPLE_LOAD_PARALLELISM;
double BYTE_SAMPLE_LOAD_DELAY;
double BYTE_SAMPLE_START_DELAY;
double UPDATE_STORAGE_PROCESS_STATS_INTERVAL;
//Wait Failure

View File

@ -2958,11 +2958,12 @@ ACTOR Future<Void> applyByteSampleResult( StorageServer* data, IKeyValueStore* s
return Void();
}
ACTOR Future<Void> restoreByteSample(StorageServer* data, IKeyValueStore* storage, Promise<Void> byteSampleSampleRecovered) {
ACTOR Future<Void> restoreByteSample(StorageServer* data, IKeyValueStore* storage, Promise<Void> byteSampleSampleRecovered, Future<Void> startRestore) {
state std::vector<Standalone<VectorRef<KeyValueRef>>> byteSampleSample;
wait( applyByteSampleResult(data, storage, persistByteSampleSampleKeys.begin, persistByteSampleSampleKeys.end, &byteSampleSample) );
byteSampleSampleRecovered.send(Void());
wait( delay( BUGGIFY ? g_random->random01() * 2.0 : 0.0001 ) );
wait( startRestore );
wait( delay(SERVER_KNOBS->BYTE_SAMPLE_START_DELAY) );
size_t bytes_per_fetch = 0;
// Since the expected size also includes (as of now) the space overhead of the container, we calculate our own number here
@ -3009,7 +3010,8 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
state Future<Standalone<VectorRef<KeyValueRef>>> fShardAvailable = storage->readRange(persistShardAvailableKeys);
state Promise<Void> byteSampleSampleRecovered;
data->byteSampleRecovery = restoreByteSample(data, storage, byteSampleSampleRecovered);
state Promise<Void> startByteSampleRestore;
data->byteSampleRecovery = restoreByteSample(data, storage, byteSampleSampleRecovered, startByteSampleRestore.getFuture());
TraceEvent("ReadingDurableState", data->thisServerID);
wait( waitForAll( (vector<Future<Optional<Value>>>(), fFormat, fID, fVersion, fLogProtocol, fPrimaryLocality) ) );
@ -3088,6 +3090,7 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
}
validate(data, true);
startByteSampleRestore.send(Void());
return true;
}