Merge pull request #1700 from etschannen/release-6.1

Prevent the byte sample recovery from interfering with storage server recovery
This commit is contained in:
A.J. Beamon 2019-06-13 16:32:39 -07:00 committed by GitHub
commit ada4c9ed23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 4 deletions

View File

@ -2,6 +2,14 @@
Release Notes
#############
6.1.10
=====
Performance
-----------
* Improved the recovery speed of storage servers with large amount of data. `(PR #1700) <https://github.com/apple/foundationdb/pull/1700>`_
6.1.9
=====

View File

@ -414,8 +414,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( MAX_STORAGE_SERVER_WATCH_BYTES, 100e6 ); if( randomize && BUGGIFY ) MAX_STORAGE_SERVER_WATCH_BYTES = 10e3;
init( MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE, 1e9 ); if( randomize && BUGGIFY ) MAX_BYTE_SAMPLE_CLEAR_MAP_SIZE = 1e3;
init( LONG_BYTE_SAMPLE_RECOVERY_DELAY, 60.0 );
init( BYTE_SAMPLE_LOAD_PARALLELISM, 32 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_PARALLELISM = 1;
init( BYTE_SAMPLE_LOAD_PARALLELISM, 8 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_PARALLELISM = 1;
init( BYTE_SAMPLE_LOAD_DELAY, 0.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.1;
init( BYTE_SAMPLE_START_DELAY, 1.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.0;
init( UPDATE_STORAGE_PROCESS_STATS_INTERVAL, 5.0 );
//Wait Failure

View File

@ -355,6 +355,7 @@ public:
double LONG_BYTE_SAMPLE_RECOVERY_DELAY;
int BYTE_SAMPLE_LOAD_PARALLELISM;
double BYTE_SAMPLE_LOAD_DELAY;
double BYTE_SAMPLE_START_DELAY;
double UPDATE_STORAGE_PROCESS_STATS_INTERVAL;
//Wait Failure

View File

@ -2958,11 +2958,12 @@ ACTOR Future<Void> applyByteSampleResult( StorageServer* data, IKeyValueStore* s
return Void();
}
ACTOR Future<Void> restoreByteSample(StorageServer* data, IKeyValueStore* storage, Promise<Void> byteSampleSampleRecovered) {
ACTOR Future<Void> restoreByteSample(StorageServer* data, IKeyValueStore* storage, Promise<Void> byteSampleSampleRecovered, Future<Void> startRestore) {
state std::vector<Standalone<VectorRef<KeyValueRef>>> byteSampleSample;
wait( applyByteSampleResult(data, storage, persistByteSampleSampleKeys.begin, persistByteSampleSampleKeys.end, &byteSampleSample) );
byteSampleSampleRecovered.send(Void());
wait( delay( BUGGIFY ? g_random->random01() * 2.0 : 0.0001 ) );
wait( startRestore );
wait( delay(SERVER_KNOBS->BYTE_SAMPLE_START_DELAY) );
size_t bytes_per_fetch = 0;
// Since the expected size also includes (as of now) the space overhead of the container, we calculate our own number here
@ -3009,7 +3010,8 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
state Future<Standalone<VectorRef<KeyValueRef>>> fShardAvailable = storage->readRange(persistShardAvailableKeys);
state Promise<Void> byteSampleSampleRecovered;
data->byteSampleRecovery = restoreByteSample(data, storage, byteSampleSampleRecovered);
state Promise<Void> startByteSampleRestore;
data->byteSampleRecovery = restoreByteSample(data, storage, byteSampleSampleRecovered, startByteSampleRestore.getFuture());
TraceEvent("ReadingDurableState", data->thisServerID);
wait( waitForAll( (vector<Future<Optional<Value>>>(), fFormat, fID, fVersion, fLogProtocol, fPrimaryLocality) ) );
@ -3088,6 +3090,7 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
}
validate(data, true);
startByteSampleRestore.send(Void());
return true;
}