/*
 * RandomMoveKeys.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
#include "fdbrpc/simulator.h"
|
|
|
|
#include "fdbclient/StorageServerInterface.h"
|
2019-02-18 09:38:13 +08:00
|
|
|
#include "fdbclient/ManagementAPI.actor.h"
|
2019-02-18 10:55:52 +08:00
|
|
|
#include "fdbserver/MoveKeys.actor.h"
|
2019-02-18 07:41:16 +08:00
|
|
|
#include "fdbclient/NativeAPI.actor.h"
|
2019-02-18 11:18:30 +08:00
|
|
|
#include "fdbserver/workloads/workloads.actor.h"
|
2017-05-26 04:48:44 +08:00
|
|
|
#include "fdbserver/ServerDBInfo.h"
|
|
|
|
#include "fdbserver/QuietDatabase.h"
|
2018-08-11 06:18:24 +08:00
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
struct MoveKeysWorkload : TestWorkload {
|
|
|
|
bool enabled;
|
|
|
|
double testDuration, meanDelay;
|
|
|
|
double maxKeyspace;
|
|
|
|
DatabaseConfiguration configuration;
|
|
|
|
|
|
|
|
MoveKeysWorkload(WorkloadContext const& wcx)
|
|
|
|
: TestWorkload(wcx)
|
|
|
|
{
|
|
|
|
enabled = !clientId && g_network->isSimulated(); // only do this on the "first" client
|
|
|
|
meanDelay = getOption( options, LiteralStringRef("meanDelay"), 0.05 );
|
|
|
|
testDuration = getOption( options, LiteralStringRef("testDuration"), 10.0 );
|
|
|
|
maxKeyspace = getOption( options, LiteralStringRef("maxKeyspace"), 0.1 );
|
|
|
|
}
|
|
|
|
|
2020-10-05 13:29:07 +08:00
|
|
|
std::string description() const override { return "MoveKeysWorkload"; }
|
|
|
|
Future<Void> setup(Database const& cx) override { return Void(); }
|
|
|
|
Future<Void> start(Database const& cx) override { return _start(cx, this); }
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
ACTOR Future<Void> _start( Database cx, MoveKeysWorkload *self ) {
|
|
|
|
if( self->enabled ) {
|
|
|
|
// Get the database configuration so as to use proper team size
|
|
|
|
state Transaction tr(cx);
|
|
|
|
loop {
|
|
|
|
try {
|
|
|
|
Standalone<RangeResultRef> res = wait( tr.getRange(configKeys, 1000) );
|
|
|
|
ASSERT( res.size() < 1000 );
|
|
|
|
for( int i = 0; i < res.size(); i++ )
|
|
|
|
self->configuration.set(res[i].key,res[i].value);
|
|
|
|
break;
|
|
|
|
} catch( Error &e ) {
|
2018-08-11 04:57:10 +08:00
|
|
|
wait( tr.onError(e) );
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-07-29 09:12:04 +08:00
|
|
|
state int oldMode = wait( setDDMode( cx, 0 ) );
|
2017-05-26 04:48:44 +08:00
|
|
|
TraceEvent("RMKStartModeSetting");
|
2018-08-11 04:57:10 +08:00
|
|
|
wait( timeout( reportErrors( self->worker( cx, self ), "MoveKeysWorkloadWorkerError" ), self->testDuration, Void() ) );
|
2017-05-26 04:48:44 +08:00
|
|
|
// Always set the DD mode back, even if we die with an error
|
|
|
|
TraceEvent("RMKDoneMoving");
|
2019-02-13 08:07:17 +08:00
|
|
|
wait(success( setDDMode( cx, oldMode ) ));
|
2017-05-26 04:48:44 +08:00
|
|
|
TraceEvent("RMKDoneModeSetting");
|
|
|
|
}
|
|
|
|
return Void();
|
|
|
|
}
|
|
|
|
|
2020-10-05 13:29:07 +08:00
|
|
|
double getCheckTimeout() const override { return testDuration / 2 + 1; }
|
|
|
|
Future<bool> check(Database const& cx) override {
|
|
|
|
return tag(delay(testDuration / 2), true);
|
|
|
|
} // Give the database time to recover from our damage
|
|
|
|
void getMetrics(vector<PerfMetric>& m) override {}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
KeyRange getRandomKeys() const {
|
2019-05-11 05:01:52 +08:00
|
|
|
double len = deterministicRandom()->random01() * this->maxKeyspace;
|
|
|
|
double pos = deterministicRandom()->random01() * (1.0 - len);
|
2017-05-26 04:48:44 +08:00
|
|
|
return KeyRangeRef( doubleToTestKey( pos ), doubleToTestKey( pos+len ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
vector<StorageServerInterface> getRandomTeam(vector<StorageServerInterface> storageServers, int teamSize) {
|
|
|
|
if( storageServers.size() < teamSize ) {
|
|
|
|
TraceEvent(SevWarnAlways, "LessThanThreeStorageServers");
|
|
|
|
throw operation_failed();
|
|
|
|
}
|
|
|
|
|
2019-05-11 05:01:52 +08:00
|
|
|
deterministicRandom()->randomShuffle( storageServers );
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
std::set<StorageServerInterface> t;
|
|
|
|
std::set<Optional<Standalone<StringRef>>> machines;
|
|
|
|
while (t.size() < teamSize && storageServers.size()) {
|
|
|
|
auto s = storageServers.back();
|
|
|
|
storageServers.pop_back();
|
|
|
|
if( !machines.count( s.locality.zoneId() ) ) {
|
|
|
|
machines.insert( s.locality.zoneId() );
|
|
|
|
t.insert( s );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if( t.size() < teamSize ) {
|
|
|
|
TraceEvent(SevWarnAlways, "LessThanThreeUniqueMachines");
|
|
|
|
throw operation_failed();
|
|
|
|
}
|
|
|
|
|
|
|
|
return vector<StorageServerInterface>(t.begin(), t.end());
|
|
|
|
}
|
|
|
|
|
2018-08-17 01:24:12 +08:00
|
|
|
ACTOR Future<Void> doMoveKeys(Database cx, MoveKeysWorkload *self, KeyRange keys, vector<StorageServerInterface> destinationTeam, MoveKeysLock lock ) {
|
2017-05-26 04:48:44 +08:00
|
|
|
state TraceInterval relocateShardInterval("RelocateShard");
|
|
|
|
state FlowLock fl1(1);
|
|
|
|
state FlowLock fl2(1);
|
|
|
|
std::string desc;
|
|
|
|
for(int s=0; s<destinationTeam.size(); s++)
|
|
|
|
desc += format("%s (%llx),", destinationTeam[s].address().toString().c_str(), destinationTeam[s].id().first());
|
|
|
|
vector<UID> destinationTeamIDs;
|
|
|
|
for(int s=0; s<destinationTeam.size(); s++)
|
|
|
|
destinationTeamIDs.push_back( destinationTeam[s].id() );
|
|
|
|
|
|
|
|
TraceEvent(relocateShardInterval.begin())
|
|
|
|
.detail("KeyBegin", printable(keys.begin)).detail("KeyEnd", printable(keys.end))
|
|
|
|
.detail("Priority", 0)
|
|
|
|
.detail("Source", "RandomMoveKeys")
|
|
|
|
.detail("DestinationTeam", desc);
|
2019-01-19 07:42:48 +08:00
|
|
|
|
2017-05-26 04:48:44 +08:00
|
|
|
try {
|
|
|
|
state Promise<Void> signal;
|
2020-09-28 06:26:50 +08:00
|
|
|
state DDEnabledState ddEnabledState;
|
|
|
|
wait(moveKeys(cx, keys, destinationTeamIDs, destinationTeamIDs, lock, signal, &fl1, &fl2, false,
|
|
|
|
relocateShardInterval.pairID, &ddEnabledState));
|
2017-05-26 04:48:44 +08:00
|
|
|
TraceEvent(relocateShardInterval.end()).detail("Result","Success");
|
|
|
|
return Void();
|
|
|
|
} catch (Error& e) {
|
|
|
|
TraceEvent(relocateShardInterval.end(), self->dbInfo->get().master.id()).error(e, true);
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void eliminateDuplicates( vector<StorageServerInterface>& servers ) {
|
|
|
|
// The real data distribution algorithm doesn't want to deal with multiple servers
|
|
|
|
// with the same address having keys. So if there are two servers with the same address,
|
|
|
|
// don't use either one (so we don't have to find out which of them, if any, already has keys).
|
|
|
|
std::map<NetworkAddress, int> count;
|
|
|
|
for(int s=0; s<servers.size(); s++)
|
|
|
|
count[servers[s].address()]++;
|
|
|
|
int o=0;
|
|
|
|
for(int s=0; s<servers.size(); s++)
|
|
|
|
if (count[servers[s].address()] == 1)
|
|
|
|
servers[o++] = servers[s];
|
|
|
|
servers.resize(o);
|
|
|
|
}
|
|
|
|
|
|
|
|
ACTOR Future<Void> forceMasterFailure( Database cx, MoveKeysWorkload *self ) {
|
|
|
|
ASSERT( g_network->isSimulated() );
|
|
|
|
loop {
|
2019-01-19 07:42:48 +08:00
|
|
|
if( g_simulator.killZone( self->dbInfo->get().master.locality.zoneId(), ISimulator::Reboot, true ) )
|
2017-05-26 04:48:44 +08:00
|
|
|
return Void();
|
2018-08-11 04:57:10 +08:00
|
|
|
wait( delay(1.0) );
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ACTOR Future<Void> worker( Database cx, MoveKeysWorkload *self ) {
|
|
|
|
state KeyRangeMap< vector<StorageServerInterface> > inFlight;
|
|
|
|
state KeyRangeActorMap inFlightActors;
|
|
|
|
state double lastTime = now();
|
|
|
|
|
|
|
|
ASSERT( self->configuration.storageTeamSize > 0 );
|
|
|
|
|
2018-06-18 10:31:15 +08:00
|
|
|
if(self->configuration.usableRegions > 1) { //FIXME: add support for generating random teams across DCs
|
2017-10-20 06:49:54 +08:00
|
|
|
return Void();
|
|
|
|
}
|
|
|
|
|
2019-01-19 07:42:48 +08:00
|
|
|
loop {
|
2017-05-26 04:48:44 +08:00
|
|
|
try {
|
|
|
|
state MoveKeysLock lock = wait( takeMoveKeysLock(cx, UID()) );
|
|
|
|
state vector<StorageServerInterface> storageServers = wait( getStorageServers( cx ) );
|
|
|
|
eliminateDuplicates(storageServers);
|
|
|
|
|
|
|
|
loop {
|
2018-08-11 04:57:10 +08:00
|
|
|
wait( poisson( &lastTime, self->meanDelay ) );
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
KeyRange keys = self->getRandomKeys();
|
|
|
|
vector<StorageServerInterface> team = self->getRandomTeam(storageServers, self->configuration.storageTeamSize);
|
|
|
|
|
|
|
|
// update both inFlightActors and inFlight key range maps, cancelling deleted RelocateShards
|
|
|
|
vector<KeyRange> ranges;
|
|
|
|
inFlightActors.getRangesAffectedByInsertion( keys, ranges );
|
|
|
|
inFlightActors.cancel( KeyRangeRef( ranges.front().begin, ranges.back().end ) );
|
|
|
|
inFlight.insert( keys, team );
|
|
|
|
for(int r=0; r<ranges.size(); r++) {
|
|
|
|
auto& rTeam = inFlight.rangeContaining(ranges[r].begin)->value();
|
2018-08-17 01:24:12 +08:00
|
|
|
inFlightActors.insert( ranges[r], self->doMoveKeys( cx, self, ranges[r], rTeam, lock ) );
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (Error& e) {
|
|
|
|
if (e.code() != error_code_movekeys_conflict && e.code() != error_code_operation_failed )
|
|
|
|
throw;
|
2018-08-11 04:57:10 +08:00
|
|
|
wait( delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY) );
|
2017-05-26 04:48:44 +08:00
|
|
|
// Keep trying to get the moveKeysLock
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Global factory object for MoveKeysWorkload, constructed with the name "RandomMoveKeys"
// (presumably the name by which test specifications select this workload).
WorkloadFactory<MoveKeysWorkload> MoveKeysWorkloadFactory("RandomMoveKeys");
|