<rdar://problem/34557380> Need a way to map real time to version

This commit is contained in:
Bhaskar Muppana 2017-09-25 12:40:24 -07:00
parent d9b64899c5
commit 0bf5bdb23a
9 changed files with 202 additions and 0 deletions

View File

@ -306,6 +306,9 @@ const UID dataDistributionModeLock = UID(6345,3425);
// Client status info prefix
const KeyRangeRef fdbClientInfoPrefixRange(LiteralStringRef("\xff\x02/fdbClientInfo/"), LiteralStringRef("\xff\x02/fdbClientInfo0"));
// Keyspace to maintain wall clock to version map
const KeyRangeRef timeKeeperPrefixRange(LiteralStringRef("\xff\x02/timeKeeper/"), LiteralStringRef("\xff\x02/timeKeeper0"));
// Backup Log Mutation constant variables
const KeyRef backupEnabledKey = LiteralStringRef("\xff/backupEnabled");
const KeyRangeRef backupLogKeys(LiteralStringRef("\xff\x02/blog/"), LiteralStringRef("\xff\x02/blog0"));

View File

@ -173,6 +173,9 @@ extern const KeyRangeRef applyMutationsKeyVersionCountRange;
// FdbClient Info prefix
extern const KeyRangeRef fdbClientInfoPrefixRange;
// Keyspace to maintain wall clock to version map
extern const KeyRangeRef timeKeeperPrefixRange;
// Layer status metadata prefix
extern const KeyRangeRef layerStatusMetaPrefixRange;

View File

@ -37,6 +37,7 @@
#include "fdbclient/ReadYourWrites.h"
#include "fdbrpc/Replication.h"
#include "fdbrpc/ReplicationUtils.h"
#include "fdbclient/KeyBackedTypes.h"
void failAfter( Future<Void> trigger, Endpoint e );
@ -1354,6 +1355,41 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
TEST(true); // Received an old worker registration request.
}
// This actor periodically gets read version and writes it to cluster with current timestamp as key. To avoid running
// out of space, it limits the max number of entries and clears old entries on each update. This mapping is used from
// backup and restore to get the version information for a timestamp.
ACTOR Future<Void> timeKeeper(ClusterControllerData *self) {
state KeyBackedMap<int64_t, Version> versionMap(timeKeeperPrefixRange.begin);
TraceEvent(SevInfo, "TimeKeeper Started");
loop {
try {
state Reference<ReadYourWritesTransaction> tr = Reference<ReadYourWritesTransaction>(new ReadYourWritesTransaction(self->cx));
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
Version v = wait(tr->getReadVersion());
int64_t currentTime = (int64_t)now();
versionMap.set(tr, currentTime, v);
int64_t ttl = currentTime - SERVER_KNOBS->TIME_KEEPER_DELAY * SERVER_KNOBS->TIME_KEEPER_MAX_ENTRIES;
if (ttl > 0) {
versionMap.erase(tr, 0, ttl);
}
Void _ = wait(tr->commit());
} catch (Error &e) {
Void _ = wait(tr->onError(e));
}
} catch (Error &e) {
// Failed to update time-version map even after retries, just ignore this iteration
TraceEvent(SevWarn, "TimeKeeper Failed").detail("cause", e.what());
}
Void _ = wait(delay(SERVER_KNOBS->TIME_KEEPER_DELAY));
}
}
ACTOR Future<Void> statusServer(FutureStream< StatusRequest> requests,
ClusterControllerData *self,
ServerCoordinators coordinators)
@ -1549,6 +1585,7 @@ ACTOR Future<Void> clusterControllerCore( ClusterControllerFullInterface interf,
addActor.send( clusterWatchDatabase( &self, &self.db ) ); // Start the master database
addActor.send( self.updateWorkerList.init( self.db.db ) );
addActor.send( statusServer( interf.clientInterface.databaseStatus.getFuture(), &self, coordinators));
addActor.send( timeKeeper(&self) );
addActor.send( monitorProcessClasses(&self) );
addActor.send( monitorClientTxnInfoConfigs(&self.db) );
//printf("%s: I am the cluster controller\n", g_network->getLocalAddress().toString().c_str());

View File

@ -360,6 +360,10 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( STATUS_MIN_TIME_BETWEEN_REQUESTS, 0.0 );
init( CONFIGURATION_ROWS_TO_FETCH, 20000 );
// Timekeeper
init( TIME_KEEPER_DELAY, 1.0 ); if (randomize) { TIME_KEEPER_DELAY = 10; }
init( TIME_KEEPER_MAX_ENTRIES, 3600 * 24 * 30 * 6); if( randomize && BUGGIFY ) { TIME_KEEPER_MAX_ENTRIES = 2; }
if(clientKnobs)
clientKnobs->IS_ACCEPTABLE_DELAY = clientKnobs->IS_ACCEPTABLE_DELAY*std::min(MAX_READ_TRANSACTION_LIFE_VERSIONS, MAX_WRITE_TRANSACTION_LIFE_VERSIONS)/(5.0*VERSIONS_PER_SECOND);
}

View File

@ -302,6 +302,10 @@ public:
double STATUS_MIN_TIME_BETWEEN_REQUESTS;
int CONFIGURATION_ROWS_TO_FETCH;
// Timekeeper
int64_t TIME_KEEPER_DELAY;
int64_t TIME_KEEPER_MAX_ENTRIES;
ServerKnobs(bool randomize = false, ClientKnobs* clientKnobs = NULL);
};

View File

@ -92,6 +92,7 @@
<ActorCompiler Include="workloads\DummyWorkload.actor.cpp" />
<ActorCompiler Include="workloads\BackupCorrectness.actor.cpp" />
<ActorCompiler Include="workloads\AtomicOps.actor.cpp" />
<ActorCompiler Include="workloads\TimeKeeperCorrectness.actor.cpp" />
<ActorCompiler Include="workloads\BackupToDBCorrectness.actor.cpp" />
<ActorCompiler Include="workloads\AtomicSwitchover.actor.cpp" />
<ActorCompiler Include="workloads\AtomicRestore.actor.cpp" />

View File

@ -237,6 +237,9 @@
<ActorCompiler Include="workloads\LockDatabase.actor.cpp">
<Filter>workloads</Filter>
</ActorCompiler>
<ActorCompiler Include="workloads\TimeKeeperCorrectness.actor.cpp">
<Filter>workloads</Filter>
</ActorCompiler>
<ActorCompiler Include="workloads\BackupToDBCorrectness.actor.cpp">
<Filter>workloads</Filter>
</ActorCompiler>

View File

@ -0,0 +1,144 @@
/*
* TimeKeeperCorrectness.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "workloads.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/KeyBackedTypes.h"
#include "fdbserver/Knobs.h"
struct TimeKeeperCorrectnessWorkload : TestWorkload {
double testDuration;
std::map<int64_t, Version> inMemTimeKeeper;
TimeKeeperCorrectnessWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
testDuration = getOption( options, LiteralStringRef("testDuration"), 20.0 );
}
virtual std::string description() {
return "TimeKeeperCorrectness";
}
virtual Future<Void> setup(Database const& cx) {
return Void();
}
virtual void getMetrics(vector<PerfMetric>& m) {
}
ACTOR static Future<Void> _start(Database cx, TimeKeeperCorrectnessWorkload *self) {
state Future<Void> testCompleted = delay(self->testDuration);
TraceEvent(SevInfo, "TKCorrectness start");
while (!testCompleted.isReady()) {
state Transaction tr(cx);
try {
Version v = wait(tr.getReadVersion());
self->inMemTimeKeeper[(int64_t) now()] = v;
} catch (Error & e) {
Void _ = wait(tr.onError(e));
}
// For every sample from Timekeeper collect two samples here.
Void _ = wait(delay(SERVER_KNOBS->TIME_KEEPER_DELAY / 2));
}
TraceEvent(SevInfo, "TKCorrectness completed");
return Void();
}
virtual Future<Void> start( Database const& cx ) {
return _start(cx, this);
}
ACTOR static Future<bool> _check(Database cx, TimeKeeperCorrectnessWorkload *self) {
state KeyBackedMap<int64_t, Version> dbTimeKeeper = KeyBackedMap<int64_t, Version>(timeKeeperPrefixRange.begin);
state Reference<ReadYourWritesTransaction> tr = Reference<ReadYourWritesTransaction>(new ReadYourWritesTransaction(cx));
TraceEvent(SevInfo, "TKCorrectness check start")
.detail("TIME_KEPER_MAX_ENTRIES", SERVER_KNOBS->TIME_KEEPER_MAX_ENTRIES)
.detail("TIME_KEEPER_DELAY", SERVER_KNOBS->TIME_KEEPER_DELAY);
if (SERVER_KNOBS->TIME_KEEPER_DELAY < 2) {
TraceEvent(SevError, "TKCorrectness too small delay").detail("found", SERVER_KNOBS->TIME_KEEPER_DELAY);
return false;
}
loop {
try {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
std::vector<std::pair<int64_t, Version>> futureItems = wait(
dbTimeKeeper.getRange(tr, ((int64_t) now()) + 1, Optional<int64_t>(), 1));
if (!futureItems.empty()) {
TraceEvent(SevError, "TKCorrectness FutureMappings").detail("count", futureItems.empty());
return false;
}
std::vector<std::pair<int64_t, Version>> allItems = wait(
dbTimeKeeper.getRange(tr, 0, Optional<int64_t>(), self->inMemTimeKeeper.size() + 2));
for (auto item : allItems) {
self->inMemTimeKeeper[item.first] = item.second;
}
if (allItems.size() > SERVER_KNOBS->TIME_KEEPER_MAX_ENTRIES) {
TraceEvent(SevError, "TKCorrectness too many entries")
.detail("expected", SERVER_KNOBS->TIME_KEEPER_MAX_ENTRIES)
.detail("found", allItems.size());
return false;
}
if (allItems.size() < self->testDuration / SERVER_KNOBS->TIME_KEEPER_DELAY) {
TraceEvent(SevError, "TKCorrectness too few entries")
.detail("expected", self->testDuration / SERVER_KNOBS->TIME_KEEPER_DELAY)
.detail("found", allItems.size());
return false;
}
bool first = true;
std::pair<int64_t, Version> prevItem;
for (auto item : self->inMemTimeKeeper) {
if (first) {
first = false;
} else if (prevItem.second > item.second) {
TraceEvent(SevError, "TKCorrectness time mismatch")
.detail("prevVersion", prevItem.second)
.detail("currentVersion", item.second);
return false;
}
prevItem = item;
}
TraceEvent(SevInfo, "TKCorrectness passed");
return true;
} catch (Error & e) {
Void _ = wait(tr->onError(e));
}
}
}
virtual Future<bool> check( Database const& cx ) {
return _check(cx, this);
}
};
WorkloadFactory<TimeKeeperCorrectnessWorkload> TimeKeeperCorrectnessWorkloadFactory("TimeKeeperCorrectness");

View File

@ -0,0 +1,3 @@
testTitle=TimeKeeperCorrectness
testName=TimeKeeperCorrectness
testDuration=40.0