1381 lines
52 KiB
C++
1381 lines
52 KiB
C++
/*
|
|
* tester.actor.cpp
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <boost/algorithm/string/predicate.hpp>
|
|
#include <cinttypes>
|
|
#include <fstream>
|
|
#include <functional>
|
|
#include <map>
|
|
#include <toml.hpp>
|
|
|
|
#include "flow/ActorCollection.h"
|
|
#include "fdbrpc/sim_validation.h"
|
|
#include "fdbrpc/simulator.h"
|
|
#include "fdbclient/ClusterInterface.h"
|
|
#include "fdbclient/NativeAPI.actor.h"
|
|
#include "fdbclient/SystemData.h"
|
|
#include "fdbserver/TesterInterface.actor.h"
|
|
#include "fdbserver/WorkerInterface.actor.h"
|
|
#include "fdbserver/workloads/workloads.actor.h"
|
|
#include "fdbserver/Status.h"
|
|
#include "fdbserver/QuietDatabase.h"
|
|
#include "fdbclient/MonitorLeader.h"
|
|
#include "fdbserver/CoordinationInterface.h"
|
|
#include "fdbclient/ManagementAPI.actor.h"
|
|
#include "fdbserver/WorkerInterface.actor.h"
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
|
|
|
using namespace std;
|
|
|
|
|
|
// Creates an empty context; getWorkloadIface() in this file assigns the fields one by one afterwards.
WorkloadContext::WorkloadContext() {}

// Copies the five fields this file knows about from `r`: the option list, the client id and
// client count, the database-info handle and the shared random number.
WorkloadContext::WorkloadContext( const WorkloadContext& r )
  : options(r.options),
    clientId(r.clientId),
    clientCount(r.clientCount),
    dbInfo(r.dbInfo),
    sharedRandomNumber(r.sharedRandomNumber) {}

// Nothing to release explicitly.
WorkloadContext::~WorkloadContext() {}
|
|
|
|
// Lower-case hexadecimal digit for each nibble value 0..15.
const char HEX_CHAR_LOOKUP[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

// Writes `index` as exactly 16 lower-case hex characters (zero padded, most significant nibble
// first) into data[offset] .. data[offset + 15]. The caller must provide room for those 16 bytes.
void emplaceIndex( uint8_t *data, int offset, int64_t index ) {
	// Fill from the last character backwards so each step can emit the lowest remaining nibble.
	for( int pos = offset + 15; pos >= offset; --pos ) {
		data[pos] = HEX_CHAR_LOOKUP[index & 0xf];
		index >>= 4;
	}
}
|
|
|
|
// Encodes the raw 64-bit pattern of `p` as a 16-character, zero-padded, lower-case hex key.
// testKeyToDouble() performs the reverse conversion.
Key doubleToTestKey( double p ) {
	static_assert( sizeof(uint64_t) == sizeof(double), "double must be 64 bits wide" );
	// Copy the bits out with memcpy: reading a double through a uint64_t* violates strict aliasing.
	uint64_t bits;
	memcpy( &bits, &p, sizeof(bits) );
	// PRIx64 matches uint64_t on every platform, unlike a hard-coded "llx".
	return StringRef(format("%016" PRIx64, bits));
}
|
|
|
|
// Parses `p` as a hexadecimal 64-bit integer and reinterprets those bits as a double.
// If nothing can be parsed the bit pattern stays 0, so the result is 0.0.
double testKeyToDouble( const KeyRef& p ) {
	static_assert( sizeof(uint64_t) == sizeof(double), "double must be 64 bits wide" );
	uint64_t bits = 0;
	sscanf( p.toString().c_str(), "%" SCNx64, &bits );
	// Copy the bits over with memcpy: reading a uint64_t through a double* violates strict aliasing.
	double value;
	memcpy( &value, &bits, sizeof(value) );
	return value;
}
|
|
|
|
// Same encoding as doubleToTestKey(double), with `prefix` placed in front of the hex digits.
Key doubleToTestKey(double p, const KeyRef& prefix) {
	Key encoded = doubleToTestKey(p);
	return encoded.withPrefix(prefix);
}
|
|
|
|
// Picks a random key, using this workload's own absentFrac setting.
Key KVWorkload::getRandomKey() {
	return this->getRandomKey( this->absentFrac );
}
|
|
|
|
// Picks a random key; with probability `absentFrac` the "absent" key form is produced.
// A fraction at or below 0.0000001 is treated as zero, and no random number is drawn for it.
Key KVWorkload::getRandomKey(double absentFrac) {
	bool makeAbsent = false;
	if ( absentFrac > 0.0000001 )
		makeAbsent = deterministicRandom()->random01() < absentFrac;
	return getRandomKey(makeAbsent);
}
|
|
|
|
// Draws an index via randomInt(0, nodeCount) and returns the key for it, in "absent" form if requested.
Key KVWorkload::getRandomKey(bool absent) {
	const auto chosenIndex = deterministicRandom()->randomInt( 0, nodeCount );
	return keyForIndex(chosenIndex, absent);
}
|
|
|
|
// Returns the key for `index`; with probability absentFrac the "absent" key form is produced.
// A fraction at or below 0.0000001 is treated as zero, and no random number is drawn for it.
Key KVWorkload::keyForIndex( uint64_t index ) {
	bool makeAbsent = false;
	if ( absentFrac > 0.0000001 )
		makeAbsent = deterministicRandom()->random01() < absentFrac;
	return keyForIndex(index, makeAbsent);
}
|
|
|
|
// Builds the key for `index`.
// Layout: keyBytes bytes (keyBytes + 1 when `absent` is set), pre-filled with '.'. If nodePrefix > 0
// its 16 hex digits come first. Then follow the 16 hex digits of the bit pattern of the double
// index / nodeCount. Requires keyBytes >= 16, or >= 32 when a node prefix is written.
Key KVWorkload::keyForIndex( uint64_t index, bool absent ) {
	int adjustedKeyBytes = (absent) ? (keyBytes + 1) : keyBytes;
	Key result = makeString( adjustedKeyBytes );
	uint8_t* data = mutateString( result );
	memset(data, '.', adjustedKeyBytes);

	int idx = 0;
	if( nodePrefix > 0 ) {
		ASSERT(keyBytes >= 32);
		emplaceIndex( data, 0, nodePrefix );
		idx += 16;
	}
	ASSERT(keyBytes >= 16);
	double d = double(index) / nodeCount;
	// Copy the bits out with memcpy: reading a double through an int64_t* violates strict aliasing.
	static_assert( sizeof(int64_t) == sizeof(double), "double must be 64 bits wide" );
	int64_t bits;
	memcpy( &bits, &d, sizeof(bits) );
	emplaceIndex( data, idx, bits );

	return result;
}
|
|
|
|
double testKeyToDouble(const KeyRef& p, const KeyRef& prefix) {
|
|
return testKeyToDouble(p.removePrefix(prefix));
|
|
}
|
|
|
|
// Advances *last by meanInterval * -log(u), where u comes from random01(), then waits until that time.
ACTOR Future<Void> poisson( double *last, double meanInterval ) {
	double gap = meanInterval * -log( deterministicRandom()->random01() );
	*last += gap;
	wait( delayUntil( *last ) );
	return Void();
}
|
|
|
|
// Advances *last by exactly meanInterval, then waits until that time.
ACTOR Future<Void> uniform( double *last, double meanInterval ) {
	*last = *last + meanInterval;
	wait( delayUntil( *last ) );
	return Void();
}
|
|
|
|
// Returns the value of the first option named `key` and marks that option as consumed by
// overwriting its stored value with "". Returns `defaultValue` if no option has that key.
Value getOption( VectorRef<KeyValueRef> options, Key key, Value defaultValue) {
	for( int idx = 0; idx < options.size(); ++idx ) {
		if( !(options[idx].key == key) )
			continue;
		// Take the value before blanking the stored one.
		Value found = options[idx].value;
		options[idx].value = LiteralStringRef("");
		return found;
	}

	return defaultValue;
}
|
|
|
|
// Returns the first option named `key` parsed as a decimal int, and marks it as consumed by
// overwriting its stored value with "". Returns `defaultValue` if no option has that key.
// Throws test_specification_invalid (after logging InvalidTestOption) if the value is not a number.
int getOption( VectorRef<KeyValueRef> options, Key key, int defaultValue) {
	for(int i = 0; i < options.size(); i++)
		if( options[i].key == key ) {
			int r;
			// sscanf returns EOF (non-zero) for an empty value, so require exactly one conversion;
			// a plain truthiness test would return an uninitialized r in that case.
			if( sscanf(options[i].value.toString().c_str(), "%d", &r) == 1 ) {
				options[i].value = LiteralStringRef("");
				return r;
			} else {
				TraceEvent(SevError, "InvalidTestOption").detail("OptionName", key);
				throw test_specification_invalid();
			}
		}

	return defaultValue;
}
|
|
|
|
// Returns the first option named `key` parsed as an unsigned 64-bit decimal, and marks it as
// consumed by overwriting its stored value with "". Returns `defaultValue` if no option has that key.
// Throws test_specification_invalid (after logging InvalidTestOption) if the value is not a number.
uint64_t getOption( VectorRef<KeyValueRef> options, Key key, uint64_t defaultValue) {
	for(int i = 0; i < options.size(); i++)
		if( options[i].key == key ) {
			uint64_t r;
			// SCNu64 is the conversion that matches uint64_t (SCNd64 expects an int64_t*).
			// sscanf returns EOF (non-zero) for an empty value, so require exactly one conversion.
			if( sscanf(options[i].value.toString().c_str(), "%" SCNu64, &r) == 1 ) {
				options[i].value = LiteralStringRef("");
				return r;
			} else {
				TraceEvent(SevError, "InvalidTestOption").detail("OptionName", key);
				throw test_specification_invalid();
			}
		}

	return defaultValue;
}
|
|
|
|
// Returns the first option named `key` parsed as a signed 64-bit decimal, and marks it as
// consumed by overwriting its stored value with "". Returns `defaultValue` if no option has that key.
// Throws test_specification_invalid (after logging InvalidTestOption) if the value is not a number.
int64_t getOption( VectorRef<KeyValueRef> options, Key key, int64_t defaultValue) {
	for(int i = 0; i < options.size(); i++)
		if( options[i].key == key ) {
			int64_t r;
			// sscanf returns EOF (non-zero) for an empty value, so require exactly one conversion;
			// a plain truthiness test would return an uninitialized r in that case.
			if( sscanf(options[i].value.toString().c_str(), "%" SCNd64, &r) == 1 ) {
				options[i].value = LiteralStringRef("");
				return r;
			} else {
				TraceEvent(SevError, "InvalidTestOption").detail("OptionName", key);
				throw test_specification_invalid();
			}
		}

	return defaultValue;
}
|
|
|
|
// Returns the first option named `key` whose value parses as a floating-point number, and marks
// it as consumed by overwriting its stored value with "". Unlike the integer overloads, an
// unparsable value does not throw: the option is left untouched, the search continues, and
// `defaultValue` is returned if nothing parses.
double getOption( VectorRef<KeyValueRef> options, Key key, double defaultValue) {
	for(int i = 0; i < options.size(); i++)
		if( options[i].key == key ) {
			// Parse at full double precision; going through a float would round the value.
			double r;
			// sscanf returns EOF (non-zero) for an empty value, so require exactly one conversion.
			if( sscanf(options[i].value.toString().c_str(), "%lf", &r) == 1 ) {
				options[i].value = LiteralStringRef("");
				return r;
			}
		}

	return defaultValue;
}
|
|
|
|
// Reads option `key` as the literal text "true" or "false" through the Value overload, which
// also marks it consumed. Any other text trips ASSERT(false).
bool getOption( VectorRef<KeyValueRef> options, Key key, bool defaultValue ) {
	const StringRef trueText = LiteralStringRef("true");
	const StringRef falseText = LiteralStringRef("false");

	Value text = getOption(options, key, defaultValue ? trueText : falseText);
	if (text == trueText)
		return true;
	if (text == falseText)
		return false;

	ASSERT(false);
	return false; // Assure that compiler is fine with the function
}
|
|
|
|
// Splits the value of the first option named `key` at every ',' and returns the pieces in order;
// empty pieces are kept, so a value without commas yields a single element. The option is marked
// consumed by overwriting its stored value with "". Returns `defaultValue` if no option has that key.
vector<std::string> getOption( VectorRef<KeyValueRef> options, Key key, vector<std::string> defaultValue ) {
	for( int idx = 0; idx < options.size(); ++idx ) {
		if( !(options[idx].key == key) )
			continue;

		vector<std::string> pieces;
		int pieceStart = 0;
		for( int pos = 0; pos < options[idx].value.size(); ++pos ) {
			if( options[idx].value[pos] != ',' )
				continue;
			pieces.push_back( options[idx].value.substr(pieceStart, pos - pieceStart).toString() );
			pieceStart = pos + 1;
		}
		// Whatever follows the last comma (possibly nothing) is the final piece.
		pieces.push_back( options[idx].value.substr(pieceStart).toString() );

		options[idx].value = LiteralStringRef("");
		return pieces;
	}
	return defaultValue;
}
|
|
|
|
// returns unconsumed options
|
|
Standalone<VectorRef<KeyValueRef>> checkAllOptionsConsumed( VectorRef<KeyValueRef> options ) {
|
|
static StringRef nothing = LiteralStringRef("");
|
|
Standalone<VectorRef<KeyValueRef>> unconsumed;
|
|
for(int i = 0; i < options.size(); i++)
|
|
if( !(options[i].value == nothing) ) {
|
|
TraceEvent(SevError,"OptionNotConsumed").detail("Key", options[i].key.toString().c_str()).detail("Value", options[i].value.toString().c_str());
|
|
unconsumed.push_back_deep( unconsumed.arena(), options[i] );
|
|
}
|
|
return unconsumed;
|
|
}
|
|
|
|
struct CompoundWorkload : TestWorkload {
|
|
vector<TestWorkload*> workloads;
|
|
|
|
CompoundWorkload( WorkloadContext& wcx ) : TestWorkload( wcx ) {}
|
|
CompoundWorkload* add( TestWorkload* w ) { workloads.push_back(w); return this; }
|
|
|
|
virtual ~CompoundWorkload() { for(int w=0; w<workloads.size(); w++) delete workloads[w]; }
|
|
virtual std::string description() {
|
|
std::string d;
|
|
for(int w=0; w<workloads.size(); w++)
|
|
d += workloads[w]->description() + (w==workloads.size()-1?"":";");
|
|
return d;
|
|
}
|
|
virtual Future<Void> setup( Database const& cx ) {
|
|
vector<Future<Void>> all;
|
|
for(int w=0; w<workloads.size(); w++)
|
|
all.push_back( workloads[w]->setup(cx) );
|
|
return waitForAll(all);
|
|
}
|
|
virtual Future<Void> start( Database const& cx ) {
|
|
vector<Future<Void>> all;
|
|
for(int w=0; w<workloads.size(); w++)
|
|
all.push_back( workloads[w]->start(cx) );
|
|
return waitForAll(all);
|
|
}
|
|
virtual Future<bool> check( Database const& cx ) {
|
|
vector<Future<bool>> all;
|
|
for(int w=0; w<workloads.size(); w++)
|
|
all.push_back( workloads[w]->check(cx) );
|
|
return allTrue(all);
|
|
}
|
|
virtual void getMetrics( vector<PerfMetric>& m ) {
|
|
for(int w=0; w<workloads.size(); w++) {
|
|
vector<PerfMetric> p;
|
|
workloads[w]->getMetrics(p);
|
|
for(int i=0; i<p.size(); i++)
|
|
m.push_back( p[i].withPrefix( workloads[w]->description()+"." ) );
|
|
}
|
|
}
|
|
virtual double getCheckTimeout() {
|
|
double m = 0;
|
|
for(int w=0; w<workloads.size(); w++)
|
|
m = std::max( workloads[w]->getCheckTimeout(), m );
|
|
return m;
|
|
}
|
|
};
|
|
|
|
// Creates the single workload described by `options` for the client identified in `work`.
// The "testName" option selects what IWorkloadFactory::create() is asked to build.
// Throws test_specification_invalid (after a SevError "TestCreationError" trace and a message on
// stderr) when no workload is created, or when the created workload left options unconsumed;
// in the latter case the workload is deleted before throwing.
TestWorkload *getWorkloadIface( WorkloadRequest work, VectorRef<KeyValueRef> options, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
	// testName is read (and thereby consumed) before the options are handed to the context.
	Value testName = getOption( options, LiteralStringRef("testName"), LiteralStringRef("no-test-specified") );

	WorkloadContext wcx;
	wcx.options = options;
	wcx.dbInfo = dbInfo;
	wcx.clientId = work.clientId;
	wcx.clientCount = work.clientCount;
	wcx.sharedRandomNumber = work.sharedRandomNumber;

	TestWorkload *workload = IWorkloadFactory::create( testName.toString(), wcx );

	auto unconsumedOptions = checkAllOptionsConsumed( workload ? workload->options : VectorRef<KeyValueRef>() );
	if( workload && !unconsumedOptions.size() )
		return workload;

	// Failure path: say why, then reject the test specification.
	TraceEvent evt(SevError,"TestCreationError");
	evt.detail("TestName", testName);
	if( workload ) {
		evt.detail("Reason", "Not all options consumed");
		fprintf(stderr, "ERROR: Workload had invalid options. The following were unrecognized:\n");
		for( int n = 0; n < unconsumedOptions.size(); n++ )
			fprintf(stderr, " '%s' = '%s'\n", unconsumedOptions[n].key.toString().c_str(), unconsumedOptions[n].value.toString().c_str());
		delete workload;
	}
	else {
		evt.detail("Reason", "Null workload");
		fprintf(stderr, "ERROR: Workload could not be created, perhaps testName (%s) is not a valid workload\n", printable(testName).c_str());
	}
	throw test_specification_invalid();
}
|
|
|
|
// Creates the workload for a request: the single described workload when `work.options` holds
// one option set, otherwise a CompoundWorkload containing one child per option set.
// Throws test_specification_invalid when there are no option sets at all; errors from creating
// an individual workload propagate to the caller.
TestWorkload *getWorkloadIface( WorkloadRequest work, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
	if( work.options.size() < 1 ) {
		TraceEvent(SevError,"TestCreationError").detail("Reason", "No options provided");
		fprintf(stderr, "ERROR: No options were provided for workload.\n");
		throw test_specification_invalid();
	}
	if( work.options.size() == 1 )
		return getWorkloadIface( work, work.options[0], dbInfo );

	WorkloadContext wcx;
	wcx.clientId = work.clientId;
	wcx.clientCount = work.clientCount;
	wcx.sharedRandomNumber = work.sharedRandomNumber;
	// FIXME: Other stuff not filled in; why isn't this constructed here and passed down to the other getWorkloadIface()?

	// Hold the compound in a unique_ptr while children are created: if one of them throws, the
	// compound and the children already added to it are destroyed instead of leaked.
	std::unique_ptr<CompoundWorkload> compound( new CompoundWorkload( wcx ) );
	for( int i = 0; i < work.options.size(); i++ ) {
		TestWorkload *workload = getWorkloadIface( work, work.options[i], dbInfo );
		compound->add( workload );
	}
	return compound.release();
}
|
|
|
|
// Never returns: creates a fresh transaction, waits for its read version, waits 0.25 seconds,
// and repeats.
ACTOR Future<Void> databaseWarmer( Database cx ) {
	loop {
		state Transaction warmTr( cx );
		wait( success( warmTr.getReadVersion() ) );
		wait( delay( 0.25 ) );
	}
}
|
|
|
|
|
|
|
|
// Tries indefinitly to commit a simple, self conflicting transaction
|
|
ACTOR Future<Void> pingDatabase( Database cx ) {
|
|
state Transaction tr( cx );
|
|
loop {
|
|
try {
|
|
tr.setOption( FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE );
|
|
tr.setOption( FDBTransactionOptions::LOCK_AWARE );
|
|
Optional<Value> v = wait( tr.get( StringRef("/Liveness/" + deterministicRandom()->randomUniqueID().toString() ) ) );
|
|
tr.makeSelfConflicting();
|
|
wait( tr.commit() );
|
|
return Void();
|
|
} catch( Error& e ) {
|
|
TraceEvent("PingingDatabaseTransactionError").error(e);
|
|
wait( tr.onError( e ) );
|
|
}
|
|
}
|
|
}
|
|
|
|
// After `startDelay`, pings the database in an endless loop. Each ping must finish within
// `databasePingDelay` seconds; the remainder of that interval is then waited out before the next
// ping. Never returns normally: any error is rethrown, and every error except actor_cancelled is
// first logged as SevError. `context` is appended to the trace event names.
ACTOR Future<Void> testDatabaseLiveness( Database cx, double databasePingDelay, string context, double startDelay = 0.0 ) {
	wait( delay( startDelay ) );
	loop {
		try {
			state double start = now();
			TraceEvent(("PingingDatabaseLiveness_" + context).c_str());
			wait( timeoutError( pingDatabase( cx ), databasePingDelay ) );

			double elapsed = now() - start;
			ASSERT( elapsed > 0 );
			TraceEvent(("PingingDatabaseLivenessDone_" + context).c_str()).detail("TimeTaken", elapsed);
			wait( delay( databasePingDelay - elapsed ) );
		} catch( Error& e ) {
			if( e.code() != error_code_actor_cancelled )
				TraceEvent(SevError, ("PingingDatabaseLivenessError_" + context).c_str()).error(e)
					.detail("PingDelay", databasePingDelay);
			throw;
		}
	}
}
|
|
|
|
template <class T>
|
|
void sendResult( ReplyPromise<T>& reply, Optional<ErrorOr<T>> const& result ) {
|
|
auto& res = result.get();
|
|
if (res.isError())
|
|
reply.sendError(res.getError());
|
|
else
|
|
reply.send(res.get());
|
|
}
|
|
|
|
// Serves the setup / start / check / metrics / stop requests that arrive on `workIface` for one
// workload instance. The outcome of setup, start and check is stored the first time each phase
// runs; a repeated request is answered from the stored outcome instead of running the phase again.
// Takes ownership of `workload`, and returns once a stop request has been acknowledged.
// Unless databasePingDelay is 0.0, a liveness monitor runs alongside setup and start and fails
// them if the database stops responding. please_reboot / please_reboot_delete errors are rethrown.
ACTOR Future<Void> runWorkloadAsync( Database cx, WorkloadInterface workIface, TestWorkload *workload, double databasePingDelay ) {
	state unique_ptr<TestWorkload> ownedWorkload(workload);
	state Optional<ErrorOr<Void>> setupOutcome;
	state Optional<ErrorOr<Void>> startOutcome;
	state Optional<ErrorOr<CheckReply>> checkOutcome;
	state ReplyPromise<Void> setupPromise;
	state ReplyPromise<Void> startPromise;
	state ReplyPromise<CheckReply> checkPromise;

	TraceEvent("TestBeginAsync", workIface.id()).detail("Workload", workload->description()).detail("DatabasePingDelay", databasePingDelay);

	state Future<Void> databaseError = databasePingDelay == 0.0 ? Never() : testDatabaseLiveness( cx, databasePingDelay, "RunWorkloadAsync" );

	loop choose {
		// --- setup ---
		when( ReplyPromise<Void> req = waitNext( workIface.setup.getFuture() ) ) {
			printf("Test received trigger for setup...\n");
			TraceEvent("TestSetupBeginning", workIface.id()).detail("Workload", workload->description());
			setupPromise = req;
			if (!setupOutcome.present()) {
				try {
					wait( workload->setup(cx) || databaseError );
					TraceEvent("TestSetupComplete", workIface.id()).detail("Workload", workload->description());
					setupOutcome = Void();
				} catch (Error& e) {
					setupOutcome = operation_failed();
					TraceEvent(SevError, "TestSetupError", workIface.id()).error(e).detail("Workload", workload->description());
					if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
				}
			}
			sendResult( setupPromise, setupOutcome );
		}
		// --- start ---
		when( ReplyPromise<Void> req = waitNext( workIface.start.getFuture() ) ) {
			startPromise = req;
			if (!startOutcome.present()) {
				try {
					TraceEvent("TestStarting", workIface.id()).detail("Workload", workload->description());
					wait( workload->start(cx) || databaseError );
					startOutcome = Void();
				} catch( Error& e ) {
					startOutcome = operation_failed();
					if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
					TraceEvent(SevError,"TestFailure", workIface.id())
						.error(e, true)
						.detail("Reason", "Error starting workload")
						.detail("Workload", workload->description());
					//ok = false;
				}
				TraceEvent("TestComplete", workIface.id()).detail("Workload", workload->description()).detail("OK", !startOutcome.get().isError());
				printf("%s complete\n", workload->description().c_str());
			}
			sendResult( startPromise, startOutcome );
		}
		// --- check ---
		when(ReplyPromise<CheckReply> req = waitNext(workIface.check.getFuture())) {
			checkPromise = req;
			if (!checkOutcome.present()) {
				try {
					bool passed = wait( timeoutError( workload->check(cx), workload->getCheckTimeout() ) );
					// A failed start makes the check fail as well, whatever the workload reports.
					checkOutcome = CheckReply{ (!startOutcome.present() || !startOutcome.get().isError()) && passed };
				} catch (Error& e) {
					checkOutcome = operation_failed(); // was: checkResult = false;
					if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
					TraceEvent(SevError,"TestFailure", workIface.id())
						.error(e)
						.detail("Reason", "Error checking workload")
						.detail("Workload", workload->description());
					//ok = false;
				}
			}

			sendResult( checkPromise, checkOutcome );
		}
		// --- metrics ---
		when( ReplyPromise<vector<PerfMetric>> req = waitNext( workIface.metrics.getFuture() ) ) {
			state ReplyPromise<vector<PerfMetric>> metricsPromise = req;
			try {
				vector<PerfMetric> collected;
				workload->getMetrics( collected );
				TraceEvent("WorkloadSendMetrics", workIface.id()).detail( "Count", collected.size() );
				req.send( collected );
			} catch (Error& e) {
				if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
				TraceEvent(SevError, "WorkloadSendMetrics", workIface.id()).error(e);
				metricsPromise.sendError( operation_failed() );
			}
		}
		// --- stop ---
		when( ReplyPromise<Void> stopReq = waitNext( workIface.stop.getFuture() ) ) {
			stopReq.send(Void());
			break;
		}
	}
	return Void();
}
|
|
|
|
// Handles one recruitment request: registers a TESTER role, opens the database if the request
// asks for one, builds the workload, replies with the WorkloadInterface and then runs
// runWorkloadAsync() until it finishes or `work.timeout` (when > 0) expires.
// If an error occurs before the reply was sent, the requester receives test_specification_invalid
// as-is, or operation_failed for anything else. test_specification_invalid and timed_out are
// swallowed here; every other error is rethrown.
ACTOR Future<Void> testerServerWorkload( WorkloadRequest work, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<struct ServerDBInfo>> dbInfo, LocalityData locality ) {
	state WorkloadInterface workIface;
	state bool replied = false;
	state Database cx;
	try {
		std::map<std::string, std::string> roleDetails;
		roleDetails["WorkloadTitle"] = printable(work.title);
		roleDetails["ClientId"] = format("%d", work.clientId);
		roleDetails["ClientCount"] = format("%d", work.clientCount);
		roleDetails["WorkloadTimeout"] = format("%d", work.timeout);
		startRole(Role::TESTER, workIface.id(), UID(), roleDetails);

		if( work.useDatabase ) {
			cx = Database::createDatabase(ccf, -1, true, locality);
			wait( delay(1.0) );
		}

		// add test for "done" ?
		TraceEvent("WorkloadReceived", workIface.id()).detail("Title", work.title );
		TestWorkload *workload = getWorkloadIface( work, dbInfo );
		if(!workload) {
			TraceEvent("TestCreationError").detail("Reason", "Workload could not be created");
			fprintf(stderr, "ERROR: The workload could not be created.\n");
			throw test_specification_invalid();
		}
		Future<Void> running = runWorkloadAsync(cx, workIface, workload, work.databasePingDelay) || traceRole(Role::TESTER, workIface.id());
		work.reply.send(workIface);
		replied = true;

		if(work.timeout > 0) {
			running = timeoutError(running,work.timeout);
		}

		wait(running);

		endRole(Role::TESTER, workIface.id(), "Complete");
	} catch (Error& e) {
		// The requester is still waiting for an answer: tell it what went wrong.
		if (!replied) {
			if (e.code() == error_code_test_specification_invalid)
				work.reply.sendError(e);
			else
				work.reply.sendError( operation_failed() );
		}

		bool benign = e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete || e.code() == error_code_actor_cancelled;
		endRole(Role::TESTER, workIface.id(), "Error", benign, e);

		if (e.code() != error_code_test_specification_invalid && e.code() != error_code_timed_out) {
			throw; // fatal errors will kill the testerServer as well
		}
	}
	return Void();
}
|
|
|
|
// Main loop of a tester: every recruitment request received on `interf` is started as a
// testerServerWorkload() actor and added to an actor collection, which this loop also waits on.
ACTOR Future<Void> testerServerCore( TesterInterface interf, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<struct ServerDBInfo>> dbInfo, LocalityData locality ) {
	state PromiseStream<Future<Void>> workloadAdder;
	state Future<Void> workloadFailure = actorCollection(workloadAdder.getFuture());

	TraceEvent("StartingTesterServerCore", interf.id());
	loop choose {
		when (wait(workloadFailure)) {}
		when (WorkloadRequest request = waitNext( interf.recruitments.getFuture() )) {
			workloadAdder.send(testerServerWorkload(request, ccf, dbInfo, locality));
		}
	}
}
|
|
|
|
// Clears the `normalKeys` range in one transaction, retrying through tr.onError() until the
// commit succeeds. The committed version is logged as "TesterClearingDatabase".
ACTOR Future<Void> clearData( Database cx ) {
	state Transaction tr( cx );
	loop {
		try {
			// This transaction needs to be self-conflicting, but not conflict consistently with
			// any other transactions
			tr.clear( normalKeys );
			tr.makeSelfConflicting();
			wait(success(tr.getReadVersion())); // required since we use addReadConflictRange but not get
			wait( tr.commit() );
			TraceEvent("TesterClearingDatabase").detail("AtVersion", tr.getCommittedVersion());
			return Void();
		} catch (Error& e) {
			TraceEvent(SevWarn, "TesterClearingDatabaseError").error(e);
			wait( tr.onError(e) );
		}
	}
}
|
|
|
|
// Forward declaration; the definition is not in this part of the file. runTest() calls it with an
// output file name and a key range when spec.dumpAfterTest is set (presumably it writes the
// contents of `range` to `outputFilename` -- confirm against the definition).
Future<Void> dumpDatabase( Database const& cx, std::string const& outputFilename, KeyRange const& range );
|
|
|
|
// Incremented by runTest() each time a test finishes with ok == true.
int passCount = 0;
|
|
// Incremented by runTest() each time a test finishes with ok == false.
int failCount = 0;
|
|
|
|
// Combines the per-client metric lists into one list with a single entry per metric name (in
// name order). Values of the same name are summed; if the first metric of that name is marked
// averaged(), the sum is divided by the number of values. Every incoming metric is printed to
// stdout, and the number of reporting clients is logged as a "Metric" trace event.
vector<PerfMetric> aggregateMetrics( vector<vector<PerfMetric>> metrics ) {
	std::map<std::string, vector<PerfMetric>> metricMap;
	for(int i = 0; i < metrics.size(); i++) {
		// Bind by reference: the per-client list does not need to be copied just to be read.
		vector<PerfMetric>& workloadMetrics = metrics[i];
		TraceEvent("MetricsReturned").detail( "Count", workloadMetrics.size() );
		for(int m=0; m<workloadMetrics.size(); m++) {
			printf( "Metric (%d, %d): %s, %f, %s\n", i, m, workloadMetrics[m].name().c_str(),
				workloadMetrics[m].value(), workloadMetrics[m].formatted().c_str() );
			metricMap[workloadMetrics[m].name()].push_back( workloadMetrics[m] );
		}
	}
	// "%d" needs an int argument; passing the size_t from size() directly is a format mismatch.
	TraceEvent("Metric")
		.detail( "Name", "Reporting Clients" )
		.detail( "Value", (double)metrics.size() )
		.detail( "Formatted", format("%d", (int)metrics.size()).c_str() );

	vector<PerfMetric> result;
	for( auto& entry : metricMap ) {
		auto& vec = entry.second;
		if( !vec.size() )
			continue;
		double sum = 0;
		for(int i = 0; i < vec.size(); i++ )
			sum += vec[i].value();
		// vec is known to be non-empty here, so the division is safe.
		if( vec[0].averaged() )
			sum /= vec.size();
		result.push_back( PerfMetric( vec[0].name(), sum, false, vec[0].format_code() ) );
	}
	return result;
}
|
|
|
|
// Emits one "Metric" trace event per metric with its name, its numeric value, and the value
// rendered through the metric's own format code.
void logMetrics( vector<PerfMetric> metrics ) {
	for( PerfMetric& metric : metrics ) {
		TraceEvent("Metric")
			.detail( "Name", metric.name() )
			.detail( "Value", metric.value() )
			.detail( "Formatted", format(metric.format_code().c_str(), metric.value() ) );
	}
}
|
|
|
|
template <class T>
|
|
void throwIfError(const std::vector<Future<ErrorOr<T>>> &futures, std::string errorMsg) {
|
|
for(auto &future:futures) {
|
|
if(future.get().isError()) {
|
|
TraceEvent(SevError, errorMsg.c_str()).error(future.get().getError());
|
|
throw future.get().getError();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Runs one test specification across all `testers`: recruits a workload instance on each tester,
// drives the phases selected in spec.phases (SETUP, EXECUTION, CHECK, METRICS) on all instances,
// and finally sends every instance a stop request. An error reported by any instance in a phase is
// rethrown via throwIfError(). Returns the aggregated metrics plus the number of instances whose
// check passed / failed.
ACTOR Future<DistributedTestResults> runWorkload( Database cx, std::vector< TesterInterface > testers,
		TestSpec spec ) {
	TraceEvent("TestRunning").detail( "WorkloadTitle", spec.title )
		.detail("TesterCount", testers.size()).detail("Phases", spec.phases)
		.detail("TestTimeout", spec.timeout);

	state vector< Future< WorkloadInterface > > workRequests;
	state vector<vector<PerfMetric>> metricsResults;

	state int testerIdx = 0;
	state int passed = 0;
	state int failed = 0;
	// The same random number is handed to every client of this run.
	int64_t sharedRandom = deterministicRandom()->randomInt64(0,10000000);
	for(; testerIdx < testers.size(); testerIdx++) {
		WorkloadRequest req;
		req.title = spec.title;
		req.useDatabase = spec.useDB;
		req.timeout = spec.timeout;
		req.databasePingDelay = spec.databasePingDelay;
		req.options = spec.options;
		req.clientId = testerIdx;
		req.clientCount = testers.size();
		req.sharedRandomNumber = sharedRandom;
		workRequests.push_back( testers[testerIdx].recruitments.getReply( req ) );
	}

	state vector< WorkloadInterface > workloads = wait( getAll( workRequests ) );
	state double waitForFailureTime = g_network->isSimulated() ? 24*60*60 : 60;
	if( g_network->isSimulated() && spec.simCheckRelocationDuration )
		debug_setCheckRelocationDuration( true );

	if( spec.phases & TestWorkload::SETUP ) {
		state std::vector< Future<ErrorOr<Void>> > setups;
		printf("setting up test (%s)...\n", printable(spec.title).c_str());
		TraceEvent("TestSetupStart").detail("WorkloadTitle", spec.title);
		for(int w = 0; w < workloads.size(); w++)
			setups.push_back( workloads[w].setup.template getReplyUnlessFailedFor<Void>( waitForFailureTime, 0) );
		wait( waitForAll( setups ) );
		throwIfError(setups, "SetupFailedForWorkload" + printable(spec.title));
		TraceEvent("TestSetupComplete").detail("WorkloadTitle", spec.title);
	}

	if( spec.phases & TestWorkload::EXECUTION ) {
		TraceEvent("TestStarting").detail("WorkloadTitle", spec.title);
		printf("running test (%s)...\n", printable(spec.title).c_str());
		state std::vector< Future<ErrorOr<Void>> > starts;
		for(int w = 0; w < workloads.size(); w++)
			starts.push_back( workloads[w].start.template getReplyUnlessFailedFor<Void>(waitForFailureTime, 0) );
		wait( waitForAll( starts ) );
		throwIfError(starts, "StartFailedForWorkload" + printable(spec.title));
		printf("%s complete\n", printable(spec.title).c_str());
		TraceEvent("TestComplete").detail("WorkloadTitle", spec.title);
	}

	if( spec.phases & TestWorkload::CHECK ) {
		if( spec.useDB && ( spec.phases & TestWorkload::EXECUTION ) ) {
			wait( delay(3.0) );
		}

		state std::vector<Future<ErrorOr<CheckReply>>> checks;
		TraceEvent("CheckingResults");

		printf("checking test (%s)...\n", printable(spec.title).c_str());

		for(int w = 0; w < workloads.size(); w++)
			checks.push_back(workloads[w].check.template getReplyUnlessFailedFor<CheckReply>(waitForFailureTime, 0));
		wait( waitForAll( checks ) );

		throwIfError(checks, "CheckFailedForWorkload" + printable(spec.title));

		// Tally how many instances reported a passing check.
		for(int c = 0; c < checks.size(); c++) {
			if (checks[c].get().get().value)
				passed++;
			else
				failed++;
		}
	}

	if( spec.phases & TestWorkload::METRICS ) {
		state std::vector< Future<ErrorOr<vector<PerfMetric>>> > metricTasks;
		printf("fetching metrics (%s)...\n", printable(spec.title).c_str());
		TraceEvent("TestFetchingMetrics").detail("WorkloadTitle", spec.title);
		for(int w = 0; w < workloads.size(); w++)
			metricTasks.push_back( workloads[w].metrics.template getReplyUnlessFailedFor<vector<PerfMetric>>(waitForFailureTime, 0) );
		wait( waitForAll( metricTasks ) );
		throwIfError(metricTasks, "MetricFailedForWorkload" + printable(spec.title));
		for(int t = 0; t < metricTasks.size(); t++) {
			metricsResults.push_back( metricTasks[t].get().get() );
		}
	}

	// Stopping the workloads is unreliable, but they have a timeout
	// FIXME: stop if one of the above phases throws an exception
	for(int w = 0; w < workloads.size(); w++)
		workloads[w].stop.send(ReplyPromise<Void>());

	return DistributedTestResults( aggregateMetrics( metricsResults ), passed, failed );
}
|
|
|
|
//Sets the database configuration by running the ChangeConfig workload
|
|
ACTOR Future<Void> changeConfiguration(Database cx, std::vector< TesterInterface > testers, StringRef configMode) {
|
|
state TestSpec spec;
|
|
Standalone<VectorRef<KeyValueRef>> options;
|
|
spec.title = LiteralStringRef("ChangeConfig");
|
|
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("testName"), LiteralStringRef("ChangeConfig")));
|
|
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("configMode"), configMode));
|
|
spec.options.push_back_deep(spec.options.arena(), options);
|
|
|
|
DistributedTestResults testResults = wait(runWorkload(cx, testers, spec));
|
|
|
|
return Void();
|
|
}
|
|
|
|
//Runs the consistency check workload, which verifies that the database is in a consistent state
|
|
ACTOR Future<Void> checkConsistency(Database cx, std::vector< TesterInterface > testers, bool doQuiescentCheck,
|
|
double quiescentWaitTimeout, double softTimeLimit, double databasePingDelay, Reference<AsyncVar<ServerDBInfo>> dbInfo) {
|
|
state TestSpec spec;
|
|
|
|
state double connectionFailures;
|
|
if( g_network->isSimulated() ) {
|
|
connectionFailures = g_simulator.connectionFailuresDisableDuration;
|
|
g_simulator.connectionFailuresDisableDuration = 1e6;
|
|
g_simulator.speedUpSimulation = true;
|
|
}
|
|
|
|
Standalone<VectorRef<KeyValueRef>> options;
|
|
StringRef performQuiescent = LiteralStringRef("false");
|
|
if (doQuiescentCheck) {
|
|
performQuiescent = LiteralStringRef("true");
|
|
}
|
|
spec.title = LiteralStringRef("ConsistencyCheck");
|
|
spec.databasePingDelay = databasePingDelay;
|
|
spec.timeout = 32000;
|
|
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("testName"), LiteralStringRef("ConsistencyCheck")));
|
|
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("performQuiescentChecks"), performQuiescent));
|
|
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("quiescentWaitTimeout"), ValueRef(options.arena(), format("%f", quiescentWaitTimeout))));
|
|
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("distributed"), LiteralStringRef("false")));
|
|
spec.options.push_back_deep(spec.options.arena(), options);
|
|
|
|
state double start = now();
|
|
state bool lastRun = false;
|
|
loop {
|
|
DistributedTestResults testResults = wait(runWorkload(cx, testers, spec));
|
|
if(testResults.ok() || lastRun) {
|
|
if( g_network->isSimulated() ) {
|
|
g_simulator.connectionFailuresDisableDuration = connectionFailures;
|
|
}
|
|
return Void();
|
|
}
|
|
if(now() - start > softTimeLimit) {
|
|
spec.options[0].push_back_deep(spec.options.arena(), KeyValueRef(LiteralStringRef("failureIsError"), LiteralStringRef("true")));
|
|
lastRun = true;
|
|
}
|
|
|
|
wait( repairDeadDatacenter(cx, dbInfo, "ConsistencyCheck") );
|
|
}
|
|
}
|
|
|
|
// Runs one test end to end and returns whether it passed.
// Steps: run the workload (bounded by spec.timeout when > 0; a timeout counts every tester as
// failed), then, when the test uses the database, optionally dump it and run the consistency
// check (a failure of either marks the test as failed), record the verdict in passCount /
// failCount, and finally clear the database if spec.clearAfterTest is set. Errors other than a
// workload timeout, and any error while clearing the database, are rethrown.
ACTOR Future<bool> runTest( Database cx, std::vector< TesterInterface > testers, TestSpec spec, Reference<AsyncVar<ServerDBInfo>> dbInfo )
{
	state DistributedTestResults testResults;

	try {
		Future<DistributedTestResults> pendingResults = runWorkload( cx, testers, spec );
		if( spec.timeout > 0 ) {
			pendingResults = timeoutError( pendingResults, spec.timeout );
		}
		DistributedTestResults finished = wait( pendingResults );
		testResults = finished;
		logMetrics( testResults.metrics );
	} catch(Error& e) {
		if( e.code() != error_code_timed_out )
			throw;

		TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Test timed out").detail("Timeout", spec.timeout);
		fprintf(stderr, "ERROR: Test timed out after %d seconds.\n", spec.timeout);
		testResults.failures = testers.size();
		testResults.successes = 0;
	}

	state bool ok = testResults.ok();

	if( spec.useDB ) {
		if( spec.dumpAfterTest ) {
			try {
				wait( timeoutError( dumpDatabase( cx, "dump after " + printable(spec.title) + ".html", allKeys ), 30.0 ) );
			} catch (Error& e) {
				TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Unable to dump database");
				ok = false;
			}

			wait( delay(1.0) );
		}

		//Run the consistency check workload
		if(spec.runConsistencyCheck) {
			try {
				bool quiescent = g_network->isSimulated() ? !BUGGIFY : spec.waitForQuiescenceEnd;
				wait(timeoutError(checkConsistency(cx, testers, quiescent, 10000.0, 18000, spec.databasePingDelay, dbInfo), 20000.0));
			}
			catch(Error& e) {
				TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Unable to perform consistency check");
				ok = false;
			}
		}
	}

	TraceEvent(ok ? SevInfo : SevWarnAlways, "TestResults")
		.detail("Workload", spec.title)
		.detail("Passed", (int)ok);
		//.detail("Metrics", metricSummary);

	if (ok)
		passCount++;
	else
		failCount++;

	printf("%d test clients passed; %d test clients failed\n", testResults.successes, testResults.failures);

	if( spec.useDB && spec.clearAfterTest ) {
		try {
			TraceEvent("TesterClearingDatabase");
			wait( timeoutError(clearData(cx), 1000.0) );
		} catch (Error& e) {
			TraceEvent(SevError, "ErrorClearingDatabaseAfterTest").error(e);
			throw; // If we didn't do this, we don't want any later tests to run on this DB
		}

		wait( delay(1.0) );
	}

	return ok;
}
|
|
|
|
std::map<std::string, std::function<void(const std::string&)>> testSpecGlobalKeys = {
|
|
// These are read by SimulatedCluster and used before testers exist. Thus, they must
|
|
// be recognized and accepted, but there's no point in placing them into a testSpec.
|
|
{"extraDB", [](const std::string& value) {
|
|
TraceEvent("TestParserTest").detail("ParsedExtraDB", "");
|
|
}},
|
|
{"configureLocked", [](const std::string& value) {
|
|
TraceEvent("TestParserTest").detail("ParsedConfigureLocked", "");
|
|
}},
|
|
{"minimumReplication", [](const std::string& value) {
|
|
TraceEvent("TestParserTest").detail("ParsedMinimumReplication", "");
|
|
}},
|
|
{"minimumRegions", [](const std::string& value) {
|
|
TraceEvent("TestParserTest").detail("ParsedMinimumRegions", "");
|
|
}},
|
|
{"logAntiQuorum", [](const std::string& value) {
|
|
TraceEvent("TestParserTest").detail("ParsedLogAntiQuorum", "");
|
|
}},
|
|
{"buggify", [](const std::string& value) {
|
|
TraceEvent("TestParserTest").detail("ParsedBuggify", "");
|
|
}},
|
|
// The test harness handles NewSeverity events specially.
|
|
{"StderrSeverity", [](const std::string& value) {
|
|
TraceEvent("StderrSeverity").detail("NewSeverity", value);
|
|
}},
|
|
{"ClientInfoLogging", [](const std::string& value) {
|
|
if (value == "false") {
|
|
setNetworkOption(FDBNetworkOptions::DISABLE_CLIENT_STATISTICS_LOGGING);
|
|
}
|
|
// else { } It is enable by default for tester
|
|
TraceEvent("TestParserTest").detail("ClientInfoLogging", value);
|
|
}},
|
|
};
|
|
|
|
// Handlers for test-spec keys that configure a single test. Each handler gets the
// raw string value from the spec and the TestSpec being built, and writes the
// parsed setting into that spec (logging what it parsed).
//
// Boolean keys treat exactly "true" as true and anything else as false.
std::map<std::string, std::function<void(const std::string& value, TestSpec* spec)>> testSpecTestKeys = {
	{ "testTitle", [](const std::string& value, TestSpec* spec) {
		spec->title = value;
		TraceEvent("TestParserTest").detail("ParsedTest", spec->title );
	}},
	{ "timeout", [](const std::string& value, TestSpec* spec) {
		// Parsed straight into the spec; on a malformed value the previous
		// timeout is left in place and then validated below.
		sscanf( value.c_str(), "%d", &(spec->timeout) );
		ASSERT( spec->timeout > 0 );
		TraceEvent("TestParserTest").detail("ParsedTimeout", spec->timeout);
	}},
	{ "databasePingDelay", [](const std::string& value, TestSpec* spec) {
		// Start from an invalid sentinel: if sscanf cannot parse the value the
		// variable stays negative and the ASSERT below fails deterministically,
		// instead of reading an uninitialized double.
		double databasePingDelay = -1.0;
		sscanf( value.c_str(), "%lf", &databasePingDelay );
		ASSERT( databasePingDelay >= 0 );
		if( !spec->useDB && databasePingDelay > 0 ) {
			TraceEvent(SevError, "TestParserError")
				.detail("Reason", "Cannot have non-zero ping delay on test that does not use database")
				.detail("PingDelay", databasePingDelay).detail("UseDB", spec->useDB);
			ASSERT( false );
		}
		spec->databasePingDelay = databasePingDelay;
		TraceEvent("TestParserTest").detail("ParsedPingDelay", spec->databasePingDelay);
	}},
	{ "runSetup", [](const std::string& value, TestSpec* spec) {
		// Resets the phase mask to everything except SETUP, then adds SETUP back
		// only when explicitly requested.
		spec->phases = TestWorkload::EXECUTION | TestWorkload::CHECK | TestWorkload::METRICS;
		if( value == "true" )
			spec->phases |= TestWorkload::SETUP;
		TraceEvent("TestParserTest").detail("ParsedSetupFlag", (spec->phases & TestWorkload::SETUP) != 0);
	}},
	{ "dumpAfterTest", [](const std::string& value, TestSpec* spec) {
		spec->dumpAfterTest = ( value == "true" );
		TraceEvent("TestParserTest").detail("ParsedDumpAfter", spec->dumpAfterTest);
	}},
	{ "clearAfterTest", [](const std::string& value, TestSpec* spec) {
		spec->clearAfterTest = ( value == "true" );
		TraceEvent("TestParserTest").detail("ParsedClearAfter", spec->clearAfterTest);
	}},
	{ "useDB", [](const std::string& value, TestSpec* spec) {
		spec->useDB = ( value == "true" );
		TraceEvent("TestParserTest").detail("ParsedUseDB", spec->useDB);
		// A test that does not use the database cannot ping it.
		if( !spec->useDB )
			spec->databasePingDelay = 0.0;
	}},
	{ "startDelay", [](const std::string& value, TestSpec* spec) {
		// Parsed straight into the spec; a malformed value leaves it unchanged.
		sscanf( value.c_str(), "%lf", &spec->startDelay );
		TraceEvent("TestParserTest").detail("ParsedStartDelay", spec->startDelay);
	}},
	{ "runConsistencyCheck", [](const std::string& value, TestSpec* spec) {
		spec->runConsistencyCheck = ( value == "true" );
		TraceEvent("TestParserTest").detail("ParsedRunConsistencyCheck", spec->runConsistencyCheck);
	}},
	{ "waitForQuiescence", [](const std::string& value, TestSpec* spec) {
		// Convenience key: sets both the begin and the end flag at once.
		bool toWait = value == "true";
		spec->waitForQuiescenceBegin = toWait;
		spec->waitForQuiescenceEnd = toWait;
		TraceEvent("TestParserTest").detail("ParsedWaitForQuiescence", toWait);
	}},
	{ "waitForQuiescenceBegin", [](const std::string& value, TestSpec* spec) {
		bool toWait = value == "true";
		spec->waitForQuiescenceBegin = toWait;
		TraceEvent("TestParserTest").detail("ParsedWaitForQuiescenceBegin", toWait);
	}},
	{ "waitForQuiescenceEnd", [](const std::string& value, TestSpec* spec) {
		bool toWait = value == "true";
		spec->waitForQuiescenceEnd = toWait;
		TraceEvent("TestParserTest").detail("ParsedWaitForQuiescenceEnd", toWait);
	}},
	{ "simCheckRelocationDuration", [](const std::string& value, TestSpec* spec) {
		spec->simCheckRelocationDuration = (value == "true");
		TraceEvent("TestParserTest").detail("ParsedSimCheckRelocationDuration", spec->simCheckRelocationDuration);
	}},
	{ "connectionFailuresDisableDuration", [](const std::string& value, TestSpec* spec) {
		// Same sentinel approach as databasePingDelay: an unparsable value fails
		// the ASSERT rather than reading an uninitialized double.
		double connectionFailuresDisableDuration = -1.0;
		sscanf( value.c_str(), "%lf", &connectionFailuresDisableDuration );
		ASSERT( connectionFailuresDisableDuration >= 0 );
		spec->simConnectionFailuresDisableDuration = connectionFailuresDisableDuration;
		// In simulation the setting is also applied to the simulator immediately.
		if(g_network->isSimulated())
			g_simulator.connectionFailuresDisableDuration = spec->simConnectionFailuresDisableDuration;
		TraceEvent("TestParserTest").detail("ParsedSimConnectionFailuresDisableDuration", spec->simConnectionFailuresDisableDuration);
	}},
	{ "simBackupAgents", [](const std::string& value, TestSpec* spec) {
		// One key drives two settings: "BackupToFileAndDB" enables both the
		// backup agents and the DR agents; any unrecognized value disables both.
		if (value == "BackupToFile" || value == "BackupToFileAndDB")
			spec->simBackupAgents = ISimulator::BackupToFile;
		else
			spec->simBackupAgents = ISimulator::NoBackupAgents;
		TraceEvent("TestParserTest").detail("ParsedSimBackupAgents", spec->simBackupAgents);

		if (value == "BackupToDB" || value == "BackupToFileAndDB")
			spec->simDrAgents = ISimulator::BackupToDB;
		else
			spec->simDrAgents = ISimulator::NoBackupAgents;
		TraceEvent("TestParserTest").detail("ParsedSimDrAgents", spec->simDrAgents);
	}},
	{ "checkOnly", [](const std::string& value, TestSpec* spec) {
		// Restricts the test to the CHECK phase; any other value is a no-op.
		if(value == "true")
			spec->phases = TestWorkload::CHECK;
	}},
};
|
|
|
|
// Parses the line-oriented "attrib=value" test specification format from `ifs`.
//
// Blank lines, lines starting with ';' and lines without '=' are skipped.
// "testTitle" starts a new test; keys found in testSpecTestKeys / testSpecGlobalKeys
// are dispatched to their handlers; every other key is collected as a workload
// option, with "testName" starting a new workload inside the current test.
// Only specs that have both a title and at least one workload are returned.
vector<TestSpec> readTests( ifstream& ifs ) {
	vector<TestSpec> parsed;
	TestSpec current;
	Standalone< VectorRef< KeyValueRef > > pendingOptions;
	std::string rawLine;
	bool sawTestTitle = false;
	bool inWorkload = false;

	while( ifs.good() ) {
		getline(ifs, rawLine);
		const string line = removeWhitespace( string(rawLine) );
		if( line.empty() || line.find( ';' ) == 0 )
			continue;

		const size_t separator = line.find( '=' );
		if( separator == string::npos )
			continue; // not an attrib=value line; ignored

		const string attrib = removeWhitespace( line.substr( 0, separator ) );
		const string value = removeWhitespace( line.substr( separator + 1 ) );

		// A new title closes the workload and the test collected so far.
		if( attrib == "testTitle" ) {
			sawTestTitle = true;
			inWorkload = false;
			if( pendingOptions.size() ) {
				current.options.push_back_deep( current.options.arena(), pendingOptions );
				pendingOptions = Standalone< VectorRef< KeyValueRef > >();
			}
			if( current.options.size() && current.title.size() ) {
				parsed.push_back( current );
				current = TestSpec();
			}

			testSpecTestKeys[attrib](value, &current);
			continue;
		}

		// Test-level setting: expected before the first workload of the test.
		auto testKey = testSpecTestKeys.find(attrib);
		if( testKey != testSpecTestKeys.end() ) {
			if (inWorkload) TraceEvent(SevError, "TestSpecTestParamInWorkload").detail("Attrib", attrib).detail("Value", value);
			testKey->second(value, &current);
			continue;
		}

		// Global setting: expected before the first test of the file.
		auto globalKey = testSpecGlobalKeys.find(attrib);
		if( globalKey != testSpecGlobalKeys.end() ) {
			if (sawTestTitle) TraceEvent(SevError, "TestSpecGlobalParamInTest").detail("Attrib", attrib).detail("Value", value);
			globalKey->second(value);
			continue;
		}

		// Anything else is a workload option.
		if( attrib == "testName" ) {
			inWorkload = true;
			if( pendingOptions.size() ) {
				TraceEvent("TestParserFlush").detail("Reason", "new (compound) test");
				current.options.push_back_deep( current.options.arena(), pendingOptions );
				pendingOptions = Standalone< VectorRef< KeyValueRef > >();
			}
		}

		pendingOptions.push_back_deep( pendingOptions.arena(),
		                               KeyValueRef( StringRef( attrib ), StringRef( value ) ) );
		TraceEvent("TestParserOption").detail("ParsedKey", attrib).detail("ParsedValue", value);
	}

	// Flush whatever was still being collected when the input ended.
	if( pendingOptions.size() )
		current.options.push_back_deep( current.options.arena(), pendingOptions );
	if( current.options.size() && current.title.size() ) {
		parsed.push_back( current );
	}

	return parsed;
}
|
|
|
|
template <typename T>
|
|
std::string toml_to_string(const T& value) {
|
|
// TOML formatting converts numbers to strings exactly how they're in the file
|
|
// and thus, is equivalent to testspec. However, strings are quoted, so we
|
|
// must remove the quotes.
|
|
if (value.type() == toml::value_t::string) {
|
|
const std::string& formatted = toml::format(value);
|
|
return formatted.substr(1, formatted.size()-2);
|
|
} else {
|
|
return toml::format(value);
|
|
}
|
|
}
|
|
|
|
|
|
std::vector<TestSpec> readTOMLTests_( std::string fileName ) {
|
|
TestSpec spec;
|
|
Standalone< VectorRef< KeyValueRef > > workloadOptions;
|
|
std::vector<TestSpec> result;
|
|
|
|
const toml::value& conf = toml::parse(fileName);
|
|
|
|
// Handle all global settings
|
|
for (const auto& [k, v] : conf.as_table()) {
|
|
if (k == "test") {
|
|
continue;
|
|
}
|
|
if (testSpecGlobalKeys.find(k) != testSpecGlobalKeys.end()) {
|
|
testSpecGlobalKeys[k](toml_to_string(v));
|
|
} else {
|
|
TraceEvent(SevError, "TestSpecUnrecognizedGlobalParam").detail("Attrib", k).detail("Value", toml_to_string(v));
|
|
}
|
|
}
|
|
|
|
// Then parse each test
|
|
const toml::array& tests = toml::find(conf, "test").as_array();
|
|
for (const toml::value& test : tests) {
|
|
spec = TestSpec();
|
|
|
|
// First handle all test-level settings
|
|
for (const auto& [k, v] : test.as_table()) {
|
|
if (k == "workload") {
|
|
continue;
|
|
}
|
|
if (testSpecTestKeys.find(k) != testSpecTestKeys.end()) {
|
|
testSpecTestKeys[k](toml_to_string(v), &spec);
|
|
} else {
|
|
TraceEvent(SevError, "TestSpecUnrecognizedTestParam").detail("Attrib", k).detail("Value", toml_to_string(v));
|
|
}
|
|
}
|
|
|
|
// And then copy the workload attributes to spec.options
|
|
const toml::array& workloads = toml::find(test, "workload").as_array();
|
|
for (const toml::value& workload : workloads) {
|
|
workloadOptions = Standalone< VectorRef< KeyValueRef > >();
|
|
TraceEvent("TestParserFlush").detail("Reason", "new (compound) test");
|
|
for (const auto& [attrib, v] : workload.as_table()) {
|
|
const std::string& value = toml_to_string(v);
|
|
workloadOptions.push_back_deep( workloadOptions.arena(),
|
|
KeyValueRef( StringRef( attrib ), StringRef( value ) ) );
|
|
TraceEvent("TestParserOption").detail("ParsedKey", attrib).detail("ParsedValue", value);
|
|
}
|
|
spec.options.push_back_deep( spec.options.arena(), workloadOptions );
|
|
}
|
|
|
|
result.push_back(spec);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// A hack to catch and log std::exception, because TOML11 has very useful
|
|
// error messages, but the actor framework can't handle std::exception.
|
|
std::vector<TestSpec> readTOMLTests( std::string fileName ) {
|
|
try {
|
|
return readTOMLTests_( fileName );
|
|
} catch (std::exception &e) {
|
|
std::cerr << e.what() << std::endl;
|
|
TraceEvent("TOMLParseError").detail("Error", printable(e.what()));
|
|
// TODO: replace with toml_parse_error();
|
|
throw unknown_error();
|
|
}
|
|
}
|
|
|
|
// Keeps `dbInfo` up to date with the ServerDBInfo published by the cluster
// controller in `ccInterface`. Never returns normally.
//
// Every value stored into `dbInfo` has its myLocality field overwritten with
// `locality`. Each loop iteration either receives a new ServerDBInfo from the
// current cluster controller or notices that the controller interface changed
// and issues a fresh request.
ACTOR Future<Void> monitorServerDBInfo(Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
                                       LocalityData locality,
                                       Reference<AsyncVar<ServerDBInfo>> dbInfo) {
	// Initially most of the serverDBInfo is not known, but we know our locality right away
	ServerDBInfo initialInfo;
	initialInfo.myLocality = locality;
	dbInfo->set(initialInfo);

	loop {
		// Tell the controller which version we already have.
		GetServerDBInfoRequest req;
		req.knownServerInfoID = dbInfo->get().id;

		choose {
			// With no known cluster controller there is nothing to ask; wait on Never().
			when( ServerDBInfo received = wait( ccInterface->get().present() ? brokenPromiseToNever( ccInterface->get().get().getServerDBInfo.getReply( req ) ) : Never() ) ) {
				ServerDBInfo updated = received;
				TraceEvent("GotServerDBInfoChange")
					.detail("ChangeID", updated.id)
					.detail("MasterID", updated.master.id())
					.detail("RatekeeperID", updated.ratekeeper.present() ? updated.ratekeeper.get().id() : UID())
					.detail("DataDistributorID", updated.distributor.present() ? updated.distributor.get().id() : UID());

				updated.myLocality = locality;
				dbInfo->set(updated);
			}
			when( wait( ccInterface->onChange() ) ) {
				if( ccInterface->get().present() ) {
					TraceEvent("GotCCInterfaceChange").detail("CCID", ccInterface->get().get().id()).detail("CCMachine", ccInterface->get().get().getWorkers.getEndpoint().getPrimaryAddress());
				}
			}
		}
	}
}
|
|
|
|
// Runs every TestSpec in `tests`, in order, on an already known set of testers.
//
// Before the first test: aggregates suite-wide settings from all specs, opens
// the database when any spec needs it (or when there are no specs), applies
// `startingConfiguration` and optionally waits for the database to be quiet.
// After the last test: optionally waits for the database to become quiet again.
// Individual test failures do not stop the run; totals are printed at the end.
ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc, Reference<AsyncVar<Optional<struct ClusterInterface>>> ci, vector< TesterInterface > testers, vector<TestSpec> tests, StringRef startingConfiguration, LocalityData locality ) {
	state Database cx;
	state Reference<AsyncVar<ServerDBInfo>> dbInfo( new AsyncVar<ServerDBInfo> );
	state Future<Void> ccMonitor = monitorServerDBInfo(cc, LocalityData(), dbInfo); // FIXME: locality

	// Suite-wide settings, folded together from all test specs below.
	state bool useDB = tests.empty();
	state bool waitForQuiescenceBegin = false;
	state bool waitForQuiescenceEnd = false;
	state double startDelay = 0.0;
	state double databasePingDelay = 1e9;
	state ISimulator::BackupAgentType simBackupAgents = ISimulator::NoBackupAgents;
	state ISimulator::BackupAgentType simDrAgents = ISimulator::NoBackupAgents;
	state bool enableDD = false;

	for( auto test = tests.begin(); test != tests.end(); ++test ) {
		useDB = useDB || test->useDB;
		waitForQuiescenceBegin = waitForQuiescenceBegin || test->waitForQuiescenceBegin;
		waitForQuiescenceEnd = waitForQuiescenceEnd || test->waitForQuiescenceEnd;
		startDelay = std::max( startDelay, test->startDelay );
		databasePingDelay = std::min( databasePingDelay, test->databasePingDelay );

		// The last spec that requests agents wins.
		if( test->simBackupAgents != ISimulator::NoBackupAgents ) {
			simBackupAgents = test->simBackupAgents;
		}
		if( test->simDrAgents != ISimulator::NoBackupAgents ) {
			simDrAgents = test->simDrAgents;
		}

		// getOption is only consulted until enableDD has been turned on.
		if( !enableDD ) {
			enableDD = getOption(test->options[0], LiteralStringRef("enableDD"), false);
		}
	}

	if( g_network->isSimulated() ) {
		g_simulator.backupAgents = simBackupAgents;
		g_simulator.drAgents = simDrAgents;
	}

	if( useDB ) {
		cx = openDBOnServer(dbInfo);
	} else {
		// turn off the database ping functionality if the suite of tests are not going to be using the database
		databasePingDelay = 0.0;
	}

	state Future<Void> disabler = disableConnectionFailuresAfter(450, "Tester");

	// Change the configuration (and/or create the database) if necessary
	printf("startingConfiguration:%s start\n", startingConfiguration.toString().c_str());
	if( useDB && startingConfiguration != StringRef() ) {
		try {
			wait( timeoutError( changeConfiguration(cx, testers, startingConfiguration), 2000.0 ) );
			if( g_network->isSimulated() && enableDD ) {
				wait( success( setDDMode(cx, 1) ) );
			}
		} catch(Error& e) {
			// Logged but not rethrown: the tests below still run.
			TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Unable to set starting configuration");
		}
	}

	if( useDB && waitForQuiescenceBegin ) {
		TraceEvent("TesterStartingPreTestChecks").detail("DatabasePingDelay", databasePingDelay).detail("StartDelay", startDelay);
		try {
			wait( quietDatabase( cx, dbInfo, "Start" ) ||
			      ( databasePingDelay == 0.0 ? Never() : testDatabaseLiveness( cx, databasePingDelay, "QuietDatabaseStart", startDelay ) ) );
		} catch( Error& e ) {
			TraceEvent("QuietDatabaseStartExternalError").error(e);
			throw;
		}
	}

	TraceEvent("TestsExpectedToPass").detail("Count", tests.size());
	state int testIndex = 0;
	for( ; testIndex < tests.size(); testIndex++ ) {
		printf("Run test:%s start\n", tests[testIndex].title.toString().c_str());
		wait( success( runTest(cx, testers, tests[testIndex], dbInfo) ) );
		printf("Run test:%s Done.\n", tests[testIndex].title.toString().c_str());
		// do we handle a failure here?
	}

	printf("\n%d tests passed; %d tests failed.\n", passCount, failCount);

	// If the database was deleted during the workload we need to recreate the database
	if( (tests.empty() || useDB) && waitForQuiescenceEnd ) {
		printf("Waiting for DD to end...\n");
		try {
			wait( quietDatabase( cx, dbInfo, "End", 0, 2e6, 2e6 ) ||
			      ( databasePingDelay == 0.0 ? Never()
			                                 : testDatabaseLiveness( cx, databasePingDelay, "QuietDatabaseEnd" ) ) );
		} catch( Error& e ) {
			TraceEvent("QuietDatabaseEndExternalError").error(e);
			throw;
		}
	}
	printf("\n");

	return Void();
}
|
|
|
|
// Recruits testers from the cluster controller, then runs `tests` on them.
//
// Polls the controller's worker list until at least `minTestersExpected`
// workers match (tester-class only unless `at` is TEST_ON_SERVERS, and always
// excluding excluded processes). Throws timed_out if that has not happened
// within 600 seconds.
ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc,
                             Reference<AsyncVar<Optional<struct ClusterInterface>>> ci, vector<TestSpec> tests, test_location_t at,
                             int minTestersExpected, StringRef startingConfiguration, LocalityData locality ) {
	state int flags = (at == TEST_ON_SERVERS ? 0 : GetWorkersRequest::TESTER_CLASS_ONLY) | GetWorkersRequest::NON_EXCLUDED_PROCESSES_ONLY;
	state Future<Void> testerTimeout = delay(600.0); // wait 600 sec for testers to show up
	state vector<WorkerDetails> workers;

	loop {
		choose {
			// Without a known cluster controller there is nobody to ask; wait on Never().
			when( vector<WorkerDetails> candidates = wait( cc->get().present() ? brokenPromiseToNever( cc->get().get().getWorkers.getReply( GetWorkersRequest( flags ) ) ) : Never() ) ) {
				if( candidates.size() >= minTestersExpected ) {
					workers = candidates;
					break;
				}
				// Not enough yet: pause before asking again.
				wait( delay(SERVER_KNOBS->WORKER_POLL_DELAY) );
			}
			when( wait( cc->onChange() ) ) {}
			when( wait( testerTimeout ) ) {
				TraceEvent(SevError, "TesterRecruitmentTimeout");
				throw timed_out();
			}
		}
	}

	// Hand the tester interface of every recruited worker to the main runner.
	vector<TesterInterface> recruited;
	for( auto worker = workers.begin(); worker != workers.end(); ++worker ) {
		recruited.push_back( worker->interf.testerInterface );
	}

	wait( runTests( cc, ci, recruited, tests, startingConfiguration, locality ) );
	return Void();
}
|
|
|
|
// Top-level entry point: builds the list of TestSpecs and runs them.
//
// For TEST_TYPE_CONSISTENCY_CHECK a single built-in "ConsistencyCheck" spec is
// used; otherwise the specs are read from `fileName` (".txt" -> readTests,
// ".toml" -> readTOMLTests). If the file cannot be opened or has an unknown
// extension the problem is logged and the actor returns without running tests.
// With TEST_HERE a local tester is started in this process; otherwise testers
// are recruited from the cluster.
ACTOR Future<Void> runTests( Reference<ClusterConnectionFile> connFile, test_type_t whatToRun, test_location_t at,
                             int minTestersExpected, std::string fileName, StringRef startingConfiguration, LocalityData locality ) {
	state vector<TestSpec> testSpecs;
	Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> cc( new AsyncVar<Optional<ClusterControllerFullInterface>> );
	Reference<AsyncVar<Optional<ClusterInterface>>> ci( new AsyncVar<Optional<ClusterInterface>> );

	// Background actors that are expected to run for as long as the tests do.
	vector<Future<Void>> actors;
	actors.push_back( reportErrors( monitorLeader( connFile, cc ), "MonitorLeader" ) );
	actors.push_back( reportErrors( extractClusterInterface( cc, ci ), "ExtractClusterInterface" ) );

	if( whatToRun != TEST_TYPE_CONSISTENCY_CHECK ) {
		ifstream specFile;
		specFile.open( fileName.c_str(), ifstream::in );
		if( !specFile.good() ) {
			TraceEvent(SevError, "TestHarnessFail").detail("Reason", "file open failed").detail("File", fileName.c_str());
			fprintf(stderr, "ERROR: Could not open file `%s'\n", fileName.c_str());
			return Void();
		}
		enableClientInfoLogging(); // Enable Client Info logging by default for tester
		if( boost::algorithm::ends_with(fileName, ".txt") ) {
			testSpecs = readTests( specFile );
		} else if( boost::algorithm::ends_with(fileName, ".toml") ) {
			// TOML is weird about opening the file as binary on windows, so we
			// just let TOML re-open the file instead of using specFile.
			testSpecs = readTOMLTests( fileName );
		} else {
			TraceEvent(SevError, "TestHarnessFail").detail("Reason", "unknown tests specification extension").detail("File", fileName.c_str());
			return Void();
		}
		specFile.close();
	} else {
		// Built-in spec for the standalone consistency checker.
		TestSpec spec;
		Standalone<VectorRef<KeyValueRef>> options;
		spec.title = LiteralStringRef("ConsistencyCheck");
		spec.databasePingDelay = 0;
		spec.timeout = 0;
		spec.waitForQuiescenceBegin = false;
		spec.waitForQuiescenceEnd = false;
		std::string rateLimitMax = format("%d", CLIENT_KNOBS->CONSISTENCY_CHECK_RATE_LIMIT_MAX);
		options.push_back_deep( options.arena(), KeyValueRef( LiteralStringRef("testName"), LiteralStringRef("ConsistencyCheck") ) );
		options.push_back_deep( options.arena(), KeyValueRef( LiteralStringRef("performQuiescentChecks"), LiteralStringRef("false") ) );
		options.push_back_deep( options.arena(), KeyValueRef( LiteralStringRef("distributed"), LiteralStringRef("false") ) );
		options.push_back_deep( options.arena(), KeyValueRef( LiteralStringRef("failureIsError"), LiteralStringRef("true") ) );
		options.push_back_deep( options.arena(), KeyValueRef( LiteralStringRef("indefinite"), LiteralStringRef("true") ) );
		options.push_back_deep( options.arena(), KeyValueRef( LiteralStringRef("rateLimitMax"), StringRef(rateLimitMax) ) );
		options.push_back_deep( options.arena(), KeyValueRef( LiteralStringRef("shuffleShards"), LiteralStringRef("true") ) );
		spec.options.push_back_deep( spec.options.arena(), options );
		testSpecs.push_back( spec );
	}

	Future<Void> testRun;
	if( at != TEST_HERE ) {
		testRun = reportErrors( runTests( cc, ci, testSpecs, at, minTestersExpected, startingConfiguration, locality ), "RunTests" );
	} else {
		// Run a single tester inside this process.
		Reference<AsyncVar<ServerDBInfo>> db( new AsyncVar<ServerDBInfo> );
		vector<TesterInterface> iTesters(1);
		actors.push_back( reportErrors( monitorServerDBInfo( cc, LocalityData(), db ), "MonitorServerDBInfo" ) ); // FIXME: Locality
		actors.push_back( reportErrors( testerServerCore( iTesters[0], connFile, db, locality ), "TesterServerCore" ) );
		testRun = runTests( cc, ci, iTesters, testSpecs, startingConfiguration, locality );
	}

	choose {
		when( wait( testRun ) ) { return Void(); }
		// None of the background actors is supposed to finish before the tests do.
		when( wait( quorum(actors, 1) ) ) { ASSERT(false); throw internal_error(); }
	}
}
|