foundationdb/fdbserver/tester.actor.cpp

1381 lines
52 KiB
C++

/*
* tester.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <boost/algorithm/string/predicate.hpp>
#include <cinttypes>
#include <fstream>
#include <functional>
#include <map>
#include <toml.hpp>
#include "flow/ActorCollection.h"
#include "fdbrpc/sim_validation.h"
#include "fdbrpc/simulator.h"
#include "fdbclient/ClusterInterface.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/SystemData.h"
#include "fdbserver/TesterInterface.actor.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "fdbserver/workloads/workloads.actor.h"
#include "fdbserver/Status.h"
#include "fdbserver/QuietDatabase.h"
#include "fdbclient/MonitorLeader.h"
#include "fdbserver/CoordinationInterface.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.
using namespace std;
WorkloadContext::WorkloadContext() {}
WorkloadContext::WorkloadContext( const WorkloadContext& r )
: options(r.options), clientId(r.clientId), clientCount(r.clientCount),
dbInfo(r.dbInfo), sharedRandomNumber(r.sharedRandomNumber)
{
}
WorkloadContext::~WorkloadContext() {}
const char HEX_CHAR_LOOKUP[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
void emplaceIndex( uint8_t *data, int offset, int64_t index ) {
for( int i = 0; i < 16; i++ ) {
data[(15-i) + offset] = HEX_CHAR_LOOKUP[index & 0xf];
index = index>>4;
}
}
Key doubleToTestKey( double p ) {
return StringRef(format("%016llx", *(uint64_t*)&p));
}
double testKeyToDouble( const KeyRef& p ) {
uint64_t x = 0;
sscanf( p.toString().c_str(), "%" SCNx64, &x );
return *(double*)&x;
}
Key doubleToTestKey(double p, const KeyRef& prefix) {
return doubleToTestKey(p).withPrefix(prefix);
}
Key KVWorkload::getRandomKey() {
return getRandomKey(absentFrac);
}
Key KVWorkload::getRandomKey(double absentFrac) {
if ( absentFrac > 0.0000001 ) {
return getRandomKey(deterministicRandom()->random01() < absentFrac);
} else {
return getRandomKey(false);
}
}
Key KVWorkload::getRandomKey(bool absent) {
return keyForIndex(deterministicRandom()->randomInt( 0, nodeCount ), absent);
}
Key KVWorkload::keyForIndex( uint64_t index ) {
if ( absentFrac > 0.0000001 ) {
return keyForIndex(index, deterministicRandom()->random01() < absentFrac);
} else {
return keyForIndex(index, false);
}
}
Key KVWorkload::keyForIndex( uint64_t index, bool absent ) {
int adjustedKeyBytes = (absent) ? (keyBytes + 1) : keyBytes;
Key result = makeString( adjustedKeyBytes );
uint8_t* data = mutateString( result );
memset(data, '.', adjustedKeyBytes);
int idx = 0;
if( nodePrefix > 0 ) {
ASSERT(keyBytes >= 32);
emplaceIndex( data, 0, nodePrefix );
idx += 16;
}
ASSERT(keyBytes >= 16);
double d = double(index) / nodeCount;
emplaceIndex( data, idx, *(int64_t*)&d );
return result;
}
double testKeyToDouble(const KeyRef& p, const KeyRef& prefix) {
return testKeyToDouble(p.removePrefix(prefix));
}
ACTOR Future<Void> poisson( double *last, double meanInterval ) {
*last += meanInterval*-log( deterministicRandom()->random01() );
wait( delayUntil( *last ) );
return Void();
}
ACTOR Future<Void> uniform( double *last, double meanInterval ) {
*last += meanInterval;
wait( delayUntil( *last ) );
return Void();
}
Value getOption( VectorRef<KeyValueRef> options, Key key, Value defaultValue) {
for(int i = 0; i < options.size(); i++)
if( options[i].key == key ) {
Value value = options[i].value;
options[i].value = LiteralStringRef("");
return value;
}
return defaultValue;
}
int getOption( VectorRef<KeyValueRef> options, Key key, int defaultValue) {
for(int i = 0; i < options.size(); i++)
if( options[i].key == key ) {
int r;
if( sscanf(options[i].value.toString().c_str(), "%d", &r) ) {
options[i].value = LiteralStringRef("");
return r;
} else {
TraceEvent(SevError, "InvalidTestOption").detail("OptionName", key);
throw test_specification_invalid();
}
}
return defaultValue;
}
uint64_t getOption( VectorRef<KeyValueRef> options, Key key, uint64_t defaultValue) {
for(int i = 0; i < options.size(); i++)
if( options[i].key == key ) {
uint64_t r;
if( sscanf(options[i].value.toString().c_str(), "%" SCNd64, &r) ) {
options[i].value = LiteralStringRef("");
return r;
} else {
TraceEvent(SevError, "InvalidTestOption").detail("OptionName", key);
throw test_specification_invalid();
}
}
return defaultValue;
}
int64_t getOption( VectorRef<KeyValueRef> options, Key key, int64_t defaultValue) {
for(int i = 0; i < options.size(); i++)
if( options[i].key == key ) {
int64_t r;
if( sscanf(options[i].value.toString().c_str(), "%" SCNd64, &r) ) {
options[i].value = LiteralStringRef("");
return r;
} else {
TraceEvent(SevError, "InvalidTestOption").detail("OptionName", key);
throw test_specification_invalid();
}
}
return defaultValue;
}
double getOption( VectorRef<KeyValueRef> options, Key key, double defaultValue) {
for(int i = 0; i < options.size(); i++)
if( options[i].key == key ) {
float r;
if( sscanf(options[i].value.toString().c_str(), "%f", &r) ) {
options[i].value = LiteralStringRef("");
return r;
}
}
return defaultValue;
}
bool getOption( VectorRef<KeyValueRef> options, Key key, bool defaultValue ) {
Value p = getOption(options, key, defaultValue ? LiteralStringRef("true") : LiteralStringRef("false"));
if (p == LiteralStringRef("true"))
return true;
if (p == LiteralStringRef("false"))
return false;
ASSERT(false);
return false; // Assure that compiler is fine with the function
}
vector<std::string> getOption( VectorRef<KeyValueRef> options, Key key, vector<std::string> defaultValue ) {
for(int i = 0; i < options.size(); i++)
if( options[i].key == key ) {
vector<std::string> v;
int begin = 0;
for(int c=0; c<options[i].value.size(); c++)
if (options[i].value[c] == ',') {
v.push_back( options[i].value.substr(begin, c-begin).toString() );
begin = c+1;
}
v.push_back(options[i].value.substr(begin).toString());
options[i].value = LiteralStringRef("");
return v;
}
return defaultValue;
}
// returns unconsumed options
Standalone<VectorRef<KeyValueRef>> checkAllOptionsConsumed( VectorRef<KeyValueRef> options ) {
static StringRef nothing = LiteralStringRef("");
Standalone<VectorRef<KeyValueRef>> unconsumed;
for(int i = 0; i < options.size(); i++)
if( !(options[i].value == nothing) ) {
TraceEvent(SevError,"OptionNotConsumed").detail("Key", options[i].key.toString().c_str()).detail("Value", options[i].value.toString().c_str());
unconsumed.push_back_deep( unconsumed.arena(), options[i] );
}
return unconsumed;
}
struct CompoundWorkload : TestWorkload {
vector<TestWorkload*> workloads;
CompoundWorkload( WorkloadContext& wcx ) : TestWorkload( wcx ) {}
CompoundWorkload* add( TestWorkload* w ) { workloads.push_back(w); return this; }
virtual ~CompoundWorkload() { for(int w=0; w<workloads.size(); w++) delete workloads[w]; }
virtual std::string description() {
std::string d;
for(int w=0; w<workloads.size(); w++)
d += workloads[w]->description() + (w==workloads.size()-1?"":";");
return d;
}
virtual Future<Void> setup( Database const& cx ) {
vector<Future<Void>> all;
for(int w=0; w<workloads.size(); w++)
all.push_back( workloads[w]->setup(cx) );
return waitForAll(all);
}
virtual Future<Void> start( Database const& cx ) {
vector<Future<Void>> all;
for(int w=0; w<workloads.size(); w++)
all.push_back( workloads[w]->start(cx) );
return waitForAll(all);
}
virtual Future<bool> check( Database const& cx ) {
vector<Future<bool>> all;
for(int w=0; w<workloads.size(); w++)
all.push_back( workloads[w]->check(cx) );
return allTrue(all);
}
virtual void getMetrics( vector<PerfMetric>& m ) {
for(int w=0; w<workloads.size(); w++) {
vector<PerfMetric> p;
workloads[w]->getMetrics(p);
for(int i=0; i<p.size(); i++)
m.push_back( p[i].withPrefix( workloads[w]->description()+"." ) );
}
}
virtual double getCheckTimeout() {
double m = 0;
for(int w=0; w<workloads.size(); w++)
m = std::max( workloads[w]->getCheckTimeout(), m );
return m;
}
};
TestWorkload *getWorkloadIface( WorkloadRequest work, VectorRef<KeyValueRef> options, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
Value testName = getOption( options, LiteralStringRef("testName"), LiteralStringRef("no-test-specified") );
WorkloadContext wcx;
wcx.clientId = work.clientId;
wcx.clientCount = work.clientCount;
wcx.dbInfo = dbInfo;
wcx.options = options;
wcx.sharedRandomNumber = work.sharedRandomNumber;
TestWorkload *workload = IWorkloadFactory::create( testName.toString(), wcx );
auto unconsumedOptions = checkAllOptionsConsumed( workload ? workload->options : VectorRef<KeyValueRef>() );
if( !workload || unconsumedOptions.size() ) {
TraceEvent evt(SevError,"TestCreationError");
evt.detail("TestName", testName);
if( !workload ) {
evt.detail("Reason", "Null workload");
fprintf(stderr, "ERROR: Workload could not be created, perhaps testName (%s) is not a valid workload\n", printable(testName).c_str());
}
else {
evt.detail("Reason", "Not all options consumed");
fprintf(stderr, "ERROR: Workload had invalid options. The following were unrecognized:\n");
for(int i = 0; i < unconsumedOptions.size(); i++)
fprintf(stderr, " '%s' = '%s'\n", unconsumedOptions[i].key.toString().c_str(), unconsumedOptions[i].value.toString().c_str());
delete workload;
}
throw test_specification_invalid();
}
return workload;
}
TestWorkload *getWorkloadIface( WorkloadRequest work, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
if( work.options.size() < 1 ) {
TraceEvent(SevError,"TestCreationError").detail("Reason", "No options provided");
fprintf(stderr, "ERROR: No options were provided for workload.\n");
throw test_specification_invalid();
}
if( work.options.size() == 1 )
return getWorkloadIface( work, work.options[0], dbInfo );
WorkloadContext wcx;
wcx.clientId = work.clientId;
wcx.clientCount = work.clientCount;
wcx.sharedRandomNumber = work.sharedRandomNumber;
// FIXME: Other stuff not filled in; why isn't this constructed here and passed down to the other getWorkloadIface()?
CompoundWorkload *compound = new CompoundWorkload( wcx );
for( int i = 0; i < work.options.size(); i++ ) {
TestWorkload *workload = getWorkloadIface( work, work.options[i], dbInfo );
compound->add( workload );
}
return compound;
}
ACTOR Future<Void> databaseWarmer( Database cx ) {
loop {
state Transaction tr( cx );
wait(success(tr.getReadVersion()));
wait( delay( 0.25 ) );
}
}
// Tries indefinitly to commit a simple, self conflicting transaction
ACTOR Future<Void> pingDatabase( Database cx ) {
state Transaction tr( cx );
loop {
try {
tr.setOption( FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE );
tr.setOption( FDBTransactionOptions::LOCK_AWARE );
Optional<Value> v = wait( tr.get( StringRef("/Liveness/" + deterministicRandom()->randomUniqueID().toString() ) ) );
tr.makeSelfConflicting();
wait( tr.commit() );
return Void();
} catch( Error& e ) {
TraceEvent("PingingDatabaseTransactionError").error(e);
wait( tr.onError( e ) );
}
}
}
ACTOR Future<Void> testDatabaseLiveness( Database cx, double databasePingDelay, string context, double startDelay = 0.0 ) {
wait( delay( startDelay ) );
loop {
try {
state double start = now();
auto traceMsg = "PingingDatabaseLiveness_" + context;
TraceEvent(traceMsg.c_str());
wait( timeoutError( pingDatabase( cx ), databasePingDelay ) );
double pingTime = now() - start;
ASSERT( pingTime > 0 );
TraceEvent(("PingingDatabaseLivenessDone_" + context).c_str()).detail("TimeTaken", pingTime);
wait( delay( databasePingDelay - pingTime ) );
} catch( Error& e ) {
if( e.code() != error_code_actor_cancelled )
TraceEvent(SevError, ("PingingDatabaseLivenessError_" + context).c_str()).error(e)
.detail("PingDelay", databasePingDelay);
throw;
}
}
}
template <class T>
void sendResult( ReplyPromise<T>& reply, Optional<ErrorOr<T>> const& result ) {
auto& res = result.get();
if (res.isError())
reply.sendError(res.getError());
else
reply.send(res.get());
}
ACTOR Future<Void> runWorkloadAsync( Database cx, WorkloadInterface workIface, TestWorkload *workload, double databasePingDelay ) {
state unique_ptr<TestWorkload> delw(workload);
state Optional<ErrorOr<Void>> setupResult;
state Optional<ErrorOr<Void>> startResult;
state Optional<ErrorOr<CheckReply>> checkResult;
state ReplyPromise<Void> setupReq;
state ReplyPromise<Void> startReq;
state ReplyPromise<CheckReply> checkReq;
TraceEvent("TestBeginAsync", workIface.id()).detail("Workload", workload->description()).detail("DatabasePingDelay", databasePingDelay);
state Future<Void> databaseError = databasePingDelay == 0.0 ? Never() : testDatabaseLiveness( cx, databasePingDelay, "RunWorkloadAsync" );
loop choose {
when( ReplyPromise<Void> req = waitNext( workIface.setup.getFuture() ) ) {
printf("Test received trigger for setup...\n");
TraceEvent("TestSetupBeginning", workIface.id()).detail("Workload", workload->description());
setupReq = req;
if (!setupResult.present()) {
try {
wait( workload->setup(cx) || databaseError );
TraceEvent("TestSetupComplete", workIface.id()).detail("Workload", workload->description());
setupResult = Void();
} catch (Error& e) {
setupResult = operation_failed();
TraceEvent(SevError, "TestSetupError", workIface.id()).error(e).detail("Workload", workload->description());
if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
}
}
sendResult( setupReq, setupResult );
}
when( ReplyPromise<Void> req = waitNext( workIface.start.getFuture() ) ) {
startReq = req;
if (!startResult.present()) {
try {
TraceEvent("TestStarting", workIface.id()).detail("Workload", workload->description());
wait( workload->start(cx) || databaseError );
startResult = Void();
} catch( Error& e ) {
startResult = operation_failed();
if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
TraceEvent(SevError,"TestFailure", workIface.id())
.error(e, true)
.detail("Reason", "Error starting workload")
.detail("Workload", workload->description());
//ok = false;
}
TraceEvent("TestComplete", workIface.id()).detail("Workload", workload->description()).detail("OK", !startResult.get().isError());
printf("%s complete\n", workload->description().c_str());
}
sendResult( startReq, startResult );
}
when(ReplyPromise<CheckReply> req = waitNext(workIface.check.getFuture())) {
checkReq = req;
if (!checkResult.present()) {
try {
bool check = wait( timeoutError( workload->check(cx), workload->getCheckTimeout() ) );
checkResult = CheckReply{ (!startResult.present() || !startResult.get().isError()) && check };
} catch (Error& e) {
checkResult = operation_failed(); // was: checkResult = false;
if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
TraceEvent(SevError,"TestFailure", workIface.id())
.error(e)
.detail("Reason", "Error checking workload")
.detail("Workload", workload->description());
//ok = false;
}
}
sendResult( checkReq, checkResult );
}
when( ReplyPromise<vector<PerfMetric>> req = waitNext( workIface.metrics.getFuture() ) ) {
state ReplyPromise<vector<PerfMetric>> s_req = req;
try {
vector<PerfMetric> m;
workload->getMetrics( m );
TraceEvent("WorkloadSendMetrics", workIface.id()).detail( "Count", m.size() );
req.send( m );
} catch (Error& e) {
if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
TraceEvent(SevError, "WorkloadSendMetrics", workIface.id()).error(e);
s_req.sendError( operation_failed() );
}
}
when( ReplyPromise<Void> r = waitNext( workIface.stop.getFuture() ) ) {
r.send(Void());
break;
}
}
return Void();
}
ACTOR Future<Void> testerServerWorkload( WorkloadRequest work, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<struct ServerDBInfo>> dbInfo, LocalityData locality ) {
state WorkloadInterface workIface;
state bool replied = false;
state Database cx;
try {
std::map<std::string, std::string> details;
details["WorkloadTitle"] = printable(work.title);
details["ClientId"] = format("%d", work.clientId);
details["ClientCount"] = format("%d", work.clientCount);
details["WorkloadTimeout"] = format("%d", work.timeout);
startRole(Role::TESTER, workIface.id(), UID(), details);
if( work.useDatabase ) {
cx = Database::createDatabase(ccf, -1, true, locality);
wait( delay(1.0) );
}
// add test for "done" ?
TraceEvent("WorkloadReceived", workIface.id()).detail("Title", work.title );
TestWorkload *workload = getWorkloadIface( work, dbInfo );
if(!workload) {
TraceEvent("TestCreationError").detail("Reason", "Workload could not be created");
fprintf(stderr, "ERROR: The workload could not be created.\n");
throw test_specification_invalid();
}
Future<Void> test = runWorkloadAsync(cx, workIface, workload, work.databasePingDelay) || traceRole(Role::TESTER, workIface.id());
work.reply.send(workIface);
replied = true;
if(work.timeout > 0) {
test = timeoutError(test,work.timeout);
}
wait(test);
endRole(Role::TESTER, workIface.id(), "Complete");
} catch (Error& e) {
if (!replied) {
if (e.code() == error_code_test_specification_invalid)
work.reply.sendError(e);
else
work.reply.sendError( operation_failed() );
}
bool ok = e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete || e.code() == error_code_actor_cancelled;
endRole(Role::TESTER, workIface.id(), "Error", ok, e);
if (e.code() != error_code_test_specification_invalid && e.code() != error_code_timed_out) {
throw; // fatal errors will kill the testerServer as well
}
}
return Void();
}
ACTOR Future<Void> testerServerCore( TesterInterface interf, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<struct ServerDBInfo>> dbInfo, LocalityData locality ) {
state PromiseStream<Future<Void>> addWorkload;
state Future<Void> workerFatalError = actorCollection(addWorkload.getFuture());
TraceEvent("StartingTesterServerCore", interf.id());
loop choose {
when (wait(workerFatalError)) {}
when (WorkloadRequest work = waitNext( interf.recruitments.getFuture() )) {
addWorkload.send(testerServerWorkload(work, ccf, dbInfo, locality));
}
}
}
ACTOR Future<Void> clearData( Database cx ) {
state Transaction tr( cx );
loop {
try {
// This transaction needs to be self-conflicting, but not conflict consistently with
// any other transactions
tr.clear( normalKeys );
tr.makeSelfConflicting();
wait(success(tr.getReadVersion())); // required since we use addReadConflictRange but not get
wait( tr.commit() );
TraceEvent("TesterClearingDatabase").detail("AtVersion", tr.getCommittedVersion());
break;
} catch (Error& e) {
TraceEvent(SevWarn, "TesterClearingDatabaseError").error(e);
wait( tr.onError(e) );
}
}
return Void();
}
Future<Void> dumpDatabase( Database const& cx, std::string const& outputFilename, KeyRange const& range );
int passCount = 0;
int failCount = 0;
vector<PerfMetric> aggregateMetrics( vector<vector<PerfMetric>> metrics ) {
std::map<std::string, vector<PerfMetric>> metricMap;
for(int i = 0; i < metrics.size(); i++) {
vector<PerfMetric> workloadMetrics = metrics[i];
TraceEvent("MetricsReturned").detail( "Count", workloadMetrics.size() );
for(int m=0; m<workloadMetrics.size(); m++) {
printf( "Metric (%d, %d): %s, %f, %s\n", i, m, workloadMetrics[m].name().c_str(),
workloadMetrics[m].value(), workloadMetrics[m].formatted().c_str() );
metricMap[workloadMetrics[m].name()].push_back( workloadMetrics[m] );
}
}
TraceEvent("Metric")
.detail( "Name", "Reporting Clients" )
.detail( "Value", (double)metrics.size() )
.detail( "Formatted", format("%d", metrics.size()).c_str() );
vector<PerfMetric> result;
std::map<std::string, vector<PerfMetric>>::iterator it;
for( it = metricMap.begin(); it != metricMap.end(); it++ ) {
auto& vec = it->second;
if( !vec.size() )
continue;
double sum = 0;
for(int i = 0; i < vec.size(); i++ )
sum += vec[i].value();
if( vec[0].averaged() && vec.size() )
sum /= vec.size();
result.push_back( PerfMetric( vec[0].name(), sum, false, vec[0].format_code() ) );
}
return result;
}
void logMetrics( vector<PerfMetric> metrics ) {
for(int idx=0; idx < metrics.size(); idx++ )
TraceEvent("Metric")
.detail( "Name", metrics[idx].name() )
.detail( "Value", metrics[idx].value() )
.detail( "Formatted", format(metrics[idx].format_code().c_str(), metrics[idx].value() ) );
}
template <class T>
void throwIfError(const std::vector<Future<ErrorOr<T>>> &futures, std::string errorMsg) {
for(auto &future:futures) {
if(future.get().isError()) {
TraceEvent(SevError, errorMsg.c_str()).error(future.get().getError());
throw future.get().getError();
}
}
}
ACTOR Future<DistributedTestResults> runWorkload( Database cx, std::vector< TesterInterface > testers,
TestSpec spec ) {
TraceEvent("TestRunning").detail( "WorkloadTitle", spec.title )
.detail("TesterCount", testers.size()).detail("Phases", spec.phases)
.detail("TestTimeout", spec.timeout);
state vector< Future< WorkloadInterface > > workRequests;
state vector<vector<PerfMetric>> metricsResults;
state int i = 0;
state int success = 0;
state int failure = 0;
int64_t sharedRandom = deterministicRandom()->randomInt64(0,10000000);
for(; i < testers.size(); i++) {
WorkloadRequest req;
req.title = spec.title;
req.useDatabase = spec.useDB;
req.timeout = spec.timeout;
req.databasePingDelay = spec.databasePingDelay;
req.options = spec.options;
req.clientId = i;
req.clientCount = testers.size();
req.sharedRandomNumber = sharedRandom;
workRequests.push_back( testers[i].recruitments.getReply( req ) );
}
state vector< WorkloadInterface > workloads = wait( getAll( workRequests ) );
state double waitForFailureTime = g_network->isSimulated() ? 24*60*60 : 60;
if( g_network->isSimulated() && spec.simCheckRelocationDuration )
debug_setCheckRelocationDuration( true );
if( spec.phases & TestWorkload::SETUP ) {
state std::vector< Future<ErrorOr<Void>> > setups;
printf("setting up test (%s)...\n", printable(spec.title).c_str());
TraceEvent("TestSetupStart").detail("WorkloadTitle", spec.title);
for(int i= 0; i < workloads.size(); i++)
setups.push_back( workloads[i].setup.template getReplyUnlessFailedFor<Void>( waitForFailureTime, 0) );
wait( waitForAll( setups ) );
throwIfError(setups, "SetupFailedForWorkload" + printable(spec.title));
TraceEvent("TestSetupComplete").detail("WorkloadTitle", spec.title);
}
if( spec.phases & TestWorkload::EXECUTION ) {
TraceEvent("TestStarting").detail("WorkloadTitle", spec.title);
printf("running test (%s)...\n", printable(spec.title).c_str());
state std::vector< Future<ErrorOr<Void>> > starts;
for(int i= 0; i < workloads.size(); i++)
starts.push_back( workloads[i].start.template getReplyUnlessFailedFor<Void>(waitForFailureTime, 0) );
wait( waitForAll( starts ) );
throwIfError(starts, "StartFailedForWorkload" + printable(spec.title));
printf("%s complete\n", printable(spec.title).c_str());
TraceEvent("TestComplete").detail("WorkloadTitle", spec.title);
}
if( spec.phases & TestWorkload::CHECK ) {
if( spec.useDB && ( spec.phases & TestWorkload::EXECUTION ) ) {
wait( delay(3.0) );
}
state std::vector<Future<ErrorOr<CheckReply>>> checks;
TraceEvent("CheckingResults");
printf("checking test (%s)...\n", printable(spec.title).c_str());
for(int i= 0; i < workloads.size(); i++)
checks.push_back(workloads[i].check.template getReplyUnlessFailedFor<CheckReply>(waitForFailureTime, 0));
wait( waitForAll( checks ) );
throwIfError(checks, "CheckFailedForWorkload" + printable(spec.title));
for(int i = 0; i < checks.size(); i++) {
if (checks[i].get().get().value)
success++;
else
failure++;
}
}
if( spec.phases & TestWorkload::METRICS ) {
state std::vector< Future<ErrorOr<vector<PerfMetric>>> > metricTasks;
printf("fetching metrics (%s)...\n", printable(spec.title).c_str());
TraceEvent("TestFetchingMetrics").detail("WorkloadTitle", spec.title);
for(int i= 0; i < workloads.size(); i++)
metricTasks.push_back( workloads[i].metrics.template getReplyUnlessFailedFor<vector<PerfMetric>>(waitForFailureTime, 0) );
wait( waitForAll( metricTasks ) );
throwIfError(metricTasks, "MetricFailedForWorkload" + printable(spec.title));
for(int i = 0; i < metricTasks.size(); i++) {
metricsResults.push_back( metricTasks[i].get().get() );
}
}
// Stopping the workloads is unreliable, but they have a timeout
// FIXME: stop if one of the above phases throws an exception
for(int i=0; i<workloads.size(); i++)
workloads[i].stop.send(ReplyPromise<Void>());
return DistributedTestResults( aggregateMetrics( metricsResults ), success, failure );
}
//Sets the database configuration by running the ChangeConfig workload
ACTOR Future<Void> changeConfiguration(Database cx, std::vector< TesterInterface > testers, StringRef configMode) {
state TestSpec spec;
Standalone<VectorRef<KeyValueRef>> options;
spec.title = LiteralStringRef("ChangeConfig");
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("testName"), LiteralStringRef("ChangeConfig")));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("configMode"), configMode));
spec.options.push_back_deep(spec.options.arena(), options);
DistributedTestResults testResults = wait(runWorkload(cx, testers, spec));
return Void();
}
//Runs the consistency check workload, which verifies that the database is in a consistent state
ACTOR Future<Void> checkConsistency(Database cx, std::vector< TesterInterface > testers, bool doQuiescentCheck,
double quiescentWaitTimeout, double softTimeLimit, double databasePingDelay, Reference<AsyncVar<ServerDBInfo>> dbInfo) {
state TestSpec spec;
state double connectionFailures;
if( g_network->isSimulated() ) {
connectionFailures = g_simulator.connectionFailuresDisableDuration;
g_simulator.connectionFailuresDisableDuration = 1e6;
g_simulator.speedUpSimulation = true;
}
Standalone<VectorRef<KeyValueRef>> options;
StringRef performQuiescent = LiteralStringRef("false");
if (doQuiescentCheck) {
performQuiescent = LiteralStringRef("true");
}
spec.title = LiteralStringRef("ConsistencyCheck");
spec.databasePingDelay = databasePingDelay;
spec.timeout = 32000;
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("testName"), LiteralStringRef("ConsistencyCheck")));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("performQuiescentChecks"), performQuiescent));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("quiescentWaitTimeout"), ValueRef(options.arena(), format("%f", quiescentWaitTimeout))));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("distributed"), LiteralStringRef("false")));
spec.options.push_back_deep(spec.options.arena(), options);
state double start = now();
state bool lastRun = false;
loop {
DistributedTestResults testResults = wait(runWorkload(cx, testers, spec));
if(testResults.ok() || lastRun) {
if( g_network->isSimulated() ) {
g_simulator.connectionFailuresDisableDuration = connectionFailures;
}
return Void();
}
if(now() - start > softTimeLimit) {
spec.options[0].push_back_deep(spec.options.arena(), KeyValueRef(LiteralStringRef("failureIsError"), LiteralStringRef("true")));
lastRun = true;
}
wait( repairDeadDatacenter(cx, dbInfo, "ConsistencyCheck") );
}
}
ACTOR Future<bool> runTest( Database cx, std::vector< TesterInterface > testers, TestSpec spec, Reference<AsyncVar<ServerDBInfo>> dbInfo )
{
state DistributedTestResults testResults;
try {
Future<DistributedTestResults> fTestResults = runWorkload( cx, testers, spec );
if( spec.timeout > 0 ) {
fTestResults = timeoutError( fTestResults, spec.timeout );
}
DistributedTestResults _testResults = wait( fTestResults );
testResults = _testResults;
logMetrics( testResults.metrics );
} catch(Error& e) {
if( e.code() == error_code_timed_out ) {
TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Test timed out").detail("Timeout", spec.timeout);
fprintf(stderr, "ERROR: Test timed out after %d seconds.\n", spec.timeout);
testResults.failures = testers.size();
testResults.successes = 0;
} else
throw;
}
state bool ok = testResults.ok();
if( spec.useDB ) {
if( spec.dumpAfterTest ) {
try {
wait( timeoutError( dumpDatabase( cx, "dump after " + printable(spec.title) + ".html", allKeys ), 30.0 ) );
} catch (Error& e) {
TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Unable to dump database");
ok = false;
}
wait( delay(1.0) );
}
//Run the consistency check workload
if(spec.runConsistencyCheck) {
try {
bool quiescent = g_network->isSimulated() ? !BUGGIFY : spec.waitForQuiescenceEnd;
wait(timeoutError(checkConsistency(cx, testers, quiescent, 10000.0, 18000, spec.databasePingDelay, dbInfo), 20000.0));
}
catch(Error& e) {
TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Unable to perform consistency check");
ok = false;
}
}
}
TraceEvent(ok ? SevInfo : SevWarnAlways, "TestResults")
.detail("Workload", spec.title)
.detail("Passed", (int)ok);
//.detail("Metrics", metricSummary);
if (ok) { passCount++; }
else { failCount++; }
printf("%d test clients passed; %d test clients failed\n", testResults.successes, testResults.failures);
if( spec.useDB && spec.clearAfterTest ) {
try {
TraceEvent("TesterClearingDatabase");
wait( timeoutError(clearData(cx), 1000.0) );
} catch (Error& e) {
TraceEvent(SevError, "ErrorClearingDatabaseAfterTest").error(e);
throw; // If we didn't do this, we don't want any later tests to run on this DB
}
wait( delay(1.0) );
}
return ok;
}
std::map<std::string, std::function<void(const std::string&)>> testSpecGlobalKeys = {
// These are read by SimulatedCluster and used before testers exist. Thus, they must
// be recognized and accepted, but there's no point in placing them into a testSpec.
{"extraDB", [](const std::string& value) {
TraceEvent("TestParserTest").detail("ParsedExtraDB", "");
}},
{"configureLocked", [](const std::string& value) {
TraceEvent("TestParserTest").detail("ParsedConfigureLocked", "");
}},
{"minimumReplication", [](const std::string& value) {
TraceEvent("TestParserTest").detail("ParsedMinimumReplication", "");
}},
{"minimumRegions", [](const std::string& value) {
TraceEvent("TestParserTest").detail("ParsedMinimumRegions", "");
}},
{"logAntiQuorum", [](const std::string& value) {
TraceEvent("TestParserTest").detail("ParsedLogAntiQuorum", "");
}},
{"buggify", [](const std::string& value) {
TraceEvent("TestParserTest").detail("ParsedBuggify", "");
}},
// The test harness handles NewSeverity events specially.
{"StderrSeverity", [](const std::string& value) {
TraceEvent("StderrSeverity").detail("NewSeverity", value);
}},
{"ClientInfoLogging", [](const std::string& value) {
if (value == "false") {
setNetworkOption(FDBNetworkOptions::DISABLE_CLIENT_STATISTICS_LOGGING);
}
// else { } It is enable by default for tester
TraceEvent("TestParserTest").detail("ClientInfoLogging", value);
}},
};
std::map<std::string, std::function<void(const std::string& value, TestSpec* spec)>> testSpecTestKeys = {
{ "testTitle", [](const std::string& value, TestSpec* spec) {
spec->title = value;
TraceEvent("TestParserTest").detail("ParsedTest", spec->title );
}},
{ "timeout", [](const std::string& value, TestSpec* spec) {
sscanf( value.c_str(), "%d", &(spec->timeout) );
ASSERT( spec->timeout > 0 );
TraceEvent("TestParserTest").detail("ParsedTimeout", spec->timeout);
}},
{ "databasePingDelay", [](const std::string& value, TestSpec* spec) {
double databasePingDelay;
sscanf( value.c_str(), "%lf", &databasePingDelay );
ASSERT( databasePingDelay >= 0 );
if( !spec->useDB && databasePingDelay > 0 ) {
TraceEvent(SevError, "TestParserError")
.detail("Reason", "Cannot have non-zero ping delay on test that does not use database")
.detail("PingDelay", databasePingDelay).detail("UseDB", spec->useDB);
ASSERT( false );
}
spec->databasePingDelay = databasePingDelay;
TraceEvent("TestParserTest").detail("ParsedPingDelay", spec->databasePingDelay);
}},
{ "runSetup", [](const std::string& value, TestSpec* spec) {
spec->phases = TestWorkload::EXECUTION | TestWorkload::CHECK | TestWorkload::METRICS;
if( value == "true" )
spec->phases |= TestWorkload::SETUP;
TraceEvent("TestParserTest").detail("ParsedSetupFlag", (spec->phases & TestWorkload::SETUP) != 0);
}},
{ "dumpAfterTest", [](const std::string& value, TestSpec* spec) {
spec->dumpAfterTest = ( value == "true" );
TraceEvent("TestParserTest").detail("ParsedDumpAfter", spec->dumpAfterTest);
}},
{ "clearAfterTest", [](const std::string& value, TestSpec* spec) {
spec->clearAfterTest = ( value == "true" );
TraceEvent("TestParserTest").detail("ParsedClearAfter", spec->clearAfterTest);
}},
{ "useDB", [](const std::string& value, TestSpec* spec) {
spec->useDB = ( value == "true" );
TraceEvent("TestParserTest").detail("ParsedUseDB", spec->useDB);
if( !spec->useDB )
spec->databasePingDelay = 0.0;
}},
{ "startDelay", [](const std::string& value, TestSpec* spec) {
sscanf( value.c_str(), "%lf", &spec->startDelay );
TraceEvent("TestParserTest").detail("ParsedStartDelay", spec->startDelay);
}},
{ "runConsistencyCheck", [](const std::string& value, TestSpec* spec) {
spec->runConsistencyCheck = ( value == "true" );
TraceEvent("TestParserTest").detail("ParsedRunConsistencyCheck", spec->runConsistencyCheck);
}},
{ "waitForQuiescence", [](const std::string& value, TestSpec* spec) {
bool toWait = value == "true";
spec->waitForQuiescenceBegin = toWait;
spec->waitForQuiescenceEnd = toWait;
TraceEvent("TestParserTest").detail("ParsedWaitForQuiescence", toWait);
}},
{ "waitForQuiescenceBegin", [](const std::string& value, TestSpec* spec) {
bool toWait = value == "true";
spec->waitForQuiescenceBegin = toWait;
TraceEvent("TestParserTest").detail("ParsedWaitForQuiescenceBegin", toWait);
}},
{ "waitForQuiescenceEnd", [](const std::string& value, TestSpec* spec) {
bool toWait = value == "true";
spec->waitForQuiescenceEnd = toWait;
TraceEvent("TestParserTest").detail("ParsedWaitForQuiescenceEnd", toWait);
}},
{ "simCheckRelocationDuration", [](const std::string& value, TestSpec* spec) {
spec->simCheckRelocationDuration = (value == "true");
TraceEvent("TestParserTest").detail("ParsedSimCheckRelocationDuration", spec->simCheckRelocationDuration);
}},
{ "connectionFailuresDisableDuration", [](const std::string& value, TestSpec* spec) {
double connectionFailuresDisableDuration;
sscanf( value.c_str(), "%lf", &connectionFailuresDisableDuration );
ASSERT( connectionFailuresDisableDuration >= 0 );
spec->simConnectionFailuresDisableDuration = connectionFailuresDisableDuration;
if(g_network->isSimulated())
g_simulator.connectionFailuresDisableDuration = spec->simConnectionFailuresDisableDuration;
TraceEvent("TestParserTest").detail("ParsedSimConnectionFailuresDisableDuration", spec->simConnectionFailuresDisableDuration);
}},
{ "simBackupAgents", [](const std::string& value, TestSpec* spec) {
if (value == "BackupToFile" || value == "BackupToFileAndDB")
spec->simBackupAgents = ISimulator::BackupToFile;
else
spec->simBackupAgents = ISimulator::NoBackupAgents;
TraceEvent("TestParserTest").detail("ParsedSimBackupAgents", spec->simBackupAgents);
if (value == "BackupToDB" || value == "BackupToFileAndDB")
spec->simDrAgents = ISimulator::BackupToDB;
else
spec->simDrAgents = ISimulator::NoBackupAgents;
TraceEvent("TestParserTest").detail("ParsedSimDrAgents", spec->simDrAgents);
}},
{ "checkOnly", [](const std::string& value, TestSpec* spec) {
if(value == "true")
spec->phases = TestWorkload::CHECK;
}},
};
vector<TestSpec> readTests( ifstream& ifs ) {
TestSpec spec;
vector<TestSpec> result;
Standalone< VectorRef< KeyValueRef > > workloadOptions;
std::string cline;
bool beforeFirstTest = true;
bool parsingWorkloads = false;
while( ifs.good() ) {
getline(ifs, cline);
string line = removeWhitespace( string(cline) );
if( !line.size() || line.find( ';' ) == 0 )
continue;
size_t found = line.find( '=' );
if( found == string::npos )
// hmmm, not good
continue;
string attrib = removeWhitespace(line.substr( 0, found ));
string value = removeWhitespace(line.substr( found + 1 ));
if( attrib == "testTitle" ) {
beforeFirstTest = false;
parsingWorkloads = false;
if( workloadOptions.size() ) {
spec.options.push_back_deep( spec.options.arena(), workloadOptions );
workloadOptions = Standalone< VectorRef< KeyValueRef > >();
}
if( spec.options.size() && spec.title.size() ) {
result.push_back( spec );
spec = TestSpec();
}
testSpecTestKeys[attrib](value, &spec);
} else if ( testSpecTestKeys.find(attrib) != testSpecTestKeys.end() ) {
if (parsingWorkloads) TraceEvent(SevError, "TestSpecTestParamInWorkload").detail("Attrib", attrib).detail("Value", value);
testSpecTestKeys[attrib](value, &spec);
} else if ( testSpecGlobalKeys.find(attrib) != testSpecGlobalKeys.end() ) {
if (!beforeFirstTest) TraceEvent(SevError, "TestSpecGlobalParamInTest").detail("Attrib", attrib).detail("Value", value);
testSpecGlobalKeys[attrib](value);
}
else {
if( attrib == "testName" ) {
parsingWorkloads = true;
if( workloadOptions.size() ) {
TraceEvent("TestParserFlush").detail("Reason", "new (compound) test");
spec.options.push_back_deep( spec.options.arena(), workloadOptions );
workloadOptions = Standalone< VectorRef< KeyValueRef > >();
}
}
workloadOptions.push_back_deep( workloadOptions.arena(),
KeyValueRef( StringRef( attrib ), StringRef( value ) ) );
TraceEvent("TestParserOption").detail("ParsedKey", attrib).detail("ParsedValue", value);
}
}
if( workloadOptions.size() )
spec.options.push_back_deep( spec.options.arena(), workloadOptions );
if( spec.options.size() && spec.title.size() ) {
result.push_back( spec );
}
return result;
}
template <typename T>
std::string toml_to_string(const T& value) {
// TOML formatting converts numbers to strings exactly how they're in the file
// and thus, is equivalent to testspec. However, strings are quoted, so we
// must remove the quotes.
if (value.type() == toml::value_t::string) {
const std::string& formatted = toml::format(value);
return formatted.substr(1, formatted.size()-2);
} else {
return toml::format(value);
}
}
std::vector<TestSpec> readTOMLTests_( std::string fileName ) {
TestSpec spec;
Standalone< VectorRef< KeyValueRef > > workloadOptions;
std::vector<TestSpec> result;
const toml::value& conf = toml::parse(fileName);
// Handle all global settings
for (const auto& [k, v] : conf.as_table()) {
if (k == "test") {
continue;
}
if (testSpecGlobalKeys.find(k) != testSpecGlobalKeys.end()) {
testSpecGlobalKeys[k](toml_to_string(v));
} else {
TraceEvent(SevError, "TestSpecUnrecognizedGlobalParam").detail("Attrib", k).detail("Value", toml_to_string(v));
}
}
// Then parse each test
const toml::array& tests = toml::find(conf, "test").as_array();
for (const toml::value& test : tests) {
spec = TestSpec();
// First handle all test-level settings
for (const auto& [k, v] : test.as_table()) {
if (k == "workload") {
continue;
}
if (testSpecTestKeys.find(k) != testSpecTestKeys.end()) {
testSpecTestKeys[k](toml_to_string(v), &spec);
} else {
TraceEvent(SevError, "TestSpecUnrecognizedTestParam").detail("Attrib", k).detail("Value", toml_to_string(v));
}
}
// And then copy the workload attributes to spec.options
const toml::array& workloads = toml::find(test, "workload").as_array();
for (const toml::value& workload : workloads) {
workloadOptions = Standalone< VectorRef< KeyValueRef > >();
TraceEvent("TestParserFlush").detail("Reason", "new (compound) test");
for (const auto& [attrib, v] : workload.as_table()) {
const std::string& value = toml_to_string(v);
workloadOptions.push_back_deep( workloadOptions.arena(),
KeyValueRef( StringRef( attrib ), StringRef( value ) ) );
TraceEvent("TestParserOption").detail("ParsedKey", attrib).detail("ParsedValue", value);
}
spec.options.push_back_deep( spec.options.arena(), workloadOptions );
}
result.push_back(spec);
}
return result;
}
// A hack to catch and log std::exception, because TOML11 has very useful
// error messages, but the actor framework can't handle std::exception.
std::vector<TestSpec> readTOMLTests( std::string fileName ) {
try {
return readTOMLTests_( fileName );
} catch (std::exception &e) {
std::cerr << e.what() << std::endl;
TraceEvent("TOMLParseError").detail("Error", printable(e.what()));
// TODO: replace with toml_parse_error();
throw unknown_error();
}
}
ACTOR Future<Void> monitorServerDBInfo(Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
LocalityData locality,
Reference<AsyncVar<ServerDBInfo>> dbInfo) {
// Initially most of the serverDBInfo is not known, but we know our locality right away
ServerDBInfo localInfo;
localInfo.myLocality = locality;
dbInfo->set(localInfo);
loop {
GetServerDBInfoRequest req;
req.knownServerInfoID = dbInfo->get().id;
choose {
when( ServerDBInfo _localInfo = wait( ccInterface->get().present() ? brokenPromiseToNever( ccInterface->get().get().getServerDBInfo.getReply( req ) ) : Never() ) ) {
ServerDBInfo localInfo = _localInfo;
TraceEvent("GotServerDBInfoChange").detail("ChangeID", localInfo.id).detail("MasterID", localInfo.master.id())
.detail("RatekeeperID", localInfo.ratekeeper.present() ? localInfo.ratekeeper.get().id() : UID())
.detail("DataDistributorID", localInfo.distributor.present() ? localInfo.distributor.get().id() : UID());
localInfo.myLocality = locality;
dbInfo->set(localInfo);
}
when( wait( ccInterface->onChange() ) ) {
if(ccInterface->get().present())
TraceEvent("GotCCInterfaceChange").detail("CCID", ccInterface->get().get().id()).detail("CCMachine", ccInterface->get().get().getWorkers.getEndpoint().getPrimaryAddress());
}
}
}
}
ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc, Reference<AsyncVar<Optional<struct ClusterInterface>>> ci, vector< TesterInterface > testers, vector<TestSpec> tests, StringRef startingConfiguration, LocalityData locality ) {
state Database cx;
state Reference<AsyncVar<ServerDBInfo>> dbInfo( new AsyncVar<ServerDBInfo> );
state Future<Void> ccMonitor = monitorServerDBInfo(cc, LocalityData(), dbInfo); // FIXME: locality
state bool useDB = false;
state bool waitForQuiescenceBegin = false;
state bool waitForQuiescenceEnd = false;
state double startDelay = 0.0;
state double databasePingDelay = 1e9;
state ISimulator::BackupAgentType simBackupAgents = ISimulator::NoBackupAgents;
state ISimulator::BackupAgentType simDrAgents = ISimulator::NoBackupAgents;
state bool enableDD = false;
if (tests.empty()) useDB = true;
for( auto iter = tests.begin(); iter != tests.end(); ++iter ) {
if( iter->useDB ) useDB = true;
if( iter->waitForQuiescenceBegin ) waitForQuiescenceBegin = true;
if( iter->waitForQuiescenceEnd ) waitForQuiescenceEnd = true;
startDelay = std::max( startDelay, iter->startDelay );
databasePingDelay = std::min( databasePingDelay, iter->databasePingDelay );
if (iter->simBackupAgents != ISimulator::NoBackupAgents) simBackupAgents = iter->simBackupAgents;
if (iter->simDrAgents != ISimulator::NoBackupAgents) {
simDrAgents = iter->simDrAgents;
}
enableDD = enableDD || getOption(iter->options[0], LiteralStringRef("enableDD"), false);
}
if (g_network->isSimulated()) {
g_simulator.backupAgents = simBackupAgents;
g_simulator.drAgents = simDrAgents;
}
// turn off the database ping functionality if the suite of tests are not going to be using the database
if( !useDB )
databasePingDelay = 0.0;
if (useDB) {
cx = openDBOnServer(dbInfo);
}
state Future<Void> disabler = disableConnectionFailuresAfter(450, "Tester");
//Change the configuration (and/or create the database) if necessary
printf("startingConfiguration:%s start\n", startingConfiguration.toString().c_str());
if(useDB && startingConfiguration != StringRef()) {
try {
wait(timeoutError(changeConfiguration(cx, testers, startingConfiguration), 2000.0));
if (g_network->isSimulated() && enableDD) {
wait(success(setDDMode(cx, 1)));
}
}
catch(Error& e) {
TraceEvent(SevError, "TestFailure").error(e).detail("Reason", "Unable to set starting configuration");
}
}
if (useDB && waitForQuiescenceBegin) {
TraceEvent("TesterStartingPreTestChecks").detail("DatabasePingDelay", databasePingDelay).detail("StartDelay", startDelay);
try {
wait( quietDatabase( cx, dbInfo, "Start") ||
( databasePingDelay == 0.0 ? Never() : testDatabaseLiveness( cx, databasePingDelay, "QuietDatabaseStart", startDelay ) ) );
} catch( Error& e ) {
TraceEvent("QuietDatabaseStartExternalError").error(e);
throw;
}
}
TraceEvent("TestsExpectedToPass").detail("Count", tests.size());
state int idx = 0;
for(; idx < tests.size(); idx++ ) {
printf("Run test:%s start\n", tests[idx].title.toString().c_str());
wait(success(runTest(cx, testers, tests[idx], dbInfo)));
printf("Run test:%s Done.\n", tests[idx].title.toString().c_str());
// do we handle a failure here?
}
printf("\n%d tests passed; %d tests failed.\n", passCount, failCount);
//If the database was deleted during the workload we need to recreate the database
if(tests.empty() || useDB) {
if(waitForQuiescenceEnd) {
printf("Waiting for DD to end...\n");
try {
wait(quietDatabase(cx, dbInfo, "End", 0, 2e6, 2e6) ||
(databasePingDelay == 0.0 ? Never()
: testDatabaseLiveness(cx, databasePingDelay, "QuietDatabaseEnd")));
} catch( Error& e ) {
TraceEvent("QuietDatabaseEndExternalError").error(e);
throw;
}
}
}
printf("\n");
return Void();
}
ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc,
Reference<AsyncVar<Optional<struct ClusterInterface>>> ci, vector<TestSpec> tests, test_location_t at,
int minTestersExpected, StringRef startingConfiguration, LocalityData locality ) {
state int flags = (at == TEST_ON_SERVERS ? 0 : GetWorkersRequest::TESTER_CLASS_ONLY) | GetWorkersRequest::NON_EXCLUDED_PROCESSES_ONLY;
state Future<Void> testerTimeout = delay(600.0); // wait 600 sec for testers to show up
state vector<WorkerDetails> workers;
loop {
choose {
when( vector<WorkerDetails> w = wait( cc->get().present() ? brokenPromiseToNever( cc->get().get().getWorkers.getReply( GetWorkersRequest( flags ) ) ) : Never() ) ) {
if (w.size() >= minTestersExpected) {
workers = w;
break;
}
wait( delay(SERVER_KNOBS->WORKER_POLL_DELAY) );
}
when( wait( cc->onChange() ) ) {}
when( wait( testerTimeout ) ) {
TraceEvent(SevError, "TesterRecruitmentTimeout");
throw timed_out();
}
}
}
vector<TesterInterface> ts;
for(int i=0; i<workers.size(); i++)
ts.push_back(workers[i].interf.testerInterface);
wait( runTests( cc, ci, ts, tests, startingConfiguration, locality) );
return Void();
}
ACTOR Future<Void> runTests( Reference<ClusterConnectionFile> connFile, test_type_t whatToRun, test_location_t at,
int minTestersExpected, std::string fileName, StringRef startingConfiguration, LocalityData locality ) {
state vector<TestSpec> testSpecs;
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> cc( new AsyncVar<Optional<ClusterControllerFullInterface>> );
Reference<AsyncVar<Optional<ClusterInterface>>> ci( new AsyncVar<Optional<ClusterInterface>> );
vector<Future<Void>> actors;
actors.push_back( reportErrors(monitorLeader( connFile, cc ), "MonitorLeader") );
actors.push_back( reportErrors(extractClusterInterface( cc,ci ),"ExtractClusterInterface") );
if(whatToRun == TEST_TYPE_CONSISTENCY_CHECK) {
TestSpec spec;
Standalone<VectorRef<KeyValueRef>> options;
spec.title = LiteralStringRef("ConsistencyCheck");
spec.databasePingDelay = 0;
spec.timeout = 0;
spec.waitForQuiescenceBegin = false;
spec.waitForQuiescenceEnd = false;
std::string rateLimitMax = format("%d", CLIENT_KNOBS->CONSISTENCY_CHECK_RATE_LIMIT_MAX);
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("testName"), LiteralStringRef("ConsistencyCheck")));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("performQuiescentChecks"), LiteralStringRef("false")));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("distributed"), LiteralStringRef("false")));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("failureIsError"), LiteralStringRef("true")));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("indefinite"), LiteralStringRef("true")));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("rateLimitMax"), StringRef(rateLimitMax)));
options.push_back_deep(options.arena(), KeyValueRef(LiteralStringRef("shuffleShards"), LiteralStringRef("true")));
spec.options.push_back_deep(spec.options.arena(), options);
testSpecs.push_back(spec);
} else {
ifstream ifs;
ifs.open( fileName.c_str(), ifstream::in );
if( !ifs.good() ) {
TraceEvent(SevError, "TestHarnessFail").detail("Reason", "file open failed").detail("File", fileName.c_str());
fprintf(stderr, "ERROR: Could not open file `%s'\n", fileName.c_str());
return Void();
}
enableClientInfoLogging(); // Enable Client Info logging by default for tester
if ( boost::algorithm::ends_with(fileName, ".txt") ) {
testSpecs = readTests( ifs );
} else if ( boost::algorithm::ends_with(fileName, ".toml") ) {
// TOML is weird about opening the file as binary on windows, so we
// just let TOML re-open the file instead of using ifs.
testSpecs = readTOMLTests( fileName );
} else {
TraceEvent(SevError, "TestHarnessFail").detail("Reason", "unknown tests specification extension").detail("File", fileName.c_str());
return Void();
}
ifs.close();
}
Future<Void> tests;
if (at == TEST_HERE) {
Reference<AsyncVar<ServerDBInfo>> db( new AsyncVar<ServerDBInfo> );
vector<TesterInterface> iTesters(1);
actors.push_back( reportErrors(monitorServerDBInfo( cc, LocalityData(), db ), "MonitorServerDBInfo") ); // FIXME: Locality
actors.push_back( reportErrors(testerServerCore( iTesters[0], connFile, db, locality ), "TesterServerCore") );
tests = runTests( cc, ci, iTesters, testSpecs, startingConfiguration, locality );
} else {
tests = reportErrors(runTests(cc, ci, testSpecs, at, minTestersExpected, startingConfiguration, locality), "RunTests");
}
choose {
when (wait(tests)) { return Void(); }
when (wait(quorum(actors, 1))) { ASSERT(false); throw internal_error(); }
}
}