Merge in master

This commit is contained in:
Evan Tschannen 2019-07-05 15:49:24 -07:00
commit 15e894c724
153 changed files with 6427 additions and 2779 deletions

View File

@@ -37,11 +37,11 @@ else()
strip_debug_symbols(fdb_c)
endif()
add_dependencies(fdb_c fdb_c_generated fdb_c_options)
target_link_libraries(fdb_c PUBLIC fdbclient)
target_link_libraries(fdb_c PUBLIC $<BUILD_INTERFACE:fdbclient>)
target_include_directories(fdb_c PUBLIC
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}/foundationdb)
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/foundationdb>)
if(WIN32)
enable_language(ASM_MASM)
set_property(SOURCE ${asm_file} PROPERTY LANGUAGE ASM_MASM)
@@ -75,15 +75,58 @@ if(NOT WIN32)
target_link_libraries(mako PRIVATE fdb_c)
endif()
set(c_workloads_srcs
test/workloads/workloads.cpp
test/workloads/workloads.h
test/workloads/SimpleWorkload.cpp)
if(OPEN_FOR_IDE)
add_library(c_workloads OBJECT ${c_workloads_srcs})
else()
add_library(c_workloads SHARED ${c_workloads_srcs})
endif()
set_target_properties(c_workloads PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/share/foundationdb")
target_link_libraries(c_workloads PUBLIC fdb_c)
# TODO: re-enable once the old vcxproj-based build system is removed.
#generate_export_header(fdb_c EXPORT_MACRO_NAME "DLLEXPORT"
# EXPORT_FILE_NAME ${CMAKE_CURRENT_BINARY_DIR}/foundationdb/fdb_c_export.h)
fdb_install(TARGETS fdb_c
set(targets_export_name "FoundationDB-Client")
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
set(version_config "${generated_dir}/${targets_export_name}ConfigVersion.cmake")
set(project_config "${generated_dir}/${targets_export_name}Config.cmake")
include(CMakePackageConfigHelpers)
write_basic_package_version_file(
"${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY AnyNewerVersion
)
configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
fdb_install(
TARGETS fdb_c
EXPORT ${targets_export_name}
DESTINATION lib
COMPONENT clients)
fdb_install(
FILES foundationdb/fdb_c.h
${CMAKE_CURRENT_BINARY_DIR}/foundationdb/fdb_c_options.g.h
${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options
DESTINATION include COMPONENT clients)
#install(EXPORT fdbc DESTINATION ${FDB_LIB_DIR}/foundationdb COMPONENT clients)
DESTINATION include
DESTINATION_SUFFIX /foundationdb
COMPONENT clients)
fdb_install(
FILES "${project_config}" "${version_config}"
DESTINATION lib
DESTINATION_SUFFIX "/cmake/${targets_export_name}"
COMPONENT clients)
fdb_configure_and_install(
FILE "${PROJECT_SOURCE_DIR}/cmake/foundationdb-client.pc.in"
DESTINATION lib
DESTINATION_SUFFIX "/pkgconfig"
COMPONENT clients)
fdb_install(
EXPORT ${targets_export_name}
DESTINATION lib
DESTINATION_SUFFIX "/cmake/${targets_export_name}"
COMPONENT clients)

View File

@@ -0,0 +1,104 @@
/*
* ClientWorkload.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef CLIENT_WORKLOAD_H
#define CLIENT_WORKLOAD_H
#include <string>
#include <vector>
#include <functional>
#include <memory>
#ifndef DLLEXPORT
#if defined(_MSC_VER)
#define DLLEXPORT __declspec(dllexport)
#elif defined(__GNUG__)
#define DLLEXPORT __attribute__((visibility("default")))
#else
#error Missing symbol export
#endif
#endif
typedef struct FDB_future FDBFuture;
typedef struct FDB_database FDBDatabase;
typedef struct FDB_transaction FDBTransaction;
enum class FDBSeverity { Debug, Info, Warn, WarnAlways, Error };
// Logging interface implemented by the host; workloads use it to emit
// trace events with a severity, an event name, and key/value details.
class FDBLogger {
public:
virtual void trace(FDBSeverity sev, const std::string& name,
const std::vector<std::pair<std::string, std::string>>& details) = 0;
};
// Host-provided context for a workload instance: option lookup, time,
// randomness, and client topology. Implemented by the test harness.
class FDBWorkloadContext : public FDBLogger {
public:
// Process identity used by the harness (settable by the workload).
virtual uint64_t getProcessID() const = 0;
virtual void setProcessID(uint64_t processID) = 0;
// Current time in seconds — NOTE(review): presumably simulated time when
// run under the simulator; confirm with the host implementation.
virtual double now() const = 0;
// Host-supplied random value (used e.g. to seed a workload's RNG).
virtual uint32_t rnd() const = 0;
// Typed option lookup by name; returns defaultValue when the option is unset.
virtual bool getOption(const std::string& name, bool defaultValue) = 0;
virtual long getOption(const std::string& name, long defaultValue) = 0;
virtual unsigned long getOption(const std::string& name, unsigned long defaultValue) = 0;
virtual double getOption(const std::string& name, double defaultValue) = 0;
virtual std::string getOption(const std::string& name, std::string defaultValue) = 0;
// This client's index and the total number of workload clients.
virtual int clientId() const = 0;
virtual int clientCount() const = 0;
// NOTE(review): presumably the same value on every client — confirm.
virtual int64_t sharedRandomNumber() const = 0;
};
// Type-erased promise crossing the plugin/host boundary; concrete
// implementations are provided by the host.
struct FDBPromise {
	// Virtual destructor added: instances are held polymorphically (e.g. as
	// shared_ptr<FDBPromise> in GenericPromise) and may be deleted through
	// this base; without it, deletion via a raw base pointer is undefined.
	virtual ~FDBPromise() = default;
	// Fulfill the promise. The pointee is consumed synchronously; callers
	// pass the address of a stack value (see GenericPromise::send).
	virtual void send(void*) = 0;
};
// Thin typed wrapper over the type-erased FDBPromise. send() passes the
// address of a stack-local copy of `val`, so the callee must consume the
// value before send() returns.
template <class T>
class GenericPromise {
std::shared_ptr<FDBPromise> impl;
public:
template <class Ptr>
explicit GenericPromise(Ptr&& impl) : impl(std::forward<Ptr>(impl)) {}
void send(T val) { impl->send(&val); }
};
// One metric reported by FDBWorkload::getMetrics.
struct FDBPerfMetric {
std::string name;
double value;
// NOTE(review): presumably marks averaged values vs. raw totals — confirm
// against the host's metric aggregation.
bool averaged;
// printf-style precision hint for display.
std::string format_code = "0.3g";
};
class DLLEXPORT FDBWorkload {
public:
virtual std::string description() const = 0;
virtual bool init(FDBWorkloadContext* context) = 0;
virtual void setup(FDBDatabase* db, GenericPromise<bool> done) = 0;
virtual void start(FDBDatabase* db, GenericPromise<bool> done) = 0;
virtual void check(FDBDatabase* db, GenericPromise<bool> done) = 0;
virtual void getMetrics(std::vector<FDBPerfMetric>& out) const = 0;
virtual double getCheckTimeout() { return 3000; }
};
// Factory the host queries to instantiate a workload by name.
class DLLEXPORT FDBWorkloadFactory {
public:
virtual std::shared_ptr<FDBWorkload> create(const std::string& name) = 0;
};
#endif

View File

@@ -580,7 +580,7 @@ int run_transaction(FDBTransaction *transaction, mako_args_t *args,
sprintf(keystr + KEYPREFIXLEN + randstrlen, "%0.*d",
digits(args->txnspec.ops[i][OP_RANGE]), rangei);
if (rangei == 0) {
strncpy(keystr2, keystr, strlen(keystr));
strcpy(keystr2, keystr);
keystr2[strlen(keystr)] = '\0';
}
rc = run_op_insert(transaction, keystr, valstr);

View File

@@ -0,0 +1,372 @@
/*
* workloads.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define FDB_API_VERSION 610
#include "foundationdb/fdb_c.h"
#undef DLLEXPORT
#include "workloads.h"
#include <unordered_map>
#include <functional>
#include <random>
#include <iostream>
namespace {
struct SimpleWorkload : FDBWorkload {
static const std::string name;
static const std::string KEY_PREFIX;
std::mt19937 random;
bool success = true;
FDBWorkloadContext* context = nullptr;
unsigned long numTuples, numActors, insertsPerTx, opsPerTx;
double runFor;
// stats
std::vector<double> gets, txs, retries;
// CRTP base for the tiny hand-rolled "actors" that drive the FDB C client's
// callback API. A derived Actor provides a `state` member and a `callbacks`
// map from its State enum to ActorCallback handlers.
template <class Actor>
struct ActorBase {
using Callback = std::function<void(Actor*)>;
// Invoked when the actor finishes (successfully or not).
Callback done;
SimpleWorkload& self;
FDBDatabase* db;
fdb_error_t error = 0;
FDBFuture* currentFuture = nullptr;
// Guards against two outstanding waits on the same actor.
int numWaiters = 0;
ActorBase(const Callback& done, SimpleWorkload& self, FDBDatabase* db) : done(done), self(self), db(db) {}
// CRTP downcasts to the concrete actor type.
Actor* super() { return static_cast<Actor*>(this); }
const Actor* super() const { return static_cast<const Actor*>(this); }
// Arm a wait on `future`: record the state to resume in, then either run
// the trampoline immediately (future already ready) or register it with
// the C API. Only one wait may be outstanding at a time.
template <class State>
void wait(FDBFuture* future, State state) {
if (++numWaiters != 1) {
std::cerr << "More than one wait in one actor" << std::endl;
std::terminate();
}
super()->state = state;
currentFuture = future;
if (fdb_future_is_ready(future)) {
callback(future, this);
} else {
auto err = fdb_future_set_callback(future, &ActorBase<Actor>::callback, this);
if (err) {
// NOTE(review): this synchronous onError path does not decrement
// numWaiters, so a handler that re-enters wait() would hit the
// terminate() guard above — confirm this path is unreachable in
// practice.
auto self = static_cast<Actor*>(this);
self->callbacks[self->state].onError(err);
fdb_future_destroy(future);
}
}
}
// C-style trampoline: dispatch the completed future to the handler
// registered for the actor's current state, then destroy the future.
static void callback(FDBFuture* future, void* data) {
auto self = reinterpret_cast<Actor*>(data);
--self->numWaiters;
auto err = fdb_future_get_error(future);
if (err) {
self->callbacks[self->state].onError(fdb_future_get_error(future));
} else {
self->callbacks[self->state].onSuccess(future);
}
fdb_future_destroy(future);
}
};
// Per-state pair of continuations: one for a successful future, one for an
// error code.
struct ActorCallback {
std::function<void(FDBFuture*)> onSuccess;
std::function<void(fdb_error_t)> onError;
};
// Actor that inserts the contiguous tuple range [from, to), writing up to
// insertsPerTx keys per transaction and retrying through tx.onError.
struct PopulateActor : ActorBase<PopulateActor> {
enum class State { Commit, Retry };
State state;
FDBTransaction* tx = nullptr;
// lastTx counts inserts staged in the in-flight transaction so the Retry
// path can rewind `from` before re-running.
unsigned long from, to, lastTx = 0;
std::unordered_map<State, ActorCallback> callbacks;
PopulateActor(const Callback& promise, SimpleWorkload& self, FDBDatabase* db, unsigned long from,
unsigned long to)
: ActorBase(promise, self, db), from(from), to(to) {
error = fdb_database_create_transaction(db, &tx);
if (error) {
// NOTE(review): done(this) fires before the caller has stored this
// actor, and run() will invoke done again for the same error —
// confirm the completion callback tolerates both orderings.
done(this);
}
setCallbacks();
}
~PopulateActor() {
if (tx) {
fdb_transaction_destroy(tx);
}
}
// Stage up to insertsPerTx sequential key/value pairs and commit; finish
// when the range is exhausted or a construction error was recorded.
void run() {
if (error || from >= to) {
done(this);
return;
}
lastTx = 0;
unsigned ops = 0;
for (; from < to && ops < self.insertsPerTx; ++ops, ++from) {
std::string value = std::to_string(from);
std::string key = KEY_PREFIX + value;
fdb_transaction_set(tx, reinterpret_cast<const uint8_t*>(key.c_str()), key.size(),
reinterpret_cast<const uint8_t*>(value.c_str()), value.size());
}
lastTx = ops;
auto commit_future = fdb_transaction_commit(tx);
wait(commit_future, State::Commit);
}
void setCallbacks() {
// Commit succeeded: reset and continue with the next batch; on commit
// error, hand the code to tx.onError and wait in Retry.
callbacks[State::Commit] = {
[this](FDBFuture* future) {
fdb_transaction_reset(tx);
self.context->trace(FDBSeverity::Debug, "TXComplete", { { "NumInserts", std::to_string(lastTx) } });
lastTx = 0;
run();
},
[this](fdb_error_t error) { wait(fdb_transaction_on_error(tx, error), State::Retry); }
};
// Retry approved: rewind the failed batch and run again. If tx.onError
// itself fails, record workload failure and finish.
callbacks[State::Retry] = { [this](FDBFuture* future) {
from -= lastTx;
fdb_transaction_reset(tx);
run();
},
[this](fdb_error_t error) {
self.context->trace(FDBSeverity::Error, "AssertionFailure",
{ { "Reason", "tx.onError failed" },
{ "Error", std::string(fdb_get_error(error)) } });
self.success = false;
done(this);
} };
}
};
// Actor for the read phase: issues opsPerTx random point reads per
// transaction, commits between batches, and stops once runFor seconds of
// context time have elapsed.
struct ClientActor : ActorBase<ClientActor> {
	enum class State { Get, Commit, Retry };
	State state;
	std::unordered_map<State, ActorCallback> callbacks;
	// reads issued within the current transaction
	unsigned long ops = 0;
	std::uniform_int_distribution<decltype(SimpleWorkload::numTuples)> random;
	FDBTransaction* tx = nullptr;
	// per-actor counters, harvested by ClientRunner when the actor finishes
	unsigned numCommits = 0;
	unsigned numRetries = 0;
	unsigned numGets = 0;
	double startTime;
	ClientActor(const Callback& promise, SimpleWorkload& self, FDBDatabase* db)
	  : ActorBase(promise, self, db), random(0, self.numTuples - 1), startTime(self.context->now()) {
		error = fdb_database_create_transaction(db, &tx);
		if (error) {
			done(this);
		}
		setCallbacks();
	}
	~ClientActor() {
		if (tx) {
			fdb_transaction_destroy(tx);
		}
	}
	void run() { get(); }
	// Issue one random point read, or finish if the time budget is spent.
	void get() {
		if (self.context->now() > startTime + self.runFor) {
			done(this);
			return;
		}
		auto key = KEY_PREFIX + std::to_string(random(self.random));
		auto f = fdb_transaction_get(tx, reinterpret_cast<const uint8_t*>(key.c_str()), key.size(), false);
		wait(f, State::Get);
	}
	// Commit the current batch of reads, or finish if time is up.
	void commit() {
		if (self.context->now() > startTime + self.runFor) {
			done(this);
			return;
		}
		wait(fdb_transaction_commit(tx), State::Commit);
	}
	void setCallbacks() {
		callbacks[State::Get] = { [this](FDBFuture* future) {
			                         ++numGets;
			                         if (++ops >= self.opsPerTx) {
				                         commit();
			                         } else {
				                         get();
			                         }
		                         },
			                      [this](fdb_error_t error) {
			                         wait(fdb_transaction_on_error(tx, error), State::Retry);
		                         } };
		callbacks[State::Retry] = { [this](FDBFuture* future) {
			                           ops = 0;
			                           fdb_transaction_reset(tx);
			                           ++numRetries;
			                           get();
		                           },
			                        // BUG FIX: the parameter was unnamed, so the trace
			                        // below reported the stale member `error` (set only at
			                        // construction) instead of the error that actually
			                        // failed tx.onError.
			                        [this](fdb_error_t error) {
			                           self.context->trace(FDBSeverity::Error, "AssertionFailure",
			                                               { { "Reason", "tx.onError failed" },
			                                                 { "Error", std::string(fdb_get_error(error)) } });
			                           self.success = false;
			                           done(this);
		                           } };
		callbacks[State::Commit] = { [this](FDBFuture* future) {
			                            ++numCommits;
			                            ops = 0;
			                            fdb_transaction_reset(tx);
			                            get();
		                            },
			                         // BUG FIX: was passing the member `error` (usually 0)
			                         // to tx.onError instead of the commit error delivered
			                         // to this handler.
			                         [this](fdb_error_t error) {
			                            wait(fdb_transaction_on_error(tx, error), State::Retry);
		                            } };
	}
};
// Workload name as registered with the factory ("SimpleWorkload").
std::string description() const override { return name; }
// One-time initialization: store the context, seed the RNG from it, read
// workload options, and select the C API version. NOTE(review): a failed
// API-version selection is only logged at Info and init still returns
// true — consider failing or logging at Error.
bool init(FDBWorkloadContext* context) override {
this->context = context;
context->trace(FDBSeverity::Info, "SimpleWorkloadInit", {});
random = decltype(random)(context->rnd());
numTuples = context->getOption("numTuples", 100000ul);
numActors = context->getOption("numActors", 100ul);
insertsPerTx = context->getOption("insertsPerTx", 100ul);
opsPerTx = context->getOption("opsPerTx", 100ul);
runFor = context->getOption("runFor", 10.0);
auto err = fdb_select_api_version(610);
if (err) {
context->trace(FDBSeverity::Info, "SelectAPIVersionFailed",
{ { "Error", std::string(fdb_get_error(err)) } });
}
return true;
}
// Populate phase: exactly one client (clientId 0) writes the keyspace;
// every other client completes immediately.
void setup(FDBDatabase* db, GenericPromise<bool> done) override {
	// BUG FIX: the condition was `== 0`, which made client 0 — the intended
	// populator — skip population entirely, so a single-client run never
	// wrote any data for start() to read.
	if (this->context->clientId() != 0) {
		done.send(true);
		return;
	}
	// Tracks outstanding PopulateActors; fulfills the promise and deletes
	// itself once the last actor reports done.
	struct Populator {
		std::vector<PopulateActor*> actors;
		GenericPromise<bool> promise;
		bool success = true;
		void operator()(PopulateActor* done) {
			if (done->error) {
				success = false;
			}
			// swap-and-pop removal of the finished actor
			for (int i = 0; i < actors.size(); ++i) {
				if (actors[i] == done) {
					actors[i] = actors.back();
					delete done;
					actors.pop_back();
				}
			}
			if (actors.empty()) {
				promise.send(success);
				delete this;
			}
		}
	};
	// Split [0, numTuples) evenly across numActors actors; the last actor
	// absorbs any remainder.
	decltype(numTuples) from = 0;
	auto p = new Populator{ {}, std::move(done) };
	for (decltype(numActors) i = 0; i < numActors; ++i) {
		decltype(from) to = from + (numTuples / numActors);
		if (i == numActors - 1) {
			to = numTuples;
		}
		auto actor = new PopulateActor([p](PopulateActor* self) { (*p)(self); }, *this, db, from, to);
		p->actors.emplace_back(actor);
		from = to;
	}
	for (auto actor : p->actors) {
		actor->run();
	}
}
// Read phase: launch numActors ClientActors that issue random gets until
// runFor elapses, then aggregate their per-second rates.
void start(FDBDatabase* db, GenericPromise<bool> done) override {
	if (!success) {
		done.send(false);
		// BUG FIX: previously fell through after sending the failure,
		// starting the actors anyway and fulfilling the promise a second
		// time when they finished.
		return;
	}
	// Collects each finished actor's rates; fulfills the promise and
	// deletes itself after the last actor completes.
	struct ClientRunner {
		std::vector<ClientActor*> actors;
		GenericPromise<bool> done;
		SimpleWorkload* self;
		void operator()(ClientActor* actor) {
			double now = self->context->now();
			// swap-and-pop removal of the finished actor
			for (int i = 0; i < actors.size(); ++i) {
				if (actors[i] == actor) {
					actors[i] = actors.back();
					actors.pop_back();
				}
			}
			// Ignore actors that ran for under 10ms — their rates would be
			// pure noise.
			double s = now - actor->startTime;
			if (s > 0.01) {
				self->gets.emplace_back(double(actor->numGets) / s);
				self->txs.emplace_back(double(actor->numCommits) / s);
				self->retries.emplace_back(double(actor->numRetries) / s);
			}
			delete actor;
			if (actors.empty()) {
				done.send(self->success);
				delete this;
			}
		}
	};
	auto runner = new ClientRunner{ {}, std::move(done), this };
	for (decltype(numActors) i = 0; i < numActors; ++i) {
		auto actor = new ClientActor([runner](ClientActor* self) { (*runner)(self); }, *this, db);
		runner->actors.push_back(actor);
	}
	for (auto actor : runner->actors) {
		actor->run();
	}
}
// Verification phase: nothing to re-read; report the accumulated result.
void check(FDBDatabase* db, GenericPromise<bool> done) override { done.send(success); }
// Average the samples in `v`. Returns 0.0 for an empty vector — previously
// this divided by zero, injecting NaN into the reported metrics (possible
// when every actor finished in under 10ms and no rates were recorded).
template <class Vec>
double accumulateMetric(const Vec& v) const {
	if (v.empty()) {
		return 0.0;
	}
	double res = 0.0;
	for (auto val : v) {
		res += val;
	}
	return res / double(v.size());
}
// Report per-second rates averaged across all client actors; the fourth
// FDBPerfMetric field (format_code) keeps its default.
void getMetrics(std::vector<FDBPerfMetric>& out) const override {
out.emplace_back(FDBPerfMetric{ "Get/s", accumulateMetric(gets), true });
out.emplace_back(FDBPerfMetric{ "Tx/s", accumulateMetric(txs), true });
out.emplace_back(FDBPerfMetric{ "Retries/s", accumulateMetric(retries), true });
}
};
const std::string SimpleWorkload::name = "SimpleWorkload";
// All keys written and read by this workload share this prefix.
const std::string SimpleWorkload::KEY_PREFIX = "csimple/";
} // namespace
// Global registration: constructing FDBWorkloadFactoryT inserts this
// workload into FDBWorkloadFactoryImpl::factories() under its name.
FDBWorkloadFactoryT<SimpleWorkload> simpleWorkload(SimpleWorkload::name);

View File

@@ -0,0 +1,41 @@
/*
* workloads.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "workloads.h"
// Out-of-line (empty) destructor definition for the factory singleton.
FDBWorkloadFactoryImpl::~FDBWorkloadFactoryImpl() {}
// Function-local-static registry (workload name -> registrar), so it is
// initialized before any static FDBWorkloadFactoryT registers itself.
std::map<std::string, IFDBWorkloadFactory*>& FDBWorkloadFactoryImpl::factories() {
static std::map<std::string, IFDBWorkloadFactory*> _factories;
return _factories;
}
// Look up the registrar for `name` and delegate creation to it; returns an
// empty shared_ptr when no workload was registered under that name.
std::shared_ptr<FDBWorkload> FDBWorkloadFactoryImpl::create(const std::string &name) {
	auto& registry = factories();
	auto it = registry.find(name);
	return it == registry.end() ? nullptr : it->second->create();
}
// Exported C entry point the host resolves after loading the plugin;
// returns the process-wide factory singleton. The logger is unused here.
FDBWorkloadFactory* workloadFactory(FDBLogger*) {
static FDBWorkloadFactoryImpl impl;
return &impl;
}

View File

@@ -0,0 +1,47 @@
/*
* workloads.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "foundationdb/ClientWorkload.h"
#include <map>
// Internal interface each workload registrar implements; registrars are
// stored as base pointers in FDBWorkloadFactoryImpl::factories().
struct IFDBWorkloadFactory {
	// Virtual destructor added for safe polymorphic use of the base.
	virtual ~IFDBWorkloadFactory() = default;
	virtual std::shared_ptr<FDBWorkload> create() = 0;
};
// Concrete factory exposed to the host: resolves workload names against
// the static registry populated by FDBWorkloadFactoryT instances.
struct FDBWorkloadFactoryImpl : FDBWorkloadFactory {
~FDBWorkloadFactoryImpl();
// Name -> registrar map with static storage (defined in workloads.cpp).
static std::map<std::string, IFDBWorkloadFactory*>& factories();
std::shared_ptr<FDBWorkload> create(const std::string& name) override;
};
// Registration helper: declaring a global FDBWorkloadFactoryT<W>(name)
// inserts this registrar into the factory map during static initialization.
template<class WorkloadType>
struct FDBWorkloadFactoryT : IFDBWorkloadFactory {
explicit FDBWorkloadFactoryT(const std::string& name) {
FDBWorkloadFactoryImpl::factories()[name] = this;
}
std::shared_ptr<FDBWorkload> create() override {
return std::make_shared<WorkloadType>();
}
};
extern "C" DLLEXPORT FDBWorkloadFactory* workloadFactory(FDBLogger*);

View File

@@ -85,7 +85,7 @@ void fdb_flow_test() {
openTraceFile(NetworkAddress(), 1000000, 1000000, ".");
systemMonitor();
uncancellable(recurring(&systemMonitor, 5.0, TaskFlushTrace));
uncancellable(recurring(&systemMonitor, 5.0, TaskPriority::FlushTrace));
Future<Void> t = _test();
@@ -179,7 +179,7 @@ namespace FDB {
}
void backToFutureCallback( FDBFuture* f, void* data ) {
g_network->onMainThread( Promise<Void>((SAV<Void>*)data), TaskDefaultOnMainThread ); // SOMEDAY: think about this priority
g_network->onMainThread( Promise<Void>((SAV<Void>*)data), TaskPriority::DefaultOnMainThread ); // SOMEDAY: think about this priority
}
// backToFuture<Type>( FDBFuture*, (FDBFuture* -> Type) ) -> Future<Type>

View File

@@ -1550,6 +1550,7 @@ struct UnitTestsFunc : InstructionFunc {
const uint64_t retryLimit = 50;
const uint64_t noRetryLimit = -1;
const uint64_t maxRetryDelay = 100;
const uint64_t sizeLimit = 100000;
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_LOCATION_CACHE_SIZE, Optional<StringRef>(StringRef((const uint8_t*)&locationCacheSize, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_MAX_WATCHES, Optional<StringRef>(StringRef((const uint8_t*)&maxWatches, 8)));
@@ -1558,6 +1559,7 @@ struct UnitTestsFunc : InstructionFunc {
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_TIMEOUT, Optional<StringRef>(StringRef((const uint8_t*)&timeout, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_TIMEOUT, Optional<StringRef>(StringRef((const uint8_t*)&noTimeout, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_MAX_RETRY_DELAY, Optional<StringRef>(StringRef((const uint8_t*)&maxRetryDelay, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_SIZE_LIMIT, Optional<StringRef>(StringRef((const uint8_t*)&sizeLimit, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_RETRY_LIMIT, Optional<StringRef>(StringRef((const uint8_t*)&retryLimit, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_TRANSACTION_RETRY_LIMIT, Optional<StringRef>(StringRef((const uint8_t*)&noRetryLimit, 8)));
data->db->setDatabaseOption(FDBDatabaseOption::FDB_DB_OPTION_SNAPSHOT_RYW_ENABLE);

View File

@@ -277,6 +277,13 @@ func (o DatabaseOptions) SetTransactionMaxRetryDelay(param int64) error {
return o.setOpt(502, int64ToBytes(param))
}
// Set the maximum transaction size which, if exceeded, will cause the transaction to be cancelled. Default to 10,000,000 bytes.
//
// Parameter: value in bytes
func (o DatabaseOptions) SetTransactionSizeLimit(param int64) error {
return o.setOpt(503, int64ToBytes(param))
}
// Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior.
func (o DatabaseOptions) SetSnapshotRywEnable() error {
return o.setOpt(26, nil)
@@ -402,6 +409,13 @@ func (o TransactionOptions) SetMaxRetryDelay(param int64) error {
return o.setOpt(502, int64ToBytes(param))
}
// Set the maximum transaction size which, if exceeded, will cause the transaction to be cancelled. Valid parameter values are ``[32, 10,000,000]```.
//
// Parameter: value in bytes
func (o TransactionOptions) SetSizeLimit(param int64) error {
return o.setOpt(503, int64ToBytes(param))
}
// Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior.
func (o TransactionOptions) SetSnapshotRywEnable() error {
return o.setOpt(600, nil)
@@ -451,7 +465,7 @@ const (
// Infrequently used. The client has passed a specific row limit and wants
// that many rows delivered in a single batch. Because of iterator operation
// in client drivers make request batches transparent to the user, consider
// “WANT_ALL“ StreamingMode instead. A row limit must be specified if this
// ``WANT_ALL`` StreamingMode instead. A row limit must be specified if this
// mode is used.
StreamingModeExact StreamingMode = 1
@@ -568,15 +582,15 @@ type ErrorPredicate int
const (
// Returns “true“ if the error indicates the operations in the transactions
// should be retried because of transient error.
// Returns ``true`` if the error indicates the operations in the
// transactions should be retried because of transient error.
ErrorPredicateRetryable ErrorPredicate = 50000
// Returns “true“ if the error indicates the transaction may have succeeded,
// though not in a way the system can verify.
// Returns ``true`` if the error indicates the transaction may have
// succeeded, though not in a way the system can verify.
ErrorPredicateMaybeCommitted ErrorPredicate = 50001
// Returns “true“ if the error indicates the transaction has not committed,
// though in a way that can be retried.
// Returns ``true`` if the error indicates the transaction has not
// committed, though in a way that can be retried.
ErrorPredicateRetryableNotCommitted ErrorPredicate = 50002
)

View File

@@ -53,6 +53,8 @@ set(JAVA_BINDING_SRCS
src/main/com/apple/foundationdb/TransactionContext.java
src/main/com/apple/foundationdb/testing/AbstractWorkload.java
src/main/com/apple/foundationdb/testing/WorkloadContext.java
src/main/com/apple/foundationdb/testing/Promise.java
src/main/com/apple/foundationdb/testing/PerfMetric.java
src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java
src/main/com/apple/foundationdb/tuple/IterableComparator.java
src/main/com/apple/foundationdb/tuple/package-info.java
@@ -119,11 +121,11 @@ endif()
if(OPEN_FOR_IDE)
add_library(fdb_java OBJECT fdbJNI.cpp)
add_library(java_workloads OBJECT JavaWorkload.cpp)
else()
add_library(fdb_java SHARED fdbJNI.cpp)
add_library(java_workloads SHARED JavaWorkload.cpp)
endif()
message(DEBUG ${JNI_INCLUDE_DIRS})
message(DEBUG ${JNI_LIBRARIES})
target_include_directories(fdb_java PRIVATE ${JNI_INCLUDE_DIRS})
# libfdb_java.so is loaded by fdb-java.jar and doesn't need to depened on jvm shared libraries.
target_link_libraries(fdb_java PRIVATE fdb_c)
@@ -132,6 +134,10 @@ set_target_properties(fdb_java PROPERTIES
if(APPLE)
set_target_properties(fdb_java PROPERTIES SUFFIX ".jnilib")
endif()
set_target_properties(java_workloads PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/share/foundationdb")
target_link_libraries(java_workloads PUBLIC fdb_c ${JNI_LIBRARIES})
target_include_directories(java_workloads PUBLIC ${JNI_INCLUDE_DIRS})
set(CMAKE_JAVA_COMPILE_FLAGS "-source" "1.8" "-target" "1.8")
set(CMAKE_JNI_TARGET TRUE)

View File

@@ -0,0 +1,614 @@
/*
* JavaWorkload.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <foundationdb/ClientWorkload.h>
#define FDB_API_VERSION 610
#include <foundationdb/fdb_c.h>
#include <jni.h>
#include <set>
#include <iostream>
#include <boost/algorithm/string.hpp>
namespace {
// to make logging more convenient
// this should be fine as it is guarded by
// a anon namespace
auto debug = FDBSeverity::Debug;
auto info = FDBSeverity::Info;
auto error = FDBSeverity::Error;
// JNI native backing the Java trace call: converts the message and the
// java.util.Map of details into C++ strings and forwards them to the host
// FDBLogger identified by the `logger` handle.
void printTrace(JNIEnv* env, jclass, jlong logger, jint severity, jstring message, jobject details) {
	auto log = reinterpret_cast<FDBLogger*>(logger);
	jboolean isCopy;
	const char* msg = env->GetStringUTFChars(message, &isCopy);
	std::vector<std::pair<std::string, std::string>> detailsMap;
	if (details != nullptr) {
		// Walk the Map via reflection: iterate keySet() and fetch each value
		// with get(), copying both into detailsMap.
		jclass mapClass = env->FindClass("java/util/Map");
		jclass setClass = env->FindClass("java/util/Set");
		jclass iteratorClass = env->FindClass("java/util/Iterator");
		jmethodID keySetID = env->GetMethodID(mapClass, "keySet", "()Ljava/util/Set;");
		jobject keySet = env->CallObjectMethod(details, keySetID);
		jmethodID iteratorMethodID = env->GetMethodID(setClass, "iterator", "()Ljava/util/Iterator;");
		jobject iterator = env->CallObjectMethod(keySet, iteratorMethodID);
		jmethodID hasNextID = env->GetMethodID(iteratorClass, "hasNext", "()Z");
		jmethodID nextID = env->GetMethodID(iteratorClass, "next", "()Ljava/lang/Object;");
		jmethodID getID = env->GetMethodID(mapClass, "get", "(Ljava/lang/Object;)Ljava/lang/Object;");
		while (env->CallBooleanMethod(iterator, hasNextID)) {
			jobject next = env->CallObjectMethod(iterator, nextID);
			jstring key = jstring(next);
			jstring value = jstring(env->CallObjectMethod(details, getID, next));
			auto keyStr = env->GetStringUTFChars(key, nullptr);
			auto keyLen = env->GetStringUTFLength(key);
			auto valueStr = env->GetStringUTFChars(value, nullptr);
			auto valueLen = env->GetStringUTFLength(value);
			detailsMap.emplace_back(std::string(keyStr, keyLen), std::string(valueStr, valueLen));
			env->ReleaseStringUTFChars(key, keyStr);
			env->ReleaseStringUTFChars(value, valueStr);
			env->DeleteLocalRef(key);
			env->DeleteLocalRef(value);
		}
	}
	// Map the numeric severity onto FDBSeverity buckets.
	// BUG FIX: severities >= 40 previously left `sev` uninitialized (no
	// final else), passing an indeterminate value to trace(); they now map
	// to Error via the `error` alias declared above.
	FDBSeverity sev;
	if (severity < 10) {
		sev = debug;
	} else if (severity < 20) {
		sev = info;
	} else if (severity < 30) {
		sev = FDBSeverity::Warn;
	} else if (severity < 40) {
		sev = FDBSeverity::WarnAlways;
	} else {
		sev = error;
	}
	log->trace(sev, msg, detailsMap);
	if (isCopy) {
		env->ReleaseStringUTFChars(message, msg);
	}
}
// JNI bridge: forwards to FDBWorkloadContext::getProcessID via the handle.
jlong getProcessID(JNIEnv* env, jclass, jlong self) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
return jlong(context->getProcessID());
}
// JNI bridge: forwards to FDBWorkloadContext::setProcessID via the handle.
void setProcessID(JNIEnv* env, jclass, jlong self, jlong processID) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
context->setProcessID(processID);
}
// JNI bridge for boolean option lookup; the Java name string is borrowed
// only for the duration of the call.
jboolean getOptionBool(JNIEnv* env, jclass, jlong self, jstring name, jboolean defaultValue) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
jboolean isCopy = true;
const char* utf = env->GetStringUTFChars(name, &isCopy);
auto res = jboolean(context->getOption(utf, bool(defaultValue)));
if (isCopy) {
env->ReleaseStringUTFChars(name, utf);
}
return res;
}
// JNI bridge for integer option lookup (same pattern as getOptionBool).
jlong getOptionLong(JNIEnv* env, jclass, jlong self, jstring name, jlong defaultValue) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
jboolean isCopy = true;
const char* utf = env->GetStringUTFChars(name, &isCopy);
auto res = jlong(context->getOption(utf, long(defaultValue)));
if (isCopy) {
env->ReleaseStringUTFChars(name, utf);
}
return res;
}
// JNI bridge for floating-point option lookup.
jdouble getOptionDouble(JNIEnv* env, jclass, jlong self, jstring name, jdouble defaultValue) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
jboolean isCopy = true;
const char* utf = env->GetStringUTFChars(name, &isCopy);
auto res = jdouble(context->getOption(utf, double(defaultValue)));
if (isCopy) {
env->ReleaseStringUTFChars(name, utf);
}
return res;
}
// JNI bridge for string option lookup: borrows both the name and the
// default, then returns the result as a fresh Java string.
jstring getOptionString(JNIEnv* env, jclass, jlong self, jstring name, jstring defaultValue) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
jboolean isCopy;
jboolean defIsCopy;
const char* nameStr = env->GetStringUTFChars(name, &isCopy);
const char* defStr = env->GetStringUTFChars(defaultValue, &defIsCopy);
auto res = context->getOption(nameStr, std::string(defStr));
if (isCopy) {
env->ReleaseStringUTFChars(name, nameStr);
}
if (defIsCopy) {
env->ReleaseStringUTFChars(defaultValue, defStr);
}
return env->NewStringUTF(res.c_str());
}
// JNI bridge: this client's index within the workload run.
jint getClientID(JNIEnv* env, jclass, jlong self) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
return jint(context->clientId());
}
// JNI bridge: total number of workload clients.
jint getClientCount(JNIEnv* env, jclass, jlong self) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
return jint(context->clientCount());
}
// JNI bridge: the random number shared across clients by the host.
jlong getSharedRandomNumber(JNIEnv* env, jclass, jlong self) {
FDBWorkloadContext* context = reinterpret_cast<FDBWorkloadContext*>(self);
return jlong(context->sharedRandomNumber());
}
// Holds a GenericPromise<bool> on behalf of the Java side. send() fulfills
// the promise and then destroys this object, so it must be called at most
// once per JavaPromise.
struct JavaPromise {
GenericPromise<bool> impl;
JavaPromise(GenericPromise<bool>&& promise) : impl(std::move(promise)) {}
void send(bool val) {
impl.send(val);
delete this; // self-owned: freed on fulfillment
}
};
void promiseSend(JNIEnv, jclass, jlong self, jboolean value) {
auto p = reinterpret_cast<JavaPromise*>(self);
p->send(bool(value));
}
// Exception object thrown from C++ when a JNI call leaves a pending Java
// exception; carries the throwable plus the C++ throw site.
struct JNIError {
JNIEnv* env;
jthrowable throwable = nullptr;
const char* file;
int line;
// "file:line" of the failing check, or "UNKNOWN" when not recorded.
std::string location() const {
if (file == nullptr) {
return "UNKNOWN";
} else {
return file + std::string(":") + std::to_string(line);
}
}
// Render the throwable via Object.toString(). NOTE(review): performs JNI
// calls — presumably the pending exception has been cleared first; verify
// callers do so.
std::string toString() {
if (!throwable) {
return "JNIError";
} else {
jboolean isCopy = false;
jmethodID toStringM =
env->GetMethodID(env->FindClass("java/lang/Object"), "toString", "()Ljava/lang/String;");
jstring s = (jstring)env->CallObjectMethod(throwable, toStringM);
const char* utf = env->GetStringUTFChars(s, &isCopy);
std::string res(utf);
env->ReleaseStringUTFChars(s, utf);
return res;
}
}
};
struct JVM {
FDBLogger* log;
JavaVM* jvm;
JNIEnv* env;
std::set<std::string> classPath;
bool healthy = false;
jclass throwableClass;
jclass abstractWorkloadClass = nullptr;
// this is a bit ugly - but JNINativeMethod requires
// char* not const char *
std::vector<char*> charArrays;
void checkExceptionImpl(const char* file, int line) {
if (env->ExceptionCheck()) {
throw JNIError{ env, env->ExceptionOccurred(), file, line };
}
}
#define checkException() checkExceptionImpl(__FILE__, __LINE__)
void success(int res) {
bool didThrow = env->ExceptionCheck();
if (res == JNI_ERR || didThrow) {
throw JNIError{ env, didThrow ? env->ExceptionOccurred() : nullptr };
}
}
JVM(FDBLogger* log) : log(log) {
try {
log->trace(FDBSeverity::Debug, "InitializeJVM", {});
JavaVMInitArgs args;
args.version = JNI_VERSION_1_6;
args.ignoreUnrecognized = JNI_TRUE;
args.nOptions = 0;
success(JNI_CreateJavaVM(&jvm, reinterpret_cast<void**>(&env), &args));
log->trace(debug, "JVMCreated", {});
throwableClass = env->FindClass("java/lang/Throwable");
} catch (JNIError& e) {
healthy = false;
env->ExceptionClear();
}
}
~JVM() {
log->trace(debug, "JVMDestruct", {});
if (jvm) {
jvm->DestroyJavaVM();
}
for (auto& a : charArrays) {
delete[] a;
}
log->trace(debug, "JVMDestructDone", {});
}
void setNativeMethods(jclass clazz,
const std::initializer_list<std::tuple<std::string_view, std::string_view, void*>>& methods) {
charArrays.reserve(charArrays.size() + 2 * methods.size());
std::unique_ptr<JNINativeMethod[]> nativeMethods;
int numNativeMethods = methods.size();
nativeMethods.reset(new JNINativeMethod[numNativeMethods]);
int i = 0;
for (const auto& t : methods) {
auto& w = nativeMethods[i];
auto nameStr = std::get<0>(t);
auto sigStr = std::get<1>(t);
charArrays.push_back(new char[nameStr.size() + 1]);
char* name = charArrays.back();
charArrays.push_back(new char[sigStr.size() + 1]);
char* sig = charArrays.back();
memcpy(name, nameStr.data(), nameStr.size());
memcpy(sig, sigStr.data(), sigStr.size());
name[nameStr.size()] = '\0';
sig[sigStr.size()] = '\0';
w.name = name;
w.signature = sig;
w.fnPtr = std::get<2>(t);
log->trace(info, "PreparedNativeMethod",
{ { "Name", w.name },
{ "Signature", w.signature },
{ "Ptr", std::to_string(reinterpret_cast<uintptr_t>(w.fnPtr)) } });
++i;
}
env->RegisterNatives(clazz, nativeMethods.get(), numNativeMethods);
checkException();
}
jclass getClassImpl(const char* file, int line, const char* name) {
auto res = env->FindClass(name);
checkExceptionImpl(file, line);
return res;
}
#define getClass(name) getClassImpl(__FILE__, __LINE__, name)
jmethodID getMethodImpl(const char* file, int line, jclass clazz, const char* name, const char* signature) {
auto res = env->GetMethodID(clazz, name, signature);
checkExceptionImpl(file, line);
return res;
}
#define getMethod(clazz, name, signature) getMethodImpl(__FILE__, __LINE__, clazz, name, signature)
jfieldID getFieldImpl(const char* file, int line, jclass clazz, const char* name, const char* signature) {
auto res = env->GetFieldID(clazz, name, signature);
checkException();
return res;
}
#define getField(clazz, name, signature) getFieldImpl(__FILE__, __LINE__, clazz, name, signature)
void addToClassPath(const std::string& path) {
log->trace(info, "TryAddToClassPath", { { "Path", path } });
if (!env) {
throw JNIError{};
}
if (classPath.count(path) > 0) {
// already added
return;
}
auto p = env->NewStringUTF(path.c_str());
checkException();
auto fileClass = getClass("java/io/File");
auto file = env->NewObject(fileClass, getMethod(fileClass, "<init>", "(Ljava/lang/String;)V"), p);
checkException();
auto uri = env->CallObjectMethod(file, env->GetMethodID(fileClass, "toURI", "()Ljava/net/URI;"));
checkException();
auto uriClass = getClass("java/net/URI");
auto url = env->CallObjectMethod(uri, getMethod(uriClass, "toURL", "()Ljava/net/URL;"));
checkException();
auto classLoaderClass = getClass("java/lang/ClassLoader");
auto sysLoaderMethod =
env->GetStaticMethodID(classLoaderClass, "getSystemClassLoader", "()Ljava/lang/ClassLoader;");
checkException();
auto classLoader = env->CallStaticObjectMethod(classLoaderClass, sysLoaderMethod);
checkException();
auto urlLoaderClass = getClass("java/net/URLClassLoader");
env->CallVoidMethod(classLoader, getMethod(urlLoaderClass, "addURL", "(Ljava/net/URL;)V"), url);
env->DeleteLocalRef(classLoader);
checkException();
}
void init() {
if (abstractWorkloadClass != nullptr) {
return;
}
abstractWorkloadClass = getClass("com/apple/foundationdb/testing/AbstractWorkload");
setNativeMethods(abstractWorkloadClass,
{ { "log", "(JILjava/lang/String;Ljava/util/Map;)V", reinterpret_cast<void*>(&printTrace) } });
auto loggerField = env->GetStaticFieldID(abstractWorkloadClass, "logger", "J");
checkException();
env->SetStaticLongField(abstractWorkloadClass, loggerField, reinterpret_cast<jlong>(log));
log->trace(info, "SetLogger", { { "Logger", std::to_string(reinterpret_cast<jlong>(log)) } });
setNativeMethods(getClass("com/apple/foundationdb/testing/WorkloadContext"),
{ { "getProcessID", "(J)J", reinterpret_cast<void*>(&getProcessID) },
{ "setProcessID", "(JJ)V", reinterpret_cast<void*>(&setProcessID) },
{ "getOption", "(JLjava/lang/String;Z)Z", reinterpret_cast<void*>(&getOptionBool) },
{ "getOption", "(JLjava/lang/String;J)J", reinterpret_cast<void*>(&getOptionLong) },
{ "getOption", "(JLjava/lang/String;D)D", reinterpret_cast<void*>(&getOptionDouble) },
{ "getOption", "(JLjava/lang/String;Ljava/lang/String;)Ljava/lang/String;",
reinterpret_cast<void*>(&getOptionString) },
{ "getClientID", "(J)I", reinterpret_cast<void*>(&getClientID) },
{ "getClientCount", "(J)I", reinterpret_cast<void*>(&getClientCount) },
{ "getSharedRandomNumber", "(J)J", reinterpret_cast<void*>(&getSharedRandomNumber) } });
setNativeMethods(getClass("com/apple/foundationdb/testing/Promise"),
{ { "send", "(JZ)V", reinterpret_cast<void*>(&promiseSend) } });
auto fdbClass = getClass("com/apple/foundationdb/FDB");
jmethodID selectMethod =
env->GetStaticMethodID(fdbClass, "selectAPIVersion", "(IZ)Lcom/apple/foundationdb/FDB;");
checkException();
env->CallStaticObjectMethod(fdbClass, selectMethod, jint(610), jboolean(false));
checkException();
}
jobject createWorkloadContext(FDBWorkloadContext* context) {
auto clazz = getClass("com/apple/foundationdb/testing/WorkloadContext");
auto constructor = getMethod(clazz, "<init>", "(J)V");
auto jContext = reinterpret_cast<jlong>(context);
jobject res = env->NewObject(clazz, constructor, jContext);
std::cout.flush();
checkException();
auto field = env->GetFieldID(clazz, "impl", "J");
checkException();
auto impl = env->GetLongField(res, field);
checkException();
if (impl != jContext) {
log->trace(error, "ContextNotCorrect",
{ { "Expected", std::to_string(jContext) }, { "Impl", std::to_string(impl) } });
std::terminate();
}
return res;
}
jobject createWorkload(jobject context, const std::string& workloadName) {
auto clazz = getClass(workloadName.c_str());
if (!env->IsAssignableFrom(clazz, abstractWorkloadClass)) {
log->trace(error, "ClassNotAWorkload", { { "Class", workloadName } });
return nullptr;
}
auto constructor = getMethod(clazz, "<init>", "(Lcom/apple/foundationdb/testing/WorkloadContext;)V");
auto res = env->NewObject(clazz, constructor, context);
checkException();
env->NewGlobalRef(res);
return res;
}
jobject createPromise(GenericPromise<bool>&& promise) {
auto p = std::make_unique<JavaPromise>(std::move(promise));
auto clazz = getClass("com/apple/foundationdb/testing/Promise");
auto res = env->NewObject(clazz, getMethod(clazz, "<init>", "(J)V"), reinterpret_cast<jlong>(p.get()));
checkException();
p.release();
return res;
}
void shutdownWorkload(jobject workload, const std::string& workloadName) {
auto clazz = getClass(workloadName.c_str());
env->CallVoidMethod(workload, getMethod(clazz, "shutdown", "()V"));
checkException();
}
std::string jtoStr(jstring str) {
jboolean isCopy;
auto arr = env->GetStringUTFChars(str, &isCopy);
std::string res(arr);
if (isCopy) {
env->ReleaseStringUTFChars(str, arr);
}
return res;
}
void getMetrics(jobject workload, const std::string& workloadName, std::vector<FDBPerfMetric>& result) {
auto clazz = getClass(workloadName.c_str());
auto perfMetricClass = getClass("Lcom/apple/foundationdb/testing/PerfMetric;");
auto nameId = getField(perfMetricClass, "name", "Ljava/lang/String;");
auto valueId = getField(perfMetricClass, "value", "D");
auto averagedId = getField(perfMetricClass, "averaged", "Z");
auto formatCodeId = getField(perfMetricClass, "formatCode", "Ljava/lang/String;");
auto v = env->CallObjectMethod(workload, getMethod(clazz, "getMetrics", "()Ljava/util/List;"));
checkException();
auto listClass = getClass("java/util/List");
auto iter = env->CallObjectMethod(v, getMethod(listClass, "iterator", "()Ljava/util/Iterator;"));
checkException();
auto iterClass = getClass("java/util/Iterator");
auto hasNextM = getMethod(iterClass, "hasNext", "()Z");
auto nextM = getMethod(iterClass, "next", "()Ljava/lang/Object;");
jboolean hasNext = env->CallBooleanMethod(iter, hasNextM);
checkException();
while (hasNext) {
auto perfMetric = env->CallObjectMethod(iter, nextM);
checkException();
auto name = jtoStr(jstring(env->GetObjectField(perfMetric, nameId)));
checkException();
auto value = env->GetDoubleField(perfMetric, valueId);
checkException();
auto averaged = env->GetBooleanField(perfMetric, averagedId);
checkException();
auto formatCode = jtoStr(jstring(env->GetObjectField(perfMetric, formatCodeId)));
result.emplace_back(FDBPerfMetric{ name, value, bool(averaged), formatCode });
hasNext = env->CallBooleanMethod(iter, hasNextM);
checkException();
}
return;
}
jobject createDatabase(jobject workload, FDBDatabase* db) {
auto executor =
env->CallObjectMethod(workload, getMethod(getClass("com/apple/foundationdb/testing/AbstractWorkload"),
"getExecutor", "()Ljava/util/concurrent/Executor;"));
auto databaseClass = getClass("com/apple/foundationdb/FDBDatabase");
jlong databasePtr = reinterpret_cast<jlong>(db);
jobject javaDatabase =
env->NewObject(databaseClass, getMethod(databaseClass, "<init>", "(JLjava/util/concurrent/Executor;)V"),
databasePtr, executor);
env->DeleteLocalRef(executor);
return javaDatabase;
}
void callWorkload(jobject workload, FDBDatabase* db, const char* method, GenericPromise<bool>&& promise) {
jobject jPromise = nullptr;
try {
auto clazz = getClass("com/apple/foundationdb/testing/AbstractWorkload");
auto jdb = createDatabase(workload, db);
jPromise = createPromise(std::move(promise));
env->CallVoidMethod(
workload,
getMethod(clazz, method,
"(Lcom/apple/foundationdb/Database;Lcom/apple/foundationdb/testing/Promise;)V"),
jdb, jPromise);
env->DeleteLocalRef(jdb);
env->DeleteLocalRef(jPromise);
jPromise = nullptr;
checkException();
} catch (...) {
if (jPromise) {
env->DeleteLocalRef(jPromise);
}
throw;
}
}
};
// Bridges the C++ FDBWorkload interface to a Java AbstractWorkload instance
// running inside the shared in-process JVM.
struct JavaWorkload : FDBWorkload {
	std::shared_ptr<JVM> jvm; // keeps the VM alive for this workload's lifetime
	FDBLogger& log;
	FDBWorkloadContext* context = nullptr;
	std::string name; // Java class name; '.' separators rewritten to '/' in the ctor
	bool failed = false; // sticky flag: set on the first JNI error and never cleared
	jobject workload = nullptr; // reference to the Java workload object (deleted as a global ref in the dtor)
	JavaWorkload(const std::shared_ptr<JVM>& jvm, FDBLogger& log, const std::string& name)
	  : jvm(jvm), log(log), name(name) {
		// JNI class lookup (FindClass) requires '/'-separated binary names.
		boost::replace_all(this->name, ".", "/");
	}
	~JavaWorkload() {
		if (workload) {
			try {
				jvm->shutdownWorkload(workload, name);
				jvm->env->DeleteGlobalRef(workload);
			} catch (JNIError& e) {
				// NOTE(review): "Unsucessful" is a typo, but the event name is a
				// runtime string that trace consumers may match on; left as-is.
				log.trace(error, "JNIShutDownUnsucessful", { { "Error", e.toString() }, { "Location", e.location() } });
			}
		}
	}
	std::string description() const override { return name; }
	// Loads the classpath from the "classPath" option (';' or ',' separated),
	// initializes the JVM bindings, and constructs the Java workload object.
	bool init(FDBWorkloadContext* context) override {
		this->context = context;
		try {
			std::string classPath = context->getOption("classPath", std::string(""));
			std::vector<std::string> paths;
			boost::split(paths, classPath, boost::is_any_of(";,"), boost::token_compress_on);
			for (const auto& path : paths) {
				jvm->addToClassPath(path);
			}
			jvm->init();
			jobject jContext = jvm->createWorkloadContext(context);
			if (jContext == nullptr) {
				failed = true;
				return failed;
			}
			workload = jvm->createWorkload(jContext, name);
		} catch (JNIError& e) {
			failed = true;
			log.trace(error, "JNIError", { { "Location", e.location() }, { "Error", e.toString() } });
		}
		// NOTE(review): this returns 'failed', i.e. true when initialization
		// FAILED and false on success — confirm against the FDBWorkload::init
		// contract; if true means "success" there, this is inverted.
		return failed;
	};
	// Each lifecycle hook below short-circuits with done.send(false) once
	// 'failed' is set, otherwise dispatches to the same-named Java method.
	void setup(FDBDatabase* db, GenericPromise<bool> done) override {
		if (failed) {
			done.send(false);
			return;
		}
		try {
			jvm->callWorkload(workload, db, "setup", std::move(done));
		} catch (JNIError& e) {
			failed = true;
			log.trace(error, "SetupFailedWithJNIError", { { "Error", e.toString() }, { "Location", e.location() } });
		}
	}
	void start(FDBDatabase* db, GenericPromise<bool> done) override {
		if (failed) {
			done.send(false);
			return;
		}
		try {
			jvm->callWorkload(workload, db, "start", std::move(done));
		} catch (JNIError& e) {
			failed = true;
			log.trace(error, "StartFailedWithJNIError", { { "Error", e.toString() }, { "Location", e.location() } });
		}
	}
	void check(FDBDatabase* db, GenericPromise<bool> done) override {
		if (failed) {
			done.send(false);
			return;
		}
		try {
			jvm->callWorkload(workload, db, "check", std::move(done));
		} catch (JNIError& e) {
			failed = true;
			log.trace(error, "CheckFailedWithJNIError", { { "Error", e.toString() }, { "Location", e.location() } });
		}
	}
	// NOTE(review): no failed/workload guard here — if init() failed,
	// 'workload' is null when this calls into the JVM; verify callers never
	// request metrics after a failed init.
	void getMetrics(std::vector<FDBPerfMetric>& out) const override {
		jvm->getMetrics(workload, name, out);
	}
};
// Factory that lazily creates (and shares) a single JVM across all Java
// workloads. The weak_ptr lets the VM be torn down once no workload holds it
// and transparently re-created on the next create() call.
struct JavaWorkloadFactory : FDBWorkloadFactory {
	FDBLogger* log;
	std::weak_ptr<JVM> jvm;
	JavaWorkloadFactory(FDBLogger* log) : log(log) {}
	// Non-copyable: owns process-wide JVM bookkeeping.
	JavaWorkloadFactory(const JavaWorkloadFactory&) = delete;
	JavaWorkloadFactory& operator=(const JavaWorkloadFactory&) = delete;
	std::shared_ptr<FDBWorkload> create(const std::string& name) override {
		std::shared_ptr<JVM> vm = jvm.lock();
		if (vm == nullptr) {
			// Previous VM (if any) is gone; spin up a fresh one.
			vm = std::make_shared<JVM>(log);
			jvm = vm;
		}
		return std::make_shared<JavaWorkload>(vm, *log, name);
	}
};
} // namespace
// Entry point used by the FoundationDB test harness (via dlsym) to obtain
// the factory for Java-based external workloads.
extern "C" DLLEXPORT FDBWorkloadFactory* workloadFactory(FDBLogger* logger);
FDBWorkloadFactory* workloadFactory(FDBLogger* logger) {
	// NOTE(review): the logger passed on the FIRST call is captured for the
	// process lifetime; later calls ignore their argument.
	static JavaWorkloadFactory factory(logger);
	return &factory;
}

View File

@ -22,11 +22,8 @@ package com.apple.foundationdb.testing;
import com.apple.foundationdb.Database;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@ -35,94 +32,45 @@ import java.util.concurrent.SynchronousQueue;
import java.util.Map;
public abstract class AbstractWorkload {
private static final Class<?>[] parameters = new Class<?>[]{URL.class};
protected WorkloadContext context;
private ThreadPoolExecutor executorService;
public AbstractWorkload(WorkloadContext context) {
this.context = context;
long contextID = context.getProcessID();
executorService =
new ThreadPoolExecutor(1, 2,
10, TimeUnit.SECONDS,
new SynchronousQueue<>()) {
@Override
protected void beforeExecute(Thread t, Runnable r) {
setProcessID(context.getProcessID());
context.setProcessID(contextID);
super.beforeExecute(t, r);
}
};
}
private Executor getExecutor() {
protected Executor getExecutor() {
return executorService;
}
public abstract void setup(Database db);
public abstract void start(Database db);
public abstract boolean check(Database db);
public double getCheckTimeout() {
protected abstract void setup(Database db, Promise promise);
protected abstract void start(Database db, Promise promise);
protected abstract void check(Database db, Promise promise);
protected List<PerfMetric> getMetrics() {
return new ArrayList<PerfMetric>();
}
protected double getCheckTimeout() {
return 3000;
}
private void setup(Database db, long voidCallback) {
AbstractWorkload self = this;
getExecutor().execute(new Runnable(){
public void run() {
self.setup(db);
self.sendVoid(voidCallback);
}
});
}
private void start(Database db, long voidCallback) {
AbstractWorkload self = this;
getExecutor().execute(new Runnable(){
public void run() {
self.start(db);
self.sendVoid(voidCallback);
}
});
}
private void check(Database db, long boolCallback) {
AbstractWorkload self = this;
getExecutor().execute(new Runnable(){
public void run() {
boolean res = self.check(db);
self.sendBool(boolCallback, res);
}
});
}
private void shutdown() {
executorService.shutdown();
}
public native void log(int severity, String message, Map<String, String> details);
private native void setProcessID(long processID);
private native void sendVoid(long handle);
private native void sendBool(long handle, boolean value);
// Helper functions to add to the class path at Runtime - will be called
// from C++
private static void addFile(String s) throws IOException {
File f = new File(s);
addFile(f);
}
private static void addFile(File f) throws IOException {
addURL(f.toURI().toURL());
}
private static void addURL(URL u) throws IOException {
URLClassLoader sysLoader = (URLClassLoader) ClassLoader.getSystemClassLoader();
Class<URLClassLoader> sysClass = URLClassLoader.class;
try {
Method method = sysClass.getDeclaredMethod("addURL", parameters);
method.setAccessible(true);
method.invoke(sysLoader, new Object[]{u});
} catch (Throwable t) {
t.printStackTrace();
throw new IOException("Error, could not add URL to system classloader");
}
private static long logger;
public static void log(int severity, String message, Map<String, String> details) {
log(logger, severity, message, details);
}
private static native void log(long logger, int severity, String message, Map<String, String> details);
}

View File

@ -0,0 +1,69 @@
/*
* PerfMetric.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb.testing;
/**
 * A single performance measurement reported by a workload: a named double
 * value, a flag telling the harness whether to average it across clients,
 * and a format code used when rendering it.
 *
 * <p>Note: the field names are read directly from native code via JNI
 * ({@code GetFieldID}), so they must not be renamed.
 */
public class PerfMetric {
	private String name;
	private double value;
	private boolean averaged;
	private String formatCode;

	/** Averaged metric with the default {@code "0.3g"} format code. */
	public PerfMetric(String name, double value) {
		this(name, value, true, "0.3g");
	}

	/** Metric with the default {@code "0.3g"} format code. */
	public PerfMetric(String name, double value, boolean averaged) {
		this(name, value, averaged, "0.3g");
	}

	public PerfMetric(String name, double value, boolean averaged, String formatCode) {
		this.name = name;
		this.value = value;
		this.averaged = averaged;
		this.formatCode = formatCode;
	}

	public String getName() { return name; }
	public void setName(String name) { this.name = name; }

	public double getValue() { return value; }
	public void setValue(double value) { this.value = value; }

	public boolean isAveraged() { return averaged; }
	public void setAveraged(boolean averaged) { this.averaged = averaged; }

	public String getFormatCode() { return formatCode; }
	public void setFormatCode(String formatCode) { this.formatCode = formatCode; }
}

View File

@ -0,0 +1,44 @@
/*
* Promise.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.apple.foundationdb.testing;
/**
 * One-shot completion handle backed by a native promise object. Instances are
 * created only from native code (the constructor is private and receives the
 * native pointer); sending a value more than once is an error.
 */
public class Promise {
	private long nativePromise; // opaque pointer to the native promise
	private boolean wasSet;

	private static native void send(long self, boolean value);

	private Promise(long nativePromise) {
		this.nativePromise = nativePromise;
		this.wasSet = false;
	}

	/** @return {@code true} while no value has been sent yet */
	public boolean canBeSet() {
		return !wasSet;
	}

	/**
	 * Delivers {@code value} to the native side. May be called at most once.
	 *
	 * @throws IllegalStateException if a value was already sent
	 */
	public void send(boolean value) {
		if (!canBeSet()) {
			throw new IllegalStateException("Promise was already set");
		}
		wasSet = true;
		send(nativePromise, value);
	}
}

View File

@ -23,39 +23,54 @@ package com.apple.foundationdb.testing;
import java.util.Map;
public class WorkloadContext {
private Map<String, String> options;
private int clientId, clientCount;
long sharedRandomNumber, processID;
long impl;
public WorkloadContext(Map<String, String> options, int clientId, int clientCount, long sharedRandomNumber, long processID)
private WorkloadContext(long impl)
{
this.options = options;
this.clientId = clientId;
this.clientCount = clientCount;
this.sharedRandomNumber = sharedRandomNumber;
this.processID = processID;
}
public String getOption(String name, String defaultValue) {
if (options.containsKey(name)) {
return options.get(name);
}
return defaultValue;
}
public int getClientId() {
return clientId;
}
public int getClientCount() {
return clientCount;
}
public long getSharedRandomNumber() {
return sharedRandomNumber;
this.impl = impl;
}
public long getProcessID() {
return processID;
return getProcessID(impl);
}
public void setProcessID(long processID) {
setProcessID(impl, processID);
}
public int getClientID() {
return getClientID(impl);
}
public int getClientCount() {
return getClientCount(impl);
}
public long getSharedRandomNumber() {
return getSharedRandomNumber(impl);
}
public String getOption(String name, String defaultValue) {
return getOption(impl, name, defaultValue);
}
public long getOption(String name, long defaultValue) {
return getOption(impl, name, defaultValue);
}
public boolean getOption(String name, boolean defaultValue) {
return getOption(impl, name, defaultValue);
}
public double getOption(String name, double defaultValue) {
return getOption(impl, name, defaultValue);
}
private static native long getProcessID(long self);
private static native void setProcessID(long self, long processID);
private static native boolean getOption(long impl, String name, boolean defaultValue);
private static native long getOption(long impl, String name, long defaultValue);
private static native double getOption(long impl, String name, double defaultValue);
private static native String getOption(long impl, String name, String defaultValue);
private static native int getClientID(long self);
private static native int getClientCount(long self);
private static native long getSharedRandomNumber(long self);
}

View File

@ -0,0 +1,64 @@
#!/usr/bin/python
#
# size_limit.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2019 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import fdb
import sys
fdb.api_version(610)
@fdb.transactional
def setValue(tr, key, value):
    # Write a single key/value pair; retries and commit are handled by the
    # @fdb.transactional decorator (tr is supplied from the db argument).
    tr[key] = value
@fdb.transactional
def setValueWithLimit(tr, key, value, limit):
    # Like setValue, but first caps THIS transaction's size via the
    # per-transaction size_limit option, which overrides the database-level
    # transaction size limit.
    tr.options.set_size_limit(limit)
    tr[key] = value
def run(clusterFile):
    # Exercise the transaction size-limit options against a live cluster:
    # a small write must succeed, and writes exceeding either the database-
    # level or the per-transaction size limit must fail with error 2101.
    db = fdb.open(clusterFile)
    db.options.set_transaction_timeout(2000)  # 2 seconds
    db.options.set_transaction_retry_limit(3)
    value = 'a' * 1024
    # Baseline: a ~1 KiB write succeeds and reads back identically.
    setValue(db, 't1', value)
    assert (value == db['t1'])
    try:
        # Database-level limit smaller than the write -> must raise.
        db.options.set_transaction_size_limit(1000)
        setValue(db, 't2', value)
        assert (False)  # not reached
    except fdb.impl.FDBError as e:
        assert (e.code == 2101)  # Transaction exceeds byte limit (2101)
    # Per transaction option overrides database option
    db.options.set_transaction_size_limit(1000000)
    try:
        setValueWithLimit(db, 't3', value, 1000)
        assert (False)  # not reached
    except fdb.impl.FDBError as e:
        assert (e.code == 2101)  # Transaction exceeds byte limit (2101)
# Expects a cluster file path as the sole command-line argument. This test
# writes to the given FDB cluster, so be aware of potential side effects.
if __name__ == '__main__':
    run(sys.argv[1])

View File

@ -133,6 +133,7 @@ def test_db_options(db):
db.options.set_transaction_timeout(0)
db.options.set_transaction_timeout(0)
db.options.set_transaction_max_retry_delay(100)
db.options.set_transaction_size_limit(100000)
db.options.set_transaction_retry_limit(10)
db.options.set_transaction_retry_limit(-1)
db.options.set_snapshot_ryw_enable()

View File

@ -459,6 +459,7 @@ class Tester
@db.options.set_transaction_timeout(100000)
@db.options.set_transaction_timeout(0)
@db.options.set_transaction_max_retry_delay(100)
@db.options.set_transaction_size_limit(100000)
@db.options.set_transaction_retry_limit(10)
@db.options.set_transaction_retry_limit(-1)
@db.options.set_snapshot_ryw_enable()

View File

@ -1,6 +1,6 @@
FROM centos:6
LABEL version=0.1.5
ENV DOCKER_IMAGEVER=0.1.5
LABEL version=0.1.6
ENV DOCKER_IMAGEVER=0.1.6
# Install dependencies for developer tools, bindings,\
# documentation, actorcompiler, and packaging tools\
@ -10,7 +10,7 @@ RUN yum install -y yum-utils &&\
yum -y install devtoolset-8 java-1.8.0-openjdk-devel \
rh-python36-python-devel devtoolset-8-valgrind-devel \
mono-core rh-ruby24 golang python27 rpm-build debbuild \
python-pip npm dos2unix valgrind-devel ccache &&\
python-pip npm dos2unix valgrind-devel ccache distcc &&\
pip install boto3==1.1.1
USER root
@ -42,4 +42,6 @@ RUN curl -L https://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.8.2.tar.gz >
rm -rf /tmp/libressl-2.8.2 /tmp/libressl.tar.gz
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
CMD scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash

View File

@ -0,0 +1,5 @@
cmake_minimum_required(VERSION 2.8.0)
project(fdb_c_app C)
find_package(FoundationDB-Client REQUIRED)
add_executable(app app.c)
target_link_libraries(app PRIVATE fdb_c)

View File

@ -0,0 +1,7 @@
#define FDB_API_VERSION 610
#include <foundationdb/fdb_c.h>
int main(int argc, char* argv[]) {
fdb_select_api_version(610);
return 0;
}

View File

@ -6,6 +6,10 @@ then
source ${source_dir}/modules/util.sh
install_build_tools() {
apt-get -y install cmake gcc
}
install() {
local __res=0
enterfun

View File

@ -8,6 +8,10 @@ then
conf_save_extension=".rpmsave"
install_build_tools() {
yum -y install cmake gcc
}
install() {
local __res=0
enterfun

View File

@ -67,6 +67,17 @@ then
popd
python -c 'import fdb; fdb.api_version(610)'
successOr "Loading python bindings failed"
# Test cmake and pkg-config integration: https://github.com/apple/foundationdb/issues/1483
install_build_tools
rm -rf build-fdb_c_app
mkdir build-fdb_c_app
pushd build-fdb_c_app
cmake /foundationdb/build/cmake/package_tester/fdb_c_app && make
successOr "FoundationDB-Client cmake integration failed"
cc /foundationdb/build/cmake/package_tester/fdb_c_app/app.c `pkg-config --libs --cflags foundationdb-client`
successOr "FoundationDB-Client pkg-config integration failed"
popd
}
keep_config() {

View File

@ -2,7 +2,7 @@ version: "3"
services:
common: &common
image: foundationdb/foundationdb-build:0.1.5
image: foundationdb/foundationdb-build:0.1.6
build-setup: &build-setup
<<: *common
@ -60,7 +60,7 @@ services:
snapshot-cmake: &snapshot-cmake
<<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" packages preinstall && cpack'
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" packages preinstall && cpack'
prb-cmake:
<<: *snapshot-cmake
@ -68,7 +68,7 @@ services:
snapshot-ctest: &snapshot-ctest
<<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
prb-ctest:
<<: *snapshot-ctest
@ -76,7 +76,7 @@ services:
snapshot-correctness: &snapshot-correctness
<<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && make -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure
prb-correctness:
<<: *snapshot-correctness

View File

@ -123,6 +123,8 @@ function(add_fdb_test)
${ADD_FDB_TEST_TEST_FILES}
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
get_filename_component(test_dir_full ${first_file} DIRECTORY)
if(NOT ${test_dir_full} STREQUAL "")
get_filename_component(test_dir ${test_dir_full} NAME)
set_tests_properties(${test_name} PROPERTIES TIMEOUT ${this_test_timeout} LABELS "${test_dir}")
endif()
endfunction()

1
cmake/Config.cmake.in Normal file
View File

@ -0,0 +1 @@
include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")

View File

@ -161,7 +161,6 @@ else()
-Wno-deprecated
-fvisibility=hidden
-Wreturn-type
-fdiagnostics-color=always
-fPIC)
if (GPERFTOOLS_FOUND AND GCC)
add_compile_options(

View File

@ -180,12 +180,12 @@ function(add_flow_target)
list(APPEND generated_files ${CMAKE_CURRENT_BINARY_DIR}/${generated})
if(WIN32)
add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated}"
COMMAND $<TARGET_FILE:actorcompiler> "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} ${actor_compiler_flags}
COMMAND $<TARGET_FILE:actorcompiler> "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler
COMMENT "Compile actor: ${src}")
else()
add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${generated}"
COMMAND ${MONO_EXECUTABLE} ${actor_exe} "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} ${actor_compiler_flags} > /dev/null
COMMAND ${MONO_EXECUTABLE} ${actor_exe} "${CMAKE_CURRENT_SOURCE_DIR}/${src}" "${CMAKE_CURRENT_BINARY_DIR}/${generated}" ${actor_compiler_flags} > /dev/null
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}" actorcompiler
COMMENT "Compile actor: ${src}")
endif()

View File

@ -79,70 +79,110 @@ function(install_symlink)
endif()
endfunction()
# 'map' from (destination, package) to path
# format vars like install_destination_for_${destination}_${package}
set(install_destination_for_bin_tgz "bin")
set(install_destination_for_bin_deb "usr/bin")
set(install_destination_for_bin_el6 "usr/bin")
set(install_destination_for_bin_el7 "usr/bin")
set(install_destination_for_bin_pm "usr/local/bin")
set(install_destination_for_sbin_tgz "sbin")
set(install_destination_for_sbin_deb "usr/sbin")
set(install_destination_for_sbin_el6 "usr/sbin")
set(install_destination_for_sbin_el7 "usr/sbin")
set(install_destination_for_sbin_pm "usr/local/libexec")
set(install_destination_for_lib_tgz "lib")
set(install_destination_for_lib_deb "usr/lib")
set(install_destination_for_lib_el6 "usr/lib64")
set(install_destination_for_lib_el7 "usr/lib64")
set(install_destination_for_lib_pm "lib")
set(install_destination_for_fdbmonitor_tgz "libexec")
set(install_destination_for_fdbmonitor_deb "usr/lib/foundationdb")
set(install_destination_for_fdbmonitor_el6 "usr/lib/foundationdb")
set(install_destination_for_fdbmonitor_el7 "usr/lib/foundationdb")
set(install_destination_for_fdbmonitor_pm "usr/local/libexec")
set(install_destination_for_include_tgz "include")
set(install_destination_for_include_deb "usr/include")
set(install_destination_for_include_el6 "usr/include")
set(install_destination_for_include_el7 "usr/include")
set(install_destination_for_include_pm "usr/local/include")
set(install_destination_for_etc_tgz "etc/foundationdb")
set(install_destination_for_etc_deb "etc/foundationdb")
set(install_destination_for_etc_el6 "etc/foundationdb")
set(install_destination_for_etc_el7 "etc/foundationdb")
set(install_destination_for_etc_pm "usr/local/etc/foundationdb")
set(install_destination_for_log_tgz "log/foundationdb")
set(install_destination_for_log_deb "var/log/foundationdb")
set(install_destination_for_log_el6 "var/log/foundationdb")
set(install_destination_for_log_el7 "var/log/foundationdb")
set(install_destination_for_log_pm "")
set(install_destination_for_data_tgz "lib/foundationdb")
set(install_destination_for_data_deb "var/lib/foundationdb")
set(install_destination_for_data_el6 "var/lib/foundationdb")
set(install_destination_for_data_el7 "var/lib/foundationdb")
set(install_destination_for_data_pm "")
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
# Configure a ".in" template once per target package format (tgz, deb, el6,
# el7, pm) and install each generated file into that package's destination.
#
# Keyword arguments:
#   FILE               - path to the template file (name conventionally ends in ".in")
#   DESTINATION        - logical destination key (bin, lib, etc, ...) resolved
#                        through install_destination_for_<DESTINATION>_<package>
#   DESTINATION_SUFFIX - optional suffix appended to the resolved path
#   COMPONENT          - install component; "-<package>" is appended per format
#
# INCLUDE_DIR and LIB_DIR are set per package so templates can reference them
# via @INCLUDE_DIR@ / @LIB_DIR@ (e.g. the pkg-config file).
function(fdb_configure_and_install)
  if(NOT WIN32 AND NOT OPEN_FOR_IDE)
    # No flag-style or multi-value keywords are used; define them explicitly
    # as empty so cmake_parse_arguments does not rely on unset variables.
    set(options)
    set(multi_value_options)
    set(one_value_options COMPONENT DESTINATION FILE DESTINATION_SUFFIX)
    cmake_parse_arguments(IN "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
    foreach(package tgz deb el6 el7 pm)
      set(INCLUDE_DIR "${install_destination_for_include_${package}}")
      set(LIB_DIR "${install_destination_for_lib_${package}}")
      set(install_path "${install_destination_for_${IN_DESTINATION}_${package}}")
      # Strip a trailing ".in" from the template name. "\\." is required so the
      # regex engine sees a literal dot: in a quoted CMake argument "\." is the
      # escape for a plain ".", which the regex would treat as a wildcard and
      # could strip one character too many (e.g. "main" -> "m").
      string(REGEX REPLACE "\\.in$" "" name "${IN_FILE}")
      get_filename_component(name "${name}" NAME)
      set(generated_file_name "${generated_dir}/${package}/${name}")
      configure_file("${IN_FILE}" "${generated_file_name}" @ONLY)
      install(
        FILES "${generated_file_name}"
        DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
        COMPONENT "${IN_COMPONENT}-${package}")
    endforeach()
  endif()
endfunction()
function(fdb_install)
if(NOT WIN32 AND NOT OPEN_FOR_IDE)
set(one_value_options COMPONENT DESTINATION)
set(one_value_options COMPONENT DESTINATION EXPORT DESTINATION_SUFFIX)
set(multi_value_options TARGETS FILES DIRECTORY)
cmake_parse_arguments(IN "${options}" "${one_value_options}" "${multi_value_options}" "${ARGN}")
set(install_export 0)
if(IN_TARGETS)
set(args TARGETS ${IN_TARGETS})
elseif(IN_FILES)
set(args FILES ${IN_FILES})
elseif(IN_DIRECTORY)
set(args DIRECTORY ${IN_DIRECTORY})
elseif(IN_EXPORT)
set(install_export 1)
else()
message(FATAL_ERROR "Expected FILES or TARGETS")
endif()
if("${IN_DESTINATION}" STREQUAL "bin")
install(${args} DESTINATION "bin" COMPONENT "${IN_COMPONENT}-tgz")
install(${args} DESTINATION "usr/bin" COMPONENT "${IN_COMPONENT}-deb")
install(${args} DESTINATION "usr/bin" COMPONENT "${IN_COMPONENT}-el6")
install(${args} DESTINATION "usr/bin" COMPONENT "${IN_COMPONENT}-el7")
install(${args} DESTINATION "usr/local/bin" COMPONENT "${IN_COMPONENT}-pm")
elseif("${IN_DESTINATION}" STREQUAL "sbin")
install(${args} DESTINATION "sbin" COMPONENT "${IN_COMPONENT}-tgz")
install(${args} DESTINATION "usr/sbin" COMPONENT "${IN_COMPONENT}-deb")
install(${args} DESTINATION "usr/sbin" COMPONENT "${IN_COMPONENT}-el6")
install(${args} DESTINATION "usr/sbin" COMPONENT "${IN_COMPONENT}-el7")
install(${args} DESTINATION "usr/local/libexec" COMPONENT "${IN_COMPONENT}-pm")
elseif("${IN_DESTINATION}" STREQUAL "lib")
install(${args} DESTINATION "lib" COMPONENT "${IN_COMPONENT}-tgz")
install(${args} DESTINATION "usr/lib" COMPONENT "${IN_COMPONENT}-deb")
install(${args} DESTINATION "usr/lib64" COMPONENT "${IN_COMPONENT}-el6")
install(${args} DESTINATION "usr/lib64" COMPONENT "${IN_COMPONENT}-el7")
install(${args} DESTINATION "lib" COMPONENT "${IN_COMPONENT}-pm")
elseif("${IN_DESTINATION}" STREQUAL "fdbmonitor")
install(${args} DESTINATION "libexec" COMPONENT "${IN_COMPONENT}-tgz")
install(${args} DESTINATION "usr/lib/foundationdb" COMPONENT "${IN_COMPONENT}-deb")
install(${args} DESTINATION "usr/lib/foundationdb" COMPONENT "${IN_COMPONENT}-el6")
install(${args} DESTINATION "usr/lib/foundationdb" COMPONENT "${IN_COMPONENT}-el7")
install(${args} DESTINATION "usr/local/libexec" COMPONENT "${IN_COMPONENT}-pm")
elseif("${IN_DESTINATION}" STREQUAL "include")
install(${args} DESTINATION "include" COMPONENT "${IN_COMPONENT}-tgz")
install(${args} DESTINATION "usr/include" COMPONENT "${IN_COMPONENT}-deb")
install(${args} DESTINATION "usr/include" COMPONENT "${IN_COMPONENT}-el6")
install(${args} DESTINATION "usr/include" COMPONENT "${IN_COMPONENT}-el7")
install(${args} DESTINATION "usr/local/include" COMPONENT "${IN_COMPONENT}-pm")
elseif("${IN_DESTINATION}" STREQUAL "etc")
install(${args} DESTINATION "etc/foundationdb" COMPONENT "${IN_COMPONENT}-tgz")
install(${args} DESTINATION "etc/foundationdb" COMPONENT "${IN_COMPONENT}-deb")
install(${args} DESTINATION "etc/foundationdb" COMPONENT "${IN_COMPONENT}-el6")
install(${args} DESTINATION "etc/foundationdb" COMPONENT "${IN_COMPONENT}-el7")
install(${args} DESTINATION "usr/local/etc/foundationdb" COMPONENT "${IN_COMPONENT}-pm")
elseif("${IN_DESTINATION}" STREQUAL "log")
install(${args} DESTINATION "log/foundationdb" COMPONENT "${IN_COMPONENT}-tgz")
install(${args} DESTINATION "var/log/foundationdb" COMPONENT "${IN_COMPONENT}-deb")
install(${args} DESTINATION "var/log/foundationdb" COMPONENT "${IN_COMPONENT}-el6")
install(${args} DESTINATION "var/log/foundationdb" COMPONENT "${IN_COMPONENT}-el7")
elseif("${IN_DESTINATION}" STREQUAL "data")
install(${args} DESTINATION "lib/foundationdb" COMPONENT "${IN_COMPONENT}-tgz")
install(${args} DESTINATION "var/lib/foundationdb/data" COMPONENT "${IN_COMPONENT}-deb")
install(${args} DESTINATION "var/lib/foundationdb/data" COMPONENT "${IN_COMPONENT}-el6")
install(${args} DESTINATION "var/lib/foundationdb/data" COMPONENT "${IN_COMPONENT}-el7")
foreach(package tgz deb el6 el7 pm)
set(install_path "${install_destination_for_${IN_DESTINATION}_${package}}")
if(install_export)
install(
EXPORT "${IN_EXPORT}-${package}"
DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
FILE "${IN_EXPORT}.cmake"
COMPONENT "${IN_COMPONENT}-${package}")
else()
message(FATAL_ERROR "unrecognized destination ${IN_DESTINATION}")
set(export_args "")
if (IN_EXPORT)
set(export_args EXPORT "${IN_EXPORT}-${package}")
endif()
if(NOT ${install_path} STREQUAL "")
install(
${args}
${export_args}
DESTINATION "${install_path}${IN_DESTINATION_SUFFIX}"
COMPONENT "${IN_COMPONENT}-${package}")
endif()
endif()
endforeach()
endif()
endfunction()

View File

@ -0,0 +1,9 @@
libdir=/@LIB_DIR@
includedir=/@INCLUDE_DIR@
Name: foundationdb-client
Description: FoundationDB c client
Version: @PROJECT_VERSION@
Libs: -L${libdir} -lfdb_c
Cflags: -I${includedir}

View File

@ -40,12 +40,14 @@
.. |retry-limit-transaction-option| replace:: FIXME
.. |timeout-transaction-option| replace:: FIXME
.. |max-retry-delay-transaction-option| replace:: FIXME
.. |size-limit-transaction-option| replace:: FIXME
.. |snapshot-ryw-enable-transaction-option| replace:: FIXME
.. |snapshot-ryw-disable-transaction-option| replace:: FIXME
.. |snapshot-ryw-enable-database-option| replace:: FIXME
.. |snapshot-ryw-disable-database-option| replace:: FIXME
.. |retry-limit-database-option| replace:: FIXME
.. |max-retry-delay-database-option| replace:: FIXME
.. |transaction-size-limit-database-option| replace:: FIXME
.. |timeout-database-option| replace:: FIXME
.. include:: api-common.rst.inc

View File

@ -306,6 +306,10 @@
Set the default maximum number of retries for each transaction after which additional calls to |on-error-func| will throw the most recently seen error code. This is equivalent to calling |retry-limit-transaction-option| on each transaction created by this database.
.. |option-db-tr-size-limit-blurb| replace::
Set the default maximum transaction size in bytes. This is equivalent to calling |transaction-size-limit-database-option| on each transaction created by this database.
.. |option-db-tr-timeout-blurb| replace::
Set the default timeout duration in milliseconds after which all transactions created by this database will automatically be cancelled. This is equivalent to calling |timeout-transaction-option| on each transaction created by this database. This option can only be called if the API version is at least 610.
@ -393,6 +397,10 @@
Set the maximum backoff delay incurred in the call to |on-error-func| if the error is retryable. Prior to API version 610, like all other transaction options, the maximum retry delay must be reset after a call to |on-error-func|. If the API version is 610 or newer, then the maximum retry delay is not reset. Note that at all API versions, it is safe and legal to call this option after each call to |on-error-func|, so most code written assuming the older behavior can be upgraded without requiring any modification. This also means there is no need to introduce logic to conditionally set this option within retry loops. One can set the default maximum retry delay for all transactions by calling |max-retry-delay-database-option|.
.. |option-set-size-limit-blurb| replace::
Set the maximum transaction size limit in bytes. The size is calculated by combining the sizes of all keys and values written or mutated, all key ranges cleared, and all read and write conflict ranges. (In other words, it includes the total size of all data included in the request to the cluster to commit the transaction.) Large transactions can cause performance problems on FoundationDB clusters, so setting this limit to a smaller value than the default can help prevent the client from accidentally degrading the cluster's performance. This value must be at least 32 and cannot be set to higher than 10,000,000, the default transaction size limit. The value set by this limit will persist across transaction resets.
.. |option-set-timeout-blurb1| replace::
Set a timeout duration in milliseconds after which the transaction will automatically be cancelled. The time is measured from transaction creation (or the most recent call to |reset-func-name|, if any). Valid parameter values are [0, INT_MAX]. If set to 0, all timeouts will be disabled. Once a transaction has timed out, all pending or future uses of the transaction will |error-raise-type| a :ref:`transaction_timed_out <developer-guide-error-codes>` |error-type|. The transaction can be used again after it is |reset-func-name|.

View File

@ -24,6 +24,7 @@
.. |retry-limit-database-option| replace:: :func:`Database.options.set_transaction_retry_limit`
.. |timeout-database-option| replace:: :func:`Database.options.set_transaction_timeout`
.. |max-retry-delay-database-option| replace:: :func:`Database.options.set_transaction_max_retry_delay`
.. |transaction-size-limit-database-option| replace:: :func:`Database.options.set_transaction_size_limit`
.. |snapshot-ryw-enable-database-option| replace:: :func:`Database.options.set_snapshot_ryw_enable`
.. |snapshot-ryw-disable-database-option| replace:: :func:`Database.options.set_snapshot_ryw_disable`
.. |future-type-string| replace:: a :ref:`future <api-python-future>`
@ -31,6 +32,7 @@
.. |retry-limit-transaction-option| replace:: :func:`Transaction.options.set_retry_limit`
.. |timeout-transaction-option| replace:: :func:`Transaction.options.set_timeout`
.. |max-retry-delay-transaction-option| replace:: :func:`Transaction.options.set_max_retry_delay`
.. |size-limit-transaction-option| replace:: :func:`Transaction.options.set_size_limit`
.. |snapshot-ryw-enable-transaction-option| replace:: :func:`Transaction.options.set_snapshot_ryw_enable`
.. |snapshot-ryw-disable-transaction-option| replace:: :func:`Transaction.options.set_snapshot_ryw_disable`
.. |lazy-iterator-object| replace:: generator
@ -378,6 +380,10 @@ Database options
|option-db-tr-max-retry-delay-blurb|
.. method:: Database.options.set_transaction_size_limit(size_limit)
|option-db-tr-size-limit-blurb|
.. method:: Database.options.set_snapshot_ryw_enable()
|option-db-snapshot-ryw-enable-blurb|
@ -835,6 +841,10 @@ Transaction options
|option-set-max-retry-delay-blurb|
.. method:: Transaction.options.set_size_limit
|option-set-size-limit-blurb|
.. _api-python-timeout:
.. method:: Transaction.options.set_timeout

View File

@ -22,6 +22,7 @@
.. |retry-limit-database-option| replace:: :meth:`Database.options.set_transaction_retry_limit`
.. |timeout-database-option| replace:: :meth:`Database.options.set_transaction_timeout`
.. |max-retry-delay-database-option| replace:: :meth:`Database.options.set_transaction_max_retry_delay`
.. |transaction-size-limit-database-option| replace:: :func:`Database.options.set_transaction_size_limit`
.. |snapshot-ryw-enable-database-option| replace:: :meth:`Database.options.set_snapshot_ryw_enable`
.. |snapshot-ryw-disable-database-option| replace:: :meth:`Database.options.set_snapshot_ryw_disable`
.. |future-type-string| replace:: a :class:`Future`
@ -29,6 +30,7 @@
.. |retry-limit-transaction-option| replace:: :meth:`Transaction.options.set_retry_limit`
.. |timeout-transaction-option| replace:: :meth:`Transaction.options.set_timeout`
.. |max-retry-delay-transaction-option| replace:: :meth:`Transaction.options.set_max_retry_delay`
.. |size-limit-transaction-option| replace:: :meth:`Transaction.options.set_size_limit`
.. |snapshot-ryw-enable-transaction-option| replace:: :meth:`Transaction.options.set_snapshot_ryw_enable`
.. |snapshot-ryw-disable-transaction-option| replace:: :meth:`Transaction.options.set_snapshot_ryw_disable`
.. |lazy-iterator-object| replace:: :class:`Enumerator`
@ -374,6 +376,10 @@ Database options
|option-db-tr-max-retry-delay-blurb|
.. method:: Database.options.set_transaction_size_limit(size_limit) -> nil
|option-db-tr-size-limit-blurb|
.. method:: Database.options.set_snapshot_ryw_enable() -> nil
|option-db-snapshot-ryw-enable-blurb|
@ -779,6 +785,10 @@ Transaction options
|option-set-max-retry-delay-blurb|
.. method:: Transaction.options.set_size_limit() -> nil
|option-set-size-limit-blurb|
.. method:: Transaction.options.set_timeout() -> nil
|option-set-timeout-blurb1|

View File

@ -1,3 +1,6 @@
.. default-domain:: cpp
.. highlight:: cpp
###############
Client Testing
###############
@ -49,24 +52,112 @@ that gets called by server processes running the ``tester`` role. Additionally,
simulates a full fdb cluster with several machines and different configurations in one process. This simulator can run the same
workloads you can run on a real cluster. It will also inject random failures like network partitions and disk failures.
Currently, workloads can only be implemented in Java, support for other languages might come later.
This tutorial explains how one can implement a workload, how one can orchestrate a workload on a cluster with multiple clients, and
how one can run a workload within a simulator. Running in a simulator is also useful as it does not require any setup: you can simply
run one command that will provide you with a fully functional FoundationDB cluster.
Preparing the fdbserver Binary
==============================
General Overview
================
In order to run a Java workload, ``fdbserver`` needs to be able to embed a JVM. Because of that it needs to be linked against JNI.
The official FDB binaries do not link against JNI and therefore one can't use that to run a Java workload. Instead you need to
download the sources and build them. Make sure that ``cmake`` can find Java and pass ``-DWITH_JAVA_WORKLOAD=ON`` to cmake.
Workloads in FoundationDB are generally compiled into the binary. However, FoundationDB also provides the ability to load workloads
dynamically. This is done through ``dlopen`` (on Unix like operating systems) or ``LoadLibrary`` (on Windows).
After FoundationDB was built, you can use ``bin/fdbserver`` to run the server. The jar file containing the client library can be
found in ``packages/fdb-VERSION.jar``. Both of these are in the build directory.
Parallelism and Determinism
===========================
Implementing a Workload
=======================
A workload can run either in a simulation or on a real cluster. In simulation, ``fdbserver`` will simulate a whole cluster and will
use a deterministic random number generator to simulate random behavior and random failures. This random number generator is initialized
with a random seed. In case of a test failure, the user can reuse the given seed and rerun the same test in order to further observe
and debug the behavior.
However, this will only work as long as the workload doesn't introduce any non-deterministic behavior. One example of non-deterministic
behavior is running multiple threads.
The workload is created in the main network thread and it will run in the main network thread. Because of this, using any blocking
function (for example ``blockUntilReady`` on a future object) will result in a deadlock. Using the callback API is therefore required
if one wants to keep the simulator's deterministic behavior.
For existing applications and layers, however, not using the blocking API might not be an option. For these use-cases, a user can choose
to start new threads and use the blocking API from within these threads. This will mean that test failures will be non-deterministic and
might be hard to reproduce.
To start a new thread, one has to "bind" operating system threads to their simulated processes. This can be done by setting the
``ProcessId`` in the child threads when they get created. In Java this is done by only starting new threads through the provided
``Executor``. In the C++ API one can use the ``FDBWorkloadContext`` to do that. For example:
.. code-block:: C++
template<class Fun>
std::thread startThread(FDBWorkloadContext* context, Fun fun) {
auto processId = context->getProcessID();
    return std::thread([context, processId, fun]() {
        context->setProcessID(processId);
        fun();
    });
}
Finding the Shared Object
=========================
When the test starts, ``fdbserver`` needs to find the shared object to load. The name of this shared object has to be provided.
For Java, we provide an implementation in ``libjava_workloads.so`` which can be built out of the sources. The tester will look
for the key ``libraryName`` in the test file which should be the name of the library without extension and without the ``lib``
prefix (so ``java_workloads`` if you want to write a Java workload).
By default, the process will look for the library in the directory ``../shared/foundationdb/`` relative to the location of the
``fdbserver`` binary. If the library is somewhere else on the system, one can provide the absolute path to the library (only
the folder, not the file name) in the test file with the ``libraryPath`` option.
Implementing a C++ Workload
===========================
In order to implement a workload, one has to build a shared library that links against the fdb client library. This library has to
expose a function (with C linkage) called workloadFactory which needs to return a pointer to an object of type ``FDBWorkloadFactory``.
This mechanism allows the author to implement as many workloads within one library as they want. To do this the pure virtual classes
``FDBWorkloadFactory`` and ``FDBWorkload`` have to be implemented.
.. function:: FDBWorkloadFactory* workloadFactory(FDBLogger*)
This function has to be defined within the shared library and will be called by ``fdbserver`` for looking up a specific workload.
``FDBLogger`` will be passed and is guaranteed to survive for the lifetime of the process. This class can be used to write to the
FoundationDB traces. Logging anything with severity ``FDBSeverity::Error`` will result in a hard test failure. This function needs
to have C linkage, so define it in an ``extern "C"`` block.
.. function:: std::shared_ptr<FDBWorkload> FDBWorkload::create(const std::string& name)
This is the only method to be implemented in ``FDBWorkloadFactory``. If the test file contains a key-value pair ``workloadName``
the value will be passed to this method (empty string otherwise). This way, a library author can implement many workloads in one
library and use the test file to choose which one to run (or run multiple workloads either concurrently or serially).
.. function:: std::string FDBWorkload::description() const
This method has to return the name of the workload. This can be a static name and is primarily used for tracing.
.. function:: bool FDBWorkload::init(FDBWorkloadContext* context)
This method is called right after the workload object is created; it can be used to initialize the workload from the given context (the boolean return value presumably indicates whether initialization succeeded — verify against the ``fdbserver`` sources).
.. function:: void FDBWorkload::setup(FDBDatabase* db, GenericPromise<bool> done)
This method will be called by the tester during the setup phase. It should be used to populate the database.
.. function:: void FDBWorkload::start(FDBDatabase* db, GenericPromise<bool> done)
This method should run the actual test.
.. function:: void FDBWorkload::check(FDBDatabase* db, GenericPromise<bool> done)
When the tester completes, this method will be called. A workload should run any consistency/correctness tests
during this phase.
.. function:: void FDBWorkload::getMetrics(std::vector<FDBPerfMetric>& out) const
If a workload collects metrics (like latencies or throughput numbers), these should be reported back here.
The multitester (or test orchestrator) will collect all metrics from all test clients and it will aggregate them.
Implementing a Java Workload
============================
In order to implement your own workload in Java you can simply create an implementation of the abstract class ``AbstractWorkload``.
A minimal implementation will look like this:
@ -74,6 +165,7 @@ A minimal implementation will look like this:
.. code-block:: java
package my.package;
import com.apple.foundationdb.testing.Promise;
import com.apple.foundationdb.testing.AbstractWorkload;
import com.apple.foundationdb.testing.WorkloadContext;
@ -83,19 +175,21 @@ A minimal implementation will look like this:
}
@Override
public void setup(Database db) {
public void setup(Database db, Promise promise) {
log(20, "WorkloadSetup", null);
promise.send(true);
}
@Override
public void start(Database db) {
log(20, "WorkloadStarted", null);
promise.send(true);
}
@Override
public boolean check(Database db) {
log(20, "WorkloadFailureCheck", null);
return true;
promise.send(true);
}
}
@ -165,9 +259,9 @@ A test file might look like this:
.. code-block:: none
testTitle=MyTest
testName=JavaWorkload
workloadClass=my.package.MinimalWorkload
jvmOptions=-Djava.class.path=*PATH_TO_FDB_CLIENT_JAR*,*other options you want to pass to the JVM*
testName=External
libraryName=java_workloads
workloadName=my.package.MinimalWorkload
classPath=PATH_TO_JAR_OR_DIR_CONTAINING_WORKLOAD,OTHER_DEPENDENCIES
testName=Attrition
@ -176,15 +270,15 @@ A test file might look like this:
machinesToKill=3
testTitle=AnotherTest
workloadClass=my.package.MinimalWorkload
workloadClass=my.package.MinimalWorkload
jvmOptions=-Djava.class.path=*PATH_TO_FDB_CLIENT_JAR*,*other options you want to pass to the JVM*
testName=External
libraryName=java_workloads
workloadName=my.package.MinimalWorkload
classPath=PATH_TO_JAR_OR_DIR_CONTAINING_WORKLOAD,OTHER_DEPENDENCIES
someOpion=foo
someOption=foo
workloadClass=my.package.AnotherWorkload
workloadClass=my.package.AnotherWorkload
jvmOptions=-Djava.class.path=*PATH_TO_FDB_CLIENT_JAR*,*other options you want to pass to the JVM*
testName=External
libraryName=java_workloads
workloadName=my.package.AnotherWorkload
classPath=PATH_TO_JAR_OR_DIR_CONTAINING_WORKLOAD,OTHER_DEPENDENCIES
anotherOption=foo

View File

@ -42,12 +42,14 @@
.. |retry-limit-transaction-option| replace:: FIXME
.. |timeout-transaction-option| replace:: FIXME
.. |max-retry-delay-transaction-option| replace:: FIXME
.. |size-limit-transaction-option| replace:: FIXME
.. |snapshot-ryw-enable-transaction-option| replace:: FIXME
.. |snapshot-ryw-disable-transaction-option| replace:: FIXME
.. |snapshot-ryw-enable-database-option| replace:: FIXME
.. |snapshot-ryw-disable-database-option| replace:: FIXME
.. |retry-limit-database-option| replace:: FIXME
.. |max-retry-delay-database-option| replace:: FIXME
.. |transaction-size-limit-database-option| replace:: FIXME
.. |timeout-database-option| replace:: FIXME
.. include:: api-common.rst.inc

View File

@ -42,12 +42,14 @@
.. |retry-limit-transaction-option| replace:: FIXME
.. |timeout-transaction-option| replace:: FIXME
.. |max-retry-delay-transaction-option| replace:: FIXME
.. |size-limit-transaction-option| replace:: FIXME
.. |snapshot-ryw-enable-transaction-option| replace:: FIXME
.. |snapshot-ryw-disable-transaction-option| replace:: FIXME
.. |snapshot-ryw-enable-database-option| replace:: FIXME
.. |snapshot-ryw-disable-database-option| replace:: FIXME
.. |retry-limit-database-option| replace:: FIXME
.. |max-retry-delay-database-option| replace:: FIXME
.. |transaction-size-limit-database-option| replace:: FIXME
.. |timeout-database-option| replace:: FIXME
.. include:: api-common.rst.inc

View File

@ -14,6 +14,8 @@ Performance
Fixes
-----
* If a cluster is upgraded during an ``onError`` call, the cluster could return a ``cluster_version_changed`` error. `(PR #1734) <https://github.com/apple/foundationdb/pull/1734>`_.
Status
------

View File

@ -779,7 +779,6 @@ const KeyRef exeRestore = LiteralStringRef("fdbrestore");
const KeyRef exeDatabaseAgent = LiteralStringRef("dr_agent");
const KeyRef exeDatabaseBackup = LiteralStringRef("fdbdr");
extern void flushTraceFileVoid();
extern const char* getHGVersion();
#ifdef _WIN32

View File

@ -59,10 +59,22 @@ extern const char* getHGVersion();
std::vector<std::string> validOptions;
enum { OPT_CONNFILE, OPT_DATABASE, OPT_HELP, OPT_TRACE, OPT_TRACE_DIR, OPT_TIMEOUT, OPT_EXEC, OPT_NO_STATUS, OPT_STATUS_FROM_JSON, OPT_VERSION, OPT_TRACE_FORMAT };
enum {
OPT_CONNFILE,
OPT_DATABASE,
OPT_HELP,
OPT_TRACE,
OPT_TRACE_DIR,
OPT_TIMEOUT,
OPT_EXEC,
OPT_NO_STATUS,
OPT_STATUS_FROM_JSON,
OPT_VERSION,
OPT_TRACE_FORMAT,
OPT_USE_OBJECT_SERIALIZER
};
CSimpleOpt::SOption g_rgOptions[] = {
{ OPT_CONNFILE, "-C", SO_REQ_SEP },
CSimpleOpt::SOption g_rgOptions[] = { { OPT_CONNFILE, "-C", SO_REQ_SEP },
{ OPT_CONNFILE, "--cluster_file", SO_REQ_SEP },
{ OPT_DATABASE, "-d", SO_REQ_SEP },
{ OPT_TRACE, "--log", SO_NONE },
@ -77,13 +89,14 @@ CSimpleOpt::SOption g_rgOptions[] = {
{ OPT_VERSION, "--version", SO_NONE },
{ OPT_VERSION, "-v", SO_NONE },
{ OPT_TRACE_FORMAT, "--trace_format", SO_REQ_SEP },
{ OPT_USE_OBJECT_SERIALIZER, "-S", SO_REQ_SEP },
{ OPT_USE_OBJECT_SERIALIZER, "--object-serializer", SO_REQ_SEP },
#ifndef TLS_DISABLED
TLS_OPTION_FLAGS
#endif
SO_END_OF_OPTIONS
};
SO_END_OF_OPTIONS };
void printAtCol(const char* text, int col) {
const char* iter = text;
@ -407,6 +420,10 @@ static void printProgramUsage(const char* name) {
" --trace_format FORMAT\n"
" Select the format of the log files. xml (the default) and json\n"
" are supported. Has no effect unless --log is specified.\n"
" -S ON|OFF, --object-serializer ON|OFF\n"
" Use object serializer for sending messages. The object serializer\n"
" is currently a beta feature and it allows fdb processes to talk to\n"
" each other even if they don't have the same version\n"
" --exec CMDS Immediately executes the semicolon separated CLI commands\n"
" and then exits.\n"
" --no-status Disables the initial status check done when starting\n"
@ -2332,6 +2349,7 @@ struct CLIOptions {
bool trace;
std::string traceDir;
std::string traceFormat;
bool useObjectSerializer = false;
int exit_timeout;
Optional<std::string> exec;
bool initialStatusCheck;
@ -2433,6 +2451,20 @@ struct CLIOptions {
}
traceFormat = args.OptionArg();
break;
case OPT_USE_OBJECT_SERIALIZER: {
std::string s = args.OptionArg();
std::transform(s.begin(), s.end(), s.begin(), ::tolower);
if (s == "on" || s == "true" || s == "1") {
useObjectSerializer = true;
} else if (s == "off" || s == "false" || s == "0") {
useObjectSerializer = false;
} else {
fprintf(stderr, "ERROR: Could not parse object serializer option: `%s'\n", s.c_str());
printProgramUsage(program_name.c_str());
flushAndExit(FDB_EXIT_ERROR);
}
break;
}
case OPT_VERSION:
printVersion();
return FDB_EXIT_SUCCESS;
@ -3490,6 +3522,11 @@ int main(int argc, char **argv) {
}
setNetworkOption(FDBNetworkOptions::ENABLE_SLOW_TASK_PROFILING);
}
// The USE_OBJECT_SERIALIZER network option expects an 8 byte little endian integer which is interpreted as zero =
// false, non-zero = true.
setNetworkOption(FDBNetworkOptions::USE_OBJECT_SERIALIZER,
opt.useObjectSerializer ? LiteralStringRef("\x01\x00\x00\x00\x00\x00\x00\x00")
: LiteralStringRef("\x00\x00\x00\x00\x00\x00\x00\x00"));
initHelp();

View File

@ -419,7 +419,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RangeResultWithVersi
//add lock
releaser.release();
wait(lock->take(TaskDefaultYield, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT));
wait(lock->take(TaskPriority::DefaultYield, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT));
releaser = FlowLock::Releaser(*lock, limits.bytes + CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT);
state Standalone<RangeResultRef> values = wait(tr.getRange(begin, end, limits));
@ -495,7 +495,7 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RCGroup> results, Fu
//add lock
wait(active);
releaser.release();
wait(lock->take(TaskDefaultYield, rangevalue.expectedSize() + rcGroup.items.expectedSize()));
wait(lock->take(TaskPriority::DefaultYield, rangevalue.expectedSize() + rcGroup.items.expectedSize()));
releaser = FlowLock::Releaser(*lock, rangevalue.expectedSize() + rcGroup.items.expectedSize());
for (auto & s : rangevalue){
@ -613,7 +613,7 @@ ACTOR Future<int> dumpData(Database cx, PromiseStream<RCGroup> results, Referenc
req.flags = req.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
totalBytes += mutationSize;
wait( commitLock->take(TaskDefaultYield, mutationSize) );
wait( commitLock->take(TaskPriority::DefaultYield, mutationSize) );
addActor.send( commitLock->releaseWhen( success(commit.getReply(req)), mutationSize ) );
if(endOfStream) {
@ -653,7 +653,7 @@ ACTOR Future<Void> coalesceKeyVersionCache(Key uid, Version endVersion, Referenc
req.transaction.read_snapshot = committedVersion->get();
req.flags = req.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
wait( commitLock->take(TaskDefaultYield, mutationSize) );
wait( commitLock->take(TaskPriority::DefaultYield, mutationSize) );
addActor.send( commitLock->releaseWhen( success(commit.getReply(req)), mutationSize ) );
}
@ -671,7 +671,7 @@ ACTOR Future<Void> applyMutations(Database cx, Key uid, Key addPrefix, Key remov
try {
loop {
if(beginVersion >= *endVersion) {
wait( commitLock.take(TaskDefaultYield, CLIENT_KNOBS->BACKUP_LOCK_BYTES) );
wait( commitLock.take(TaskPriority::DefaultYield, CLIENT_KNOBS->BACKUP_LOCK_BYTES) );
commitLock.release(CLIENT_KNOBS->BACKUP_LOCK_BYTES);
if(beginVersion >= *endVersion) {
return Void();

View File

@ -49,12 +49,14 @@ struct RebootRequest {
constexpr static FileIdentifier file_identifier = 11913957;
bool deleteData;
bool checkData;
uint32_t waitForDuration;
explicit RebootRequest(bool deleteData = false, bool checkData = false) : deleteData(deleteData), checkData(checkData) {}
explicit RebootRequest(bool deleteData = false, bool checkData = false, uint32_t waitForDuration = 0)
: deleteData(deleteData), checkData(checkData), waitForDuration(waitForDuration) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, deleteData, checkData);
serializer(ar, deleteData, checkData, waitForDuration);
}
};

View File

@ -52,12 +52,12 @@ struct ClusterInterface {
}
void initEndpoints() {
openDatabase.getEndpoint( TaskClusterController );
failureMonitoring.getEndpoint( TaskFailureMonitor );
databaseStatus.getEndpoint( TaskClusterController );
ping.getEndpoint( TaskClusterController );
getClientWorkers.getEndpoint( TaskClusterController );
forceRecovery.getEndpoint( TaskClusterController );
openDatabase.getEndpoint( TaskPriority::ClusterController );
failureMonitoring.getEndpoint( TaskPriority::FailureMonitor );
databaseStatus.getEndpoint( TaskPriority::ClusterController );
ping.getEndpoint( TaskPriority::ClusterController );
getClientWorkers.getEndpoint( TaskPriority::ClusterController );
forceRecovery.getEndpoint( TaskPriority::ClusterController );
}
template <class Ar>

View File

@ -584,7 +584,7 @@ namespace dbBackup {
loop{
try {
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
tr.options.customTransactionSizeLimit = 2 * CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
tr.options.sizeLimit = 2 * CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
wait(checkDatabaseLock(&tr, BinaryReader::fromStringRef<UID>(task->params[BackupAgentBase::keyConfigLogUid], Unversioned())));
state int64_t bytesSet = 0;
@ -1080,7 +1080,7 @@ namespace dbBackup {
loop{
try {
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
tr.options.customTransactionSizeLimit = 2 * CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
tr.options.sizeLimit = 2 * CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
wait(checkDatabaseLock(&tr, BinaryReader::fromStringRef<UID>(task->params[BackupAgentBase::keyConfigLogUid], Unversioned())));
state int64_t bytesSet = 0;

View File

@ -54,7 +54,7 @@ public:
// For internal (fdbserver) use only
static Database create( Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface, Reference<ClusterConnectionFile> connFile, LocalityData const& clientLocality );
static Database create( Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID=TaskDefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST );
static Database create( Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID=TaskPriority::DefaultEndpoint, bool lockAware=false, int apiVersion=Database::API_VERSION_LATEST );
~DatabaseContext();
@ -97,7 +97,7 @@ public:
//private:
explicit DatabaseContext( Reference<Cluster> cluster, Reference<AsyncVar<ClientDBInfo>> clientDBInfo,
Future<Void> clientInfoMonitor, Standalone<StringRef> dbId, int taskID, LocalityData const& clientLocality,
Future<Void> clientInfoMonitor, Standalone<StringRef> dbId, TaskPriority taskID, LocalityData const& clientLocality,
bool enableLocalityLoadBalance, bool lockAware, int apiVersion = Database::API_VERSION_LATEST );
explicit DatabaseContext( const Error &err );
@ -157,11 +157,12 @@ public:
double transactionTimeout;
int transactionMaxRetries;
double transactionMaxBackoff;
int transactionMaxSize; // Max size in bytes.
int snapshotRywEnabled;
Future<Void> logger;
int taskID;
TaskPriority taskID;
Int64MetricHandle getValueSubmitted;
EventMetricHandle<GetValueComplete> getValueCompleted;

View File

@ -605,7 +605,7 @@ struct TLogVersion {
MIN_SUPPORTED = V2,
MAX_SUPPORTED = V3,
MIN_RECRUITABLE = V2,
DEFAULT = V2,
DEFAULT = V3,
} version;
TLogVersion() : version(UNSET) {}
@ -640,7 +640,7 @@ struct TLogSpillType {
// These enumerated values are stored in the database configuration, so can NEVER be changed. Only add new ones just before END.
enum SpillType {
UNSET = 0,
DEFAULT = 1,
DEFAULT = 2,
VALUE = 1,
REFERENCE = 2,
END = 3,

View File

@ -41,7 +41,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
{
state Version version = 0;
state Future<FailureMonitoringReply> request = Never();
state Future<Void> nextRequest = delay(0, TaskFailureMonitor);
state Future<Void> nextRequest = delay(0, TaskPriority::FailureMonitor);
state Future<Void> requestTimeout = Never();
state double before = now();
state double waitfor = 0;
@ -61,7 +61,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
loop {
choose {
when( FailureMonitoringReply reply = wait( request ) ) {
g_network->setCurrentTask(TaskDefaultDelay);
g_network->setCurrentTask(TaskPriority::DefaultDelay);
request = Never();
requestTimeout = Never();
if (reply.allOthersFailed) {
@ -122,10 +122,10 @@ ACTOR Future<Void> failureMonitorClientLoop(
}
before = now();
waitfor = reply.clientRequestIntervalMS * .001;
nextRequest = delayJittered( waitfor, TaskFailureMonitor );
nextRequest = delayJittered( waitfor, TaskPriority::FailureMonitor );
}
when( wait( requestTimeout ) ) {
g_network->setCurrentTask(TaskDefaultDelay);
g_network->setCurrentTask(TaskPriority::DefaultDelay);
requestTimeout = Never();
TraceEvent(SevWarn, "FailureMonitoringServerDown").detail("OldServerID",controller.id());
monitor->setStatus(controlAddr.address, FailureStatus(true));
@ -136,7 +136,7 @@ ACTOR Future<Void> failureMonitorClientLoop(
}
}
when( wait( nextRequest ) ) {
g_network->setCurrentTask(TaskDefaultDelay);
g_network->setCurrentTask(TaskPriority::DefaultDelay);
nextRequest = Never();
double elapsed = now() - before;
@ -152,9 +152,9 @@ ACTOR Future<Void> failureMonitorClientLoop(
req.addresses = g_network->getLocalAddresses();
if (trackMyStatus)
req.senderStatus = FailureStatus(false);
request = controller.failureMonitoring.getReply( req, TaskFailureMonitor );
request = controller.failureMonitoring.getReply( req, TaskPriority::FailureMonitor );
if(!controller.failureMonitoring.getEndpoint().isLocal())
requestTimeout = delay( fmState->serverFailedTimeout, TaskFailureMonitor );
requestTimeout = delay( fmState->serverFailedTimeout, TaskPriority::FailureMonitor );
}
}
}

View File

@ -93,7 +93,7 @@ namespace HTTP {
loop {
// Wait for connection to have something to read
wait(conn->onReadable());
wait( delay( 0, TaskReadSocket ) );
wait( delay( 0, TaskPriority::ReadSocket ) );
// Read into buffer
int originalSize = buf->size();
@ -353,7 +353,7 @@ namespace HTTP {
loop {
wait(conn->onWritable());
wait( delay( 0, TaskWriteSocket ) );
wait( delay( 0, TaskPriority::WriteSocket ) );
// If we already got a response, before finishing sending the request, then close the connection,
// set the Connection header to "close" as a hint to the caller that this connection can't be used

View File

@ -51,7 +51,7 @@ public:
double RESOURCE_CONSTRAINED_MAX_BACKOFF;
int PROXY_COMMIT_OVERHEAD_BYTES;
int64_t TRANSACTION_SIZE_LIMIT;
int TRANSACTION_SIZE_LIMIT;
int64_t KEY_SIZE_LIMIT;
int64_t SYSTEM_KEY_SIZE_LIMIT;
int64_t VALUE_SIZE_LIMIT;

View File

@ -967,7 +967,7 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
vector<Future<Optional<LeaderInfo>>> leaderServers;
ClientCoordinators coord( Reference<ClusterConnectionFile>( new ClusterConnectionFile( conn ) ) );
for( int i = 0; i < coord.clientLeaderServers.size(); i++ )
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskCoordinationReply ) );
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskPriority::CoordinationReply ) );
choose {
when( wait( waitForAll( leaderServers ) ) ) {}
@ -1047,7 +1047,7 @@ struct AutoQuorumChange : IQuorumChange {
ClientCoordinators coord(ccf);
vector<Future<Optional<LeaderInfo>>> leaderServers;
for( int i = 0; i < coord.clientLeaderServers.size(); i++ )
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskCoordinationReply ) );
leaderServers.push_back( retryBrokenPromise( coord.clientLeaderServers[i].getLeader, GetLeaderRequest( coord.clusterKey, UID() ), TaskPriority::CoordinationReply ) );
Optional<vector<Optional<LeaderInfo>>> results = wait( timeout( getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY ) );
if (!results.present()) return false; // Not all responded
for(auto& r : results.get())

View File

@ -67,10 +67,10 @@ struct MasterProxyInterface {
}
void initEndpoints() {
getConsistentReadVersion.getEndpoint(TaskProxyGetConsistentReadVersion);
getRawCommittedVersion.getEndpoint(TaskProxyGetRawCommittedVersion);
commit.getEndpoint(TaskProxyCommitDispatcher);
getStorageServerRejoinInfo.getEndpoint(TaskProxyStorageRejoin);
getConsistentReadVersion.getEndpoint(TaskPriority::ProxyGetConsistentReadVersion);
getRawCommittedVersion.getEndpoint(TaskPriority::ProxyGetRawCommittedVersion);
commit.getEndpoint(TaskPriority::ProxyCommitDispatcher);
getStorageServerRejoinInfo.getEndpoint(TaskPriority::ProxyStorageRejoin);
//getKeyServersLocations.getEndpoint(TaskProxyGetKeyServersLocations); //do not increase the priority of these requests, because clients cans bring down the cluster with too many of these messages.
}
};

View File

@ -371,7 +371,7 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( NetworkAddress remote )
}
ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) {
getLeader.makeWellKnownEndpoint( WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskCoordination );
getLeader.makeWellKnownEndpoint( WLTOKEN_CLIENTLEADERREG_GETLEADER, TaskPriority::Coordination );
}
// Nominee is the worker among all workers that are considered as leader by a coordinator
@ -380,7 +380,7 @@ ClientLeaderRegInterface::ClientLeaderRegInterface( INetwork* local ) {
ACTOR Future<Void> monitorNominee( Key key, ClientLeaderRegInterface coord, AsyncTrigger* nomineeChange, Optional<LeaderInfo> *info, int generation, Reference<AsyncVar<int>> connectedCoordinatorsNum ) {
state bool hasCounted = false;
loop {
state Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskCoordinationReply ) );
state Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.getLeader, GetLeaderRequest( key, info->present() ? info->get().changeID : UID() ), TaskPriority::CoordinationReply ) );
if (li.present() && !hasCounted && connectedCoordinatorsNum.isValid()) {
connectedCoordinatorsNum->set(connectedCoordinatorsNum->get() + 1);
hasCounted = true;

View File

@ -588,7 +588,20 @@ ThreadFuture<Void> MultiVersionTransaction::onError(Error const& e) {
else {
auto tr = getTransaction();
auto f = tr.transaction ? tr.transaction->onError(e) : ThreadFuture<Void>(Never());
return abortableFuture(f, tr.onChange);
f = abortableFuture(f, tr.onChange);
return flatMapThreadFuture<Void, Void>(f, [this, e](ErrorOr<Void> ready) {
if(!ready.isError() || ready.getError().code() != error_code_cluster_version_changed) {
if(ready.isError()) {
return ErrorOr<ThreadFuture<Void>>(ready.getError());
}
return ErrorOr<ThreadFuture<Void>>(Void());
}
updateTransaction();
return ErrorOr<ThreadFuture<Void>>(onError(e));
});
}
}

View File

@ -18,33 +18,36 @@
* limitations under the License.
*/
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/NativeAPI.actor.h"
#include <iterator>
#include "fdbclient/Atomic.h"
#include "flow/Platform.h"
#include "flow/ActorCollection.h"
#include "fdbclient/ClusterInterface.h"
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/FailureMonitorClient.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/MonitorLeader.h"
#include "fdbclient/MutationList.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/SystemData.h"
#include "fdbrpc/LoadBalance.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/MasterProxyInterface.h"
#include "fdbclient/ClusterInterface.h"
#include "fdbclient/FailureMonitorClient.h"
#include "fdbrpc/Net2FileSystem.h"
#include "fdbrpc/simulator.h"
#include "fdbrpc/TLSConnection.h"
#include "flow/ActorCollection.h"
#include "flow/DeterministicRandom.h"
#include "fdbclient/KeyRangeMap.h"
#include "flow/Knobs.h"
#include "flow/Platform.h"
#include "flow/SystemMonitor.h"
#include "fdbclient/MutationList.h"
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/MonitorLeader.h"
#include "flow/UnitTest.h"
#if defined(CMAKE_BUILD) || !defined(WIN32)
#include "versions.h"
#endif
#include "fdbrpc/TLSConnection.h"
#include "flow/Knobs.h"
#include "fdbclient/Knobs.h"
#include "fdbrpc/Net2FileSystem.h"
#include "fdbrpc/simulator.h"
#include <iterator>
#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
@ -58,7 +61,6 @@
extern const char* getHGVersion();
using std::make_pair;
using std::max;
using std::min;
using std::pair;
@ -509,7 +511,7 @@ Future<HealthMetrics> DatabaseContext::getHealthMetrics(bool detailed = false) {
DatabaseContext::DatabaseContext(
Reference<Cluster> cluster, Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, Standalone<StringRef> dbId,
int taskID, LocalityData const& clientLocality, bool enableLocalityLoadBalance, bool lockAware, int apiVersion )
TaskPriority taskID, LocalityData const& clientLocality, bool enableLocalityLoadBalance, bool lockAware, int apiVersion )
: cluster(cluster), clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), dbId(dbId), taskID(taskID), clientLocality(clientLocality), enableLocalityLoadBalance(enableLocalityLoadBalance),
lockAware(lockAware), apiVersion(apiVersion), provisional(false),
transactionReadVersions(0), transactionLogicalReads(0), transactionPhysicalReads(0), transactionCommittedMutations(0), transactionCommittedMutationBytes(0),
@ -522,6 +524,7 @@ DatabaseContext::DatabaseContext(
maxOutstandingWatches = CLIENT_KNOBS->DEFAULT_MAX_OUTSTANDING_WATCHES;
transactionMaxBackoff = CLIENT_KNOBS->FAILURE_MAX_DELAY;
transactionMaxSize = CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
snapshotRywEnabled = apiVersionAtLeast(300) ? 1 : 0;
logger = databaseLogger( this );
@ -629,10 +632,10 @@ Database DatabaseContext::create(Reference<AsyncVar<Optional<ClusterInterface>>>
Reference<AsyncVar<ClientDBInfo>> clientInfo(new AsyncVar<ClientDBInfo>());
Future<Void> clientInfoMonitor = delayedAsyncVar(connectedCoordinatorsNum, connectedCoordinatorsNumDelayed, CLIENT_KNOBS->CHECK_CONNECTED_COORDINATOR_NUM_DELAY) || monitorClientInfo(clusterInterface, connFile, clientInfo, connectedCoordinatorsNumDelayed);
return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false));
return Database(new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskPriority::DefaultEndpoint, clientLocality, true, false));
}
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, int taskID, bool lockAware, int apiVersion) {
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo, Future<Void> clientInfoMonitor, LocalityData clientLocality, bool enableLocalityLoadBalance, TaskPriority taskID, bool lockAware, int apiVersion) {
return Database( new DatabaseContext( Reference<Cluster>(nullptr), clientInfo, clientInfoMonitor, LiteralStringRef(""), taskID, clientLocality, enableLocalityLoadBalance, lockAware, apiVersion ) );
}
@ -668,12 +671,10 @@ bool DatabaseContext::getCachedLocations( const KeyRangeRef& range, vector<std::
result.clear();
return false;
}
result.push_back( make_pair(r->range() & range, r->value()) );
if(result.size() == limit)
break;
if(begin == end)
result.emplace_back(r->range() & range, r->value());
if (result.size() == limit || begin == end) {
break;
}
if(reverse)
--end;
@ -778,6 +779,10 @@ void DatabaseContext::setOption( FDBDatabaseOptions::Option option, Optional<Str
validateOptionValue(value, true);
transactionMaxBackoff = extractIntOption(value, 0, std::numeric_limits<int32_t>::max()) / 1000.0;
break;
case FDBDatabaseOptions::TRANSACTION_SIZE_LIMIT:
validateOptionValue(value, true);
transactionMaxSize = extractIntOption(value, 32, CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT);
break;
case FDBDatabaseOptions::SNAPSHOT_RYW_ENABLE:
validateOptionValue(value, false);
snapshotRywEnabled++;
@ -820,10 +825,10 @@ Database Database::createDatabase( Reference<ClusterConnectionFile> connFile, in
DatabaseContext *db;
if(preallocatedDb) {
db = new (preallocatedDb) DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion);
db = new (preallocatedDb) DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskPriority::DefaultEndpoint, clientLocality, true, false, apiVersion);
}
else {
db = new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskDefaultEndpoint, clientLocality, true, false, apiVersion);
db = new DatabaseContext(cluster, clientInfo, clientInfoMonitor, LiteralStringRef(""), TaskPriority::DefaultEndpoint, clientLocality, true, false, apiVersion);
}
return Database(db);
@ -879,7 +884,7 @@ void Cluster::init( Reference<ClusterConnectionFile> connFile, bool startClientI
initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(publicIP)));
systemMonitor();
uncancellable( recurring( &systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskFlushTrace ) );
uncancellable( recurring( &systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace ) );
}
failMon = failureMonitorClient( clusterInterface, false );
@ -1053,7 +1058,7 @@ void setupNetwork(uint64_t transportId, bool useMetrics) {
networkOptions.logClientInfo = true;
g_network = newNet2(false, useMetrics || networkOptions.traceDirectory.present(), networkOptions.useObjectSerializer);
FlowTransport::createInstance(transportId);
FlowTransport::createInstance(true, transportId);
Net2FileSystem::newFileSystem();
initTLSOptions();
@ -1235,7 +1240,7 @@ ACTOR Future< pair<KeyRange,Reference<LocationInfo>> > getKeyLocation_internal(
loop {
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(key, Optional<KeyRef>(), 100, isBackward, key.arena()), TaskDefaultPromiseEndpoint ) ) ) {
when ( GetKeyServerLocationsReply rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(key, Optional<KeyRef>(), 100, isBackward, key.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocation.After");
ASSERT( rep.results.size() == 1 );
@ -1272,7 +1277,7 @@ ACTOR Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLoca
loop {
choose {
when ( wait( cx->onMasterProxiesChanged() ) ) {}
when ( GetKeyServerLocationsReply _rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(keys.begin, keys.end, limit, reverse, keys.arena()), TaskDefaultPromiseEndpoint ) ) ) {
when ( GetKeyServerLocationsReply _rep = wait( loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::getKeyServersLocations, GetKeyServerLocationsRequest(keys.begin, keys.end, limit, reverse, keys.arena()), TaskPriority::DefaultPromiseEndpoint ) ) ) {
state GetKeyServerLocationsReply rep = _rep;
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocations.After");
@ -1282,7 +1287,7 @@ ACTOR Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLoca
state int shard = 0;
for (; shard < rep.results.size(); shard++) {
//FIXME: these shards are being inserted into the map sequentially, it would be much more CPU efficient to save the map pairs and insert them all at once.
results.push_back( make_pair(rep.results[shard].first & keys, cx->setCachedLocation(rep.results[shard].first, rep.results[shard].second)) );
results.emplace_back(rep.results[shard].first & keys, cx->setCachedLocation(rep.results[shard].first, rep.results[shard].second));
wait(yield());
}
@ -1393,7 +1398,7 @@ ACTOR Future<Optional<Value>> getValue( Future<Version> version, Key key, Databa
}
state GetValueReply reply = wait(
loadBalance(ssi.second, &StorageServerInterface::getValue, GetValueRequest(key, ver, getValueID),
TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL));
TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL));
double latency = now() - startTimeD;
cx->readLatencies.addSample(latency);
if (trLogInfo) {
@ -1456,7 +1461,7 @@ ACTOR Future<Key> getKey( Database cx, KeySelector k, Future<Version> version, T
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKey.Before"); //.detail("StartKey", k.getKey()).detail("Offset",k.offset).detail("OrEqual",k.orEqual);
++cx->transactionPhysicalReads;
GetKeyReply reply = wait( loadBalance( ssi.second, &StorageServerInterface::getKey, GetKeyRequest(k, version.get()), TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyReply reply = wait( loadBalance( ssi.second, &StorageServerInterface::getKey, GetKeyRequest(k, version.get()), TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKey.After"); //.detail("NextKey",reply.sel.key).detail("Offset", reply.sel.offset).detail("OrEqual", k.orEqual);
k = reply.sel;
@ -1519,7 +1524,7 @@ ACTOR Future< Void > watchValue( Future<Version> version, Key key, Optional<Valu
g_traceBatch.addAttach("WatchValueAttachID", info.debugID.get().first(), watchValueID.get().first());
g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.Before"); //.detail("TaskID", g_network->getCurrentTask());
}
state Version resp = wait( loadBalance( ssi.second, &StorageServerInterface::watchValue, WatchValueRequest(key, value, ver, watchValueID), TaskDefaultPromiseEndpoint ) );
state Version resp = wait( loadBalance( ssi.second, &StorageServerInterface::watchValue, WatchValueRequest(key, value, ver, watchValueID), TaskPriority::DefaultPromiseEndpoint ) );
if( info.debugID.present() ) {
g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.After"); //.detail("TaskID", g_network->getCurrentTask());
}
@ -1611,7 +1616,7 @@ ACTOR Future<Standalone<RangeResultRef>> getExactRange( Database cx, Version ver
.detail("Servers", locations[shard].second->description());*/
}
++cx->transactionPhysicalReads;
GetKeyValuesReply rep = wait( loadBalance( locations[shard].second, &StorageServerInterface::getKeyValues, req, TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyValuesReply rep = wait( loadBalance( locations[shard].second, &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
if( info.debugID.present() )
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getExactRange.After");
output.arena().dependsOn( rep.arena );
@ -1888,7 +1893,7 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
transaction_too_old(), future_version()
});
}
GetKeyValuesReply rep = wait( loadBalance(beginServer.second, &StorageServerInterface::getKeyValues, req, TaskDefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
GetKeyValuesReply rep = wait( loadBalance(beginServer.second, &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, false, cx->enableLocalityLoadBalance ? &cx->queueModel : NULL ) );
if( info.debugID.present() ) {
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getRange.After");//.detail("SizeOf", rep.data.size());
@ -2018,7 +2023,7 @@ Future<Standalone<RangeResultRef>> getRange( Database const& cx, Future<Version>
}
Transaction::Transaction( Database const& cx )
: cx(cx), info(cx->taskID), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF), committedVersion(invalidVersion), versionstampPromise(Promise<Standalone<StringRef>>()), options(cx), numErrors(0), numRetries(0), trLogInfo(createTrLogInfoProbabilistically(cx))
: cx(cx), info(cx->taskID), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF), committedVersion(invalidVersion), versionstampPromise(Promise<Standalone<StringRef>>()), options(cx), numErrors(0), trLogInfo(createTrLogInfoProbabilistically(cx))
{
setPriority(GetReadVersionRequest::PRIORITY_DEFAULT);
}
@ -2041,7 +2046,6 @@ void Transaction::operator=(Transaction&& r) BOOST_NOEXCEPT {
info = r.info;
backoff = r.backoff;
numErrors = r.numErrors;
numRetries = r.numRetries;
committedVersion = r.committedVersion;
versionstampPromise = std::move(r.versionstampPromise);
watches = r.watches;
@ -2454,36 +2458,28 @@ double Transaction::getBackoff(int errCode) {
TransactionOptions::TransactionOptions(Database const& cx) {
maxBackoff = cx->transactionMaxBackoff;
sizeLimit = cx->transactionMaxSize;
reset(cx);
if (BUGGIFY) {
commitOnFirstProxy = true;
}
maxRetries = cx->transactionMaxRetries;
if (maxRetries == -1) {
maxRetries = 10;
}
}
TransactionOptions::TransactionOptions() {
memset(this, 0, sizeof(*this));
maxBackoff = CLIENT_KNOBS->DEFAULT_MAX_BACKOFF;
sizeLimit = CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
}
void TransactionOptions::reset(Database const& cx) {
double oldMaxBackoff = maxBackoff;
double oldMaxRetries = maxRetries;
uint32_t oldSizeLimit = sizeLimit;
memset(this, 0, sizeof(*this));
maxBackoff = cx->apiVersionAtLeast(610) ? oldMaxBackoff : cx->transactionMaxBackoff;
maxRetries = oldMaxRetries;
sizeLimit = oldSizeLimit;
lockAware = cx->lockAware;
}
void Transaction::onErrorReset() {
int32_t oldNumRetires = numRetries;
reset();
numRetries = oldNumRetires;
}
void Transaction::reset() {
tr = CommitTransactionRequest();
readVersion = Future<Version>();
@ -2698,7 +2694,7 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
const std::vector<MasterProxyInterface>& proxies = cx->clientInfo->get().proxies;
reply = proxies.size() ? throwErrorOr ( brokenPromiseToMaybeDelivered ( proxies[0].commit.tryGetReply(req) ) ) : Never();
} else {
reply = loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskDefaultPromiseEndpoint, true );
reply = loadBalance( cx->getMasterProxies(info.useProvisionalProxies), &MasterProxyInterface::commit, req, TaskPriority::DefaultPromiseEndpoint, true );
}
choose {
@ -2820,8 +2816,9 @@ Future<Void> Transaction::commitMutations() {
transactionSize = tr.transaction.mutations.expectedSize(); // Old API versions didn't account for conflict ranges when determining whether to throw transaction_too_large
}
if (transactionSize > (options.customTransactionSizeLimit == 0 ? (uint64_t)CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT : (uint64_t)options.customTransactionSizeLimit))
if (transactionSize > options.sizeLimit) {
return transaction_too_large();
}
if( !readVersion.isValid() )
getReadVersion( GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY ); // sets up readVersion field. We had no reads, so no need for (expensive) full causal consistency.
@ -2889,7 +2886,6 @@ ACTOR Future<Void> commitAndWatch(Transaction *self) {
}
self->versionstampPromise.sendError(transaction_invalid_version());
//self->onErrorReset();
self->reset();
}
@ -2994,6 +2990,11 @@ void Transaction::setOption( FDBTransactionOptions::Option option, Optional<Stri
options.maxBackoff = extractIntOption(value, 0, std::numeric_limits<int32_t>::max()) / 1000.0;
break;
case FDBTransactionOptions::SIZE_LIMIT:
validateOptionValue(value, true);
options.sizeLimit = extractIntOption(value, 32, CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT);
break;
case FDBTransactionOptions::LOCK_AWARE:
validateOptionValue(value, false);
options.lockAware = true;
@ -3073,7 +3074,7 @@ ACTOR Future<Void> readVersionBatcher( DatabaseContext *cx, FutureStream< std::p
if (requests.size() == CLIENT_KNOBS->MAX_BATCH_SIZE)
send_batch = true;
else if (!timeout.isValid())
timeout = delay(batchTime, TaskProxyGetConsistentReadVersion);
timeout = delay(batchTime, TaskPriority::ProxyGetConsistentReadVersion);
}
when(wait(timeout.isValid() ? timeout : Never())) {
send_batch = true;
@ -3149,9 +3150,6 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
}
Future<Void> Transaction::onError( Error const& e ) {
if (numRetries < std::numeric_limits<int>::max()) {
numRetries++;
}
if (e.code() == error_code_success)
{
return client_invalid_operation();
@ -3173,13 +3171,10 @@ Future<Void> Transaction::onError( Error const& e ) {
cx->transactionsProcessBehind++;
if (e.code() == error_code_cluster_not_fully_recovered) {
cx->transactionWaitsForFullRecovery++;
if (numRetries > options.maxRetries) {
return e;
}
}
double backoff = getBackoff(e.code());
onErrorReset();
reset();
return delay( backoff, info.taskID );
}
if (e.code() == error_code_transaction_too_old ||
@ -3191,7 +3186,7 @@ Future<Void> Transaction::onError( Error const& e ) {
cx->transactionsFutureVersions++;
double maxBackoff = options.maxBackoff;
onErrorReset();
reset();
return delay( std::min(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, maxBackoff), info.taskID );
}
@ -3240,7 +3235,7 @@ ACTOR Future< StorageMetrics > waitStorageMetricsMultipleLocations(
WaitMetricsRequest req(locations[i].first, StorageMetrics(), StorageMetrics());
req.min.bytes = 0;
req.max.bytes = -1;
fx[i] = loadBalance( locations[i].second, &StorageServerInterface::waitMetrics, req, TaskDataDistribution );
fx[i] = loadBalance( locations[i].second, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution );
}
wait( waitForAll(fx) );
@ -3271,7 +3266,7 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
int shardLimit )
{
loop {
vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, shardLimit, false, &StorageServerInterface::waitMetrics, TransactionInfo(TaskDataDistribution) ) );
vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, shardLimit, false, &StorageServerInterface::waitMetrics, TransactionInfo(TaskPriority::DataDistribution) ) );
//SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better solution to this.
if(locations.size() < shardLimit) {
@ -3281,7 +3276,7 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
fx = waitStorageMetricsMultipleLocations( locations, min, max, permittedError );
} else {
WaitMetricsRequest req( keys, min, max );
fx = loadBalance( locations[0].second, &StorageServerInterface::waitMetrics, req, TaskDataDistribution );
fx = loadBalance( locations[0].second, &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution );
}
StorageMetrics x = wait(fx);
return x;
@ -3291,14 +3286,14 @@ ACTOR Future< StorageMetrics > waitStorageMetrics(
throw;
}
cx->invalidateCache(keys);
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
}
} else {
TraceEvent(SevWarn, "WaitStorageMetricsPenalty")
.detail("Keys", keys)
.detail("Limit", CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT)
.detail("JitteredSecondsOfPenitence", CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY);
wait(delayJittered(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskDataDistribution));
wait(delayJittered(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
// make sure that the next getKeyRangeLocations() call will actually re-fetch the range
cx->invalidateCache( keys );
}
@ -3324,13 +3319,13 @@ Future< StorageMetrics > Transaction::getStorageMetrics( KeyRange const& keys, i
ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx, KeyRange keys, StorageMetrics limit, StorageMetrics estimated )
{
loop {
state vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, false, &StorageServerInterface::splitMetrics, TransactionInfo(TaskDataDistribution) ) );
state vector< pair<KeyRange, Reference<LocationInfo>> > locations = wait( getKeyRangeLocations( cx, keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, false, &StorageServerInterface::splitMetrics, TransactionInfo(TaskPriority::DataDistribution) ) );
state StorageMetrics used;
state Standalone<VectorRef<KeyRef>> results;
//SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better solution to this.
if(locations.size() == CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT) {
wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
cx->invalidateCache(keys);
}
else {
@ -3341,7 +3336,7 @@ ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx,
state int i = 0;
for(; i<locations.size(); i++) {
SplitMetricsRequest req( locations[i].first, limit, used, estimated, i == locations.size() - 1 );
SplitMetricsReply res = wait( loadBalance( locations[i].second, &StorageServerInterface::splitMetrics, req, TaskDataDistribution ) );
SplitMetricsReply res = wait( loadBalance( locations[i].second, &StorageServerInterface::splitMetrics, req, TaskPriority::DataDistribution ) );
if( res.splits.size() && res.splits[0] <= results.back() ) { // split points are out of order, possibly because of moving data, throw error to retry
ASSERT_WE_THINK(false); // FIXME: This seems impossible and doesn't seem to be covered by testing
throw all_alternatives_failed();
@ -3367,7 +3362,7 @@ ACTOR Future< Standalone<VectorRef<KeyRef>> > splitStorageMetrics( Database cx,
throw;
}
cx->invalidateCache( keys );
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskDataDistribution));
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
}
}
}

View File

@ -145,9 +145,8 @@ struct StorageMetrics;
struct TransactionOptions {
double maxBackoff;
uint32_t maxRetries;
uint32_t getReadVersionFlags;
uint32_t customTransactionSizeLimit;
uint32_t sizeLimit;
bool checkWritesEnabled : 1;
bool causalWriteRisky : 1;
bool commitOnFirstProxy : 1;
@ -164,10 +163,10 @@ struct TransactionOptions {
struct TransactionInfo {
Optional<UID> debugID;
int taskID;
TaskPriority taskID;
bool useProvisionalProxies;
explicit TransactionInfo( int taskID ) : taskID(taskID), useProvisionalProxies(false) {}
explicit TransactionInfo( TaskPriority taskID ) : taskID(taskID), useProvisionalProxies(false) {}
};
struct TransactionLogInfo : public ReferenceCounted<TransactionLogInfo>, NonCopyable {
@ -287,11 +286,10 @@ public:
void flushTrLogsIfEnabled();
// These are to permit use as state variables in actors:
Transaction() : info( TaskDefaultEndpoint ) {}
Transaction() : info( TaskPriority::DefaultEndpoint ) {}
void operator=(Transaction&& r) BOOST_NOEXCEPT;
void reset();
void onErrorReset();
void fullReset();
double getBackoff(int errCode);
void debugTransaction(UID dID) { info.debugID = dID; }
@ -302,7 +300,6 @@ public:
TransactionInfo info;
int numErrors;
int numRetries;
std::vector<Reference<Watch>> watches;

View File

@ -126,7 +126,7 @@ public:
void getWriteConflicts( KeyRangeMap<bool> *result );
Database getDatabase() {
Database getDatabase() const {
return tr.getDatabase();
}
private:

View File

@ -249,8 +249,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio",
"storage_server_read_load"
"log_server_min_free_space_ratio"
]
},
"description":"The database is not being saturated by the workload."
@ -269,8 +268,7 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"storage_server_min_free_space",
"storage_server_min_free_space_ratio",
"log_server_min_free_space",
"log_server_min_free_space_ratio",
"storage_server_read_load"
"log_server_min_free_space_ratio"
]
},
"description":"The database is not being saturated by the workload."

View File

@ -291,7 +291,7 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<C
state vector<Future<Optional<LeaderInfo>>> leaderServers;
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskCoordinationReply));
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, GetLeaderRequest(coord.clusterKey, UID()), TaskPriority::CoordinationReply));
wait( smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) || delay(2.0) );

View File

@ -80,9 +80,9 @@ struct StorageServerInterface {
bool operator == (StorageServerInterface const& s) const { return uniqueID == s.uniqueID; }
bool operator < (StorageServerInterface const& s) const { return uniqueID < s.uniqueID; }
void initEndpoints() {
getValue.getEndpoint( TaskLoadBalancedEndpoint );
getKey.getEndpoint( TaskLoadBalancedEndpoint );
getKeyValues.getEndpoint( TaskLoadBalancedEndpoint );
getValue.getEndpoint( TaskPriority::LoadBalancedEndpoint );
getKey.getEndpoint( TaskPriority::LoadBalancedEndpoint );
getKeyValues.getEndpoint( TaskPriority::LoadBalancedEndpoint );
}
};

View File

@ -31,7 +31,7 @@
#include "flow/actorcompiler.h" // This must be the last #include.
ACTOR template <class Tree>
Future<Void> deferredCleanupActor( std::vector<Tree> toFree, int taskID = 7000 ) {
Future<Void> deferredCleanupActor( std::vector<Tree> toFree, TaskPriority taskID = TaskPriority::DefaultYield ) {
state int freeCount = 0;
while (!toFree.empty()) {
Tree a = std::move( toFree.back() );

View File

@ -511,7 +511,7 @@ public:
oldestVersion = newOldestVersion;
}
Future<Void> forgetVersionsBeforeAsync( Version newOldestVersion, int taskID = 7000 ) {
Future<Void> forgetVersionsBeforeAsync( Version newOldestVersion, TaskPriority taskID = TaskPriority::DefaultYield ) {
ASSERT( newOldestVersion <= latestVersion );
roots[newOldestVersion] = getRoot(newOldestVersion);

View File

@ -155,6 +155,9 @@ description is not currently required but encouraged.
<Option name="transaction_max_retry_delay" code="502"
paramType="Int" paramDescription="value in milliseconds of maximum delay"
description="Set the maximum amount of backoff delay incurred in the call to ``onError`` if the error is retryable. This sets the ``max_retry_delay`` option of each transaction created by this database. See the transaction option description for more information." />
<Option name="transaction_size_limit" code="503"
paramType="Int" paramDescription="value in bytes"
description="Set the maximum transaction size which, if exceeded, will cause the transaction to be cancelled. Default to 10,000,000 bytes." />
<Option name="snapshot_ryw_enable" code="26"
description="Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior." />
<Option name="snapshot_ryw_disable" code="27"
@ -210,6 +213,9 @@ description is not currently required but encouraged.
<Option name="max_retry_delay" code="502"
paramType="Int" paramDescription="value in milliseconds of maximum delay"
description="Set the maximum amount of backoff delay incurred in the call to ``onError`` if the error is retryable. Defaults to 1000 ms. Valid parameter values are ``[0, INT_MAX]``. If the maximum retry delay is less than the current retry delay of the transaction, then the current retry delay will be clamped to the maximum retry delay. Prior to API version 610, like all other transaction options, the maximum retry delay must be reset after a call to ``onError``. If the API version is 610 or greater, the retry limit is not reset after an ``onError`` call. Note that at all API versions, it is safe and legal to set the maximum retry delay each time the transaction begins, so most code written assuming the older behavior can be upgraded to the newer behavior without requiring any modification, and the caller is not required to implement special logic in retry loops to only conditionally set this option."/>
<Option name="size_limit" code="503"
paramType="Int" paramDescription="value in bytes"
description="Set the maximum transaction size which, if exceeded, will cause the transaction to be cancelled. Valid parameter values are ``[32, 10,000,000]```." />
<Option name="snapshot_ryw_enable" code="600"
description="Snapshot read operations will see the results of writes done in the same transaction. This is the default behavior." />
<Option name="snapshot_ryw_disable" code="601"

View File

@ -266,7 +266,7 @@ private:
}
ACTOR static Future<int> read_impl( int fd, void* data, int length, int64_t offset ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
//fprintf(stderr, "eio_read (fd=%d length=%d offset=%lld)\n", fd, length, offset);
state eio_req* r = eio_read(fd, data, length, offset, 0, eio_callback, &p);
@ -289,7 +289,7 @@ private:
}
ACTOR static Future<Void> write_impl( int fd, Reference<ErrorInfo> err, StringRef data, int64_t offset ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_write(fd, (void*)data.begin(), data.size(), offset, 0, eio_callback, &p);
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }
@ -299,7 +299,7 @@ private:
}
ACTOR static Future<Void> truncate_impl( int fd, Reference<ErrorInfo> err, int64_t size ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_ftruncate(fd, size, 0, eio_callback, &p);
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }
@ -330,7 +330,7 @@ private:
}
ACTOR static Future<Void> sync_impl( int fd, Reference<ErrorInfo> err, bool sync_metadata=false ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = start_fsync( fd, p, sync_metadata );
@ -350,7 +350,7 @@ private:
}
ACTOR static Future<int64_t> size_impl( int fd ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state eio_req* r = eio_fstat( fd, 0, eio_callback, &p );
try { wait( p.getFuture() ); } catch (...) { g_network->setCurrentTask( taskID ); eio_cancel(r); throw; }
@ -363,7 +363,7 @@ private:
}
ACTOR static Future<EIO_STRUCT_STAT> stat_impl( std::string filename ) {
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state Promise<Void> p;
state EIO_STRUCT_STAT statdata;
state eio_req* r = eio_stat( filename.c_str(), 0, eio_callback, &p );
@ -377,7 +377,7 @@ private:
ACTOR template <class R> static Future<R> dispatch_impl( std::function<R()> func) {
state Dispatch<R> data( func );
state int taskID = g_network->getCurrentTask();
state TaskPriority taskID = g_network->getCurrentTask();
state eio_req* r = eio_custom( [](eio_req* req) {
// Runs on the eio thread pool
@ -418,7 +418,7 @@ private:
static void eio_want_poll() {
want_poll = 1;
// SOMEDAY: NULL for deferred error, no analysis of correctness (itp)
onMainThreadVoid([](){ poll_eio(); }, NULL, TaskPollEIO);
onMainThreadVoid([](){ poll_eio(); }, NULL, TaskPriority::PollEIO);
}
static int eio_callback( eio_req* req ) {

View File

@ -472,9 +472,9 @@ private:
#endif
}
int getTask() const { return (prio>>32)+1; }
TaskPriority getTask() const { return static_cast<TaskPriority>((prio>>32)+1); }
ACTOR static void deliver( Promise<int> result, bool failed, int r, int task ) {
ACTOR static void deliver( Promise<int> result, bool failed, int r, TaskPriority task ) {
wait( delay(0, task) );
if (failed) result.sendError(io_timeout());
else if (r < 0) result.sendError(io_error());
@ -649,7 +649,7 @@ private:
loop {
wait(success(ev->read()));
wait(delay(0, TaskDiskIOComplete));
wait(delay(0, TaskPriority::DiskIOComplete));
linux_ioresult ev[FLOW_KNOBS->MAX_OUTSTANDING];
timespec tm; tm.tv_sec = 0; tm.tv_nsec = 0;

View File

@ -23,13 +23,13 @@
std::map<std::string, Future<Void>> AsyncFileNonDurable::filesBeingDeleted;
ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, int taskID ) {
ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, TaskPriority taskID ) {
wait( g_simulator.onProcess( process, taskID ) );
promise.send(Void());
return Void();
}
ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, int taskID ) {
ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, TaskPriority taskID ) {
wait( g_simulator.onProcess( process, taskID ) );
promise.sendError(e);
return Void();

View File

@ -38,8 +38,8 @@
#undef max
#undef min
Future<Void> sendOnProcess( ISimulator::ProcessInfo* const& process, Promise<Void> const& promise, int const& taskID );
Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* const& process, Promise<Void> const& promise, Error const& e, int const& taskID );
ACTOR Future<Void> sendOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, TaskPriority taskID );
ACTOR Future<Void> sendErrorOnProcess( ISimulator::ProcessInfo* process, Promise<Void> promise, Error e, TaskPriority taskID );
ACTOR template <class T>
Future<T> sendErrorOnShutdown( Future<T> in ) {
@ -198,7 +198,7 @@ public:
//Creates a new AsyncFileNonDurable which wraps the provided IAsyncFile
ACTOR static Future<Reference<IAsyncFile>> open(std::string filename, std::string actualFilename, Future<Reference<IAsyncFile>> wrappedFile, Reference<DiskParameters> diskParameters) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
state Future<Void> shutdown = success(currentProcess->shutdownSignal.getFuture());
//TraceEvent("AsyncFileNonDurableOpenBegin").detail("Filename", filename).detail("Addr", g_simulator.getCurrentProcess()->address);
@ -391,7 +391,7 @@ private:
ACTOR Future<int> read(AsyncFileNonDurable *self, void *data, int length, int64_t offset) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
try {
@ -411,7 +411,7 @@ private:
//or none of the write. It may also corrupt parts of sectors which have not been written correctly
ACTOR Future<Void> write(AsyncFileNonDurable *self, Promise<Void> writeStarted, Future<Future<Void>> ownFuture, void const* data, int length, int64_t offset) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
state double delayDuration = deterministicRandom()->random01() * self->maxWriteDelay;
@ -535,7 +535,7 @@ private:
//If a kill interrupts the delay, then the truncate may or may not be performed
ACTOR Future<Void> truncate(AsyncFileNonDurable *self, Promise<Void> truncateStarted, Future<Future<Void>> ownFuture, int64_t size) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
state double delayDuration = deterministicRandom()->random01() * self->maxWriteDelay;
@ -573,8 +573,8 @@ private:
}
}
if(g_network->check_yield(TaskDefaultYield)) {
wait(delay(0, TaskDefaultYield));
if(g_network->check_yield(TaskPriority::DefaultYield)) {
wait(delay(0, TaskPriority::DefaultYield));
}
//If performing a durable truncate, then pass it through to the file. Otherwise, pass it through with a 1/2 chance
@ -663,7 +663,7 @@ private:
ACTOR Future<Void> sync(AsyncFileNonDurable *self, bool durable) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
try {
@ -695,7 +695,7 @@ private:
ACTOR Future<int64_t> size(AsyncFileNonDurable *self) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
@ -714,7 +714,7 @@ private:
//Finishes all outstanding actors on an AsyncFileNonDurable and then deletes it
ACTOR Future<Void> deleteFile(AsyncFileNonDurable *self) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
state std::string filename = self->filename;
wait( g_simulator.onMachine( currentProcess ) );

View File

@ -172,28 +172,29 @@ struct YieldMockNetwork : INetwork, ReferenceCounted<YieldMockNetwork> {
t.send(Void());
}
virtual Future<class Void> delay(double seconds, int taskID) {
virtual Future<class Void> delay(double seconds, TaskPriority taskID) {
return nextTick.getFuture();
}
virtual Future<class Void> yield(int taskID) {
virtual Future<class Void> yield(TaskPriority taskID) {
if (check_yield(taskID))
return delay(0,taskID);
return Void();
}
virtual bool check_yield(int taskID) {
virtual bool check_yield(TaskPriority taskID) {
if (nextYield > 0) --nextYield;
return nextYield == 0;
}
// Delegate everything else. TODO: Make a base class NetworkWrapper for delegating everything in INetwork
virtual int getCurrentTask() { return baseNetwork->getCurrentTask(); }
virtual void setCurrentTask(int taskID) { baseNetwork->setCurrentTask(taskID); }
virtual TaskPriority getCurrentTask() { return baseNetwork->getCurrentTask(); }
virtual void setCurrentTask(TaskPriority taskID) { baseNetwork->setCurrentTask(taskID); }
virtual double now() { return baseNetwork->now(); }
virtual void stop() { return baseNetwork->stop(); }
virtual bool isSimulated() const { return baseNetwork->isSimulated(); }
virtual void onMainThread(Promise<Void>&& signal, int taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }
virtual void onMainThread(Promise<Void>&& signal, TaskPriority taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }
bool isOnMainThread() const override { return baseNetwork->isOnMainThread(); }
virtual THREAD_HANDLE startThread(THREAD_FUNC_RETURN(*func) (void *), void *arg) { return baseNetwork->startThread(func,arg); }
virtual Future< Reference<class IAsyncFile> > open(std::string filename, int64_t flags, int64_t mode) { return IAsyncFileSystem::filesystem()->open(filename,flags,mode); }
virtual Future< Void > deleteFile(std::string filename, bool mustBeDurable) { return IAsyncFileSystem::filesystem()->deleteFile(filename,mustBeDurable); }

View File

@ -18,22 +18,25 @@
* limitations under the License.
*/
#include "flow/flow.h"
#include "fdbrpc/FlowTransport.h"
#include "fdbrpc/genericactors.actor.h"
#include "fdbrpc/fdbrpc.h"
#include "flow/Net2Packet.h"
#include "flow/ActorCollection.h"
#include "flow/TDMetric.actor.h"
#include "flow/ObjectSerializer.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbrpc/crc32c.h"
#include "fdbrpc/simulator.h"
#include <unordered_map>
#include <unordered_map>
#if VALGRIND
#include <memcheck.h>
#endif
#include "fdbrpc/crc32c.h"
#include "fdbrpc/fdbrpc.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbrpc/genericactors.actor.h"
#include "fdbrpc/simulator.h"
#include "flow/ActorCollection.h"
#include "flow/Error.h"
#include "flow/flow.h"
#include "flow/Net2Packet.h"
#include "flow/TDMetric.actor.h"
#include "flow/ObjectSerializer.h"
#include "flow/ProtocolVersion.h"
#include "flow/actorcompiler.h" // This must be the last #include.
static NetworkAddressList g_currentDeliveryPeerAddress = NetworkAddressList();
@ -47,9 +50,9 @@ const uint64_t TOKEN_STREAM_FLAG = 1;
class EndpointMap : NonCopyable {
public:
EndpointMap();
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, uint32_t priority );
void insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority );
NetworkMessageReceiver* get( Endpoint::Token const& token );
uint32_t getPriority( Endpoint::Token const& token );
TaskPriority getPriority( Endpoint::Token const& token );
void remove( Endpoint::Token const& token, NetworkMessageReceiver* r );
private:
@ -83,12 +86,12 @@ void EndpointMap::realloc() {
firstFree = oldSize;
}
void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, uint32_t priority ) {
void EndpointMap::insert( NetworkMessageReceiver* r, Endpoint::Token& token, TaskPriority priority ) {
if (firstFree == uint32_t(-1)) realloc();
int index = firstFree;
firstFree = data[index].nextFree;
token = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | index );
data[index].token() = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | priority );
data[index].token() = Endpoint::Token( token.first(), (token.second()&0xffffffff00000000LL) | static_cast<uint32_t>(priority) );
data[index].receiver = r;
}
@ -99,11 +102,11 @@ NetworkMessageReceiver* EndpointMap::get( Endpoint::Token const& token ) {
return 0;
}
uint32_t EndpointMap::getPriority( Endpoint::Token const& token ) {
TaskPriority EndpointMap::getPriority( Endpoint::Token const& token ) {
uint32_t index = token.second();
if ( index < data.size() && data[index].token().first() == token.first() && ((data[index].token().second()&0xffffffff00000000LL)|index)==token.second() )
return data[index].token().second();
return TaskUnknownEndpoint;
return static_cast<TaskPriority>(data[index].token().second());
return TaskPriority::UnknownEndpoint;
}
void EndpointMap::remove( Endpoint::Token const& token, NetworkMessageReceiver* r ) {
@ -119,7 +122,7 @@ struct EndpointNotFoundReceiver : NetworkMessageReceiver {
EndpointNotFoundReceiver(EndpointMap& endpoints) {
//endpoints[WLTOKEN_ENDPOINT_NOT_FOUND] = this;
Endpoint::Token e = WLTOKEN_ENDPOINT_NOT_FOUND;
endpoints.insert(this, e, TaskDefaultEndpoint);
endpoints.insert(this, e, TaskPriority::DefaultEndpoint);
ASSERT( e == WLTOKEN_ENDPOINT_NOT_FOUND );
}
virtual void receive( ArenaReader& reader ) {
@ -138,7 +141,7 @@ struct EndpointNotFoundReceiver : NetworkMessageReceiver {
struct PingReceiver : NetworkMessageReceiver {
PingReceiver(EndpointMap& endpoints) {
Endpoint::Token e = WLTOKEN_PING_PACKET;
endpoints.insert(this, e, TaskReadSocket);
endpoints.insert(this, e, TaskPriority::ReadSocket);
ASSERT( e == WLTOKEN_PING_PACKET );
}
virtual void receive( ArenaReader& reader ) {
@ -339,7 +342,7 @@ struct Peer : NonCopyable {
pkt.connectionId = transport->transportId;
PacketBuffer* pb_first = new PacketBuffer;
PacketWriter wr( pb_first, NULL, Unversioned() );
PacketWriter wr( pb_first, nullptr, Unversioned() );
pkt.serialize(wr);
unsent.prependWriteBuffer(pb_first, wr.finish());
}
@ -351,7 +354,7 @@ struct Peer : NonCopyable {
// If there are reliable packets, compact reliable packets into a new unsent range
if(!reliable.empty()) {
PacketBuffer* pb = unsent.getWriteBuffer();
pb = reliable.compact(pb, NULL);
pb = reliable.compact(pb, nullptr);
unsent.setWriteBuffer(pb);
}
}
@ -435,16 +438,16 @@ struct Peer : NonCopyable {
ACTOR static Future<Void> connectionWriter( Peer* self, Reference<IConnection> conn ) {
state double lastWriteTime = now();
loop {
//wait( delay(0, TaskWriteSocket) );
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskWriteSocket) );
//wait( delay(500e-6, TaskWriteSocket) );
//wait( yield(TaskWriteSocket) );
//wait( delay(0, TaskPriority::WriteSocket) );
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskPriority::WriteSocket) );
//wait( delay(500e-6, TaskPriority::WriteSocket) );
//wait( yield(TaskPriority::WriteSocket) );
// Send until there is nothing left to send
loop {
lastWriteTime = now();
int sent = conn->write( self->unsent.getUnsent() );
int sent = conn->write(self->unsent.getUnsent(), /* limit= */ FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
if (sent) {
self->transport->bytesSent += sent;
self->unsent.sent(sent);
@ -453,7 +456,7 @@ struct Peer : NonCopyable {
TEST(true); // We didn't write everything, so apparently the write buffer is full. Wait for it to be nonfull.
wait( conn->onWritable() );
wait( yield(TaskWriteSocket) );
wait( yield(TaskPriority::WriteSocket) );
}
// Wait until there is something to send
@ -599,8 +602,8 @@ TransportData::~TransportData() {
}
ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader reader, bool inReadSocket) {
int priority = self->endpoints.getPriority(destination.token);
if (priority < TaskReadSocket || !inReadSocket) {
TaskPriority priority = self->endpoints.getPriority(destination.token);
if (priority < TaskPriority::ReadSocket || !inReadSocket) {
wait( delay(0, priority) );
} else {
g_network->setCurrentTask( priority );
@ -634,21 +637,17 @@ ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader
}
if( inReadSocket )
g_network->setCurrentTask( TaskReadSocket );
g_network->setCurrentTask( TaskPriority::ReadSocket );
}
static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, uint8_t* e, Arena& arena,
static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, const uint8_t* e, Arena& arena,
NetworkAddress const& peerAddress, ProtocolVersion peerProtocolVersion) {
// Find each complete packet in the given byte range and queue a ready task to deliver it.
// Remove the complete packets from the range by increasing unprocessed_begin.
// There won't be more than 64K of data plus one packet, so this shouldn't take a long time.
uint8_t* p = unprocessed_begin;
bool checksumEnabled = true;
if (peerAddress.isTLS()) {
checksumEnabled = false;
}
const bool checksumEnabled = !peerAddress.isTLS();
loop {
uint32_t packetLen, packetChecksum;
@ -734,6 +733,23 @@ static void scanPackets(TransportData* transport, uint8_t*& unprocessed_begin, u
}
}
// Given unprocessed buffer [begin, end), check if next packet size is known and return
// enough size for the next packet, whose format is: {size, optional_checksum, data} +
// next_packet_size.
static int getNewBufferSize(const uint8_t* begin, const uint8_t* end, const NetworkAddress& peerAddress) {
const int len = end - begin;
if (len < sizeof(uint32_t)) {
return FLOW_KNOBS->MIN_PACKET_BUFFER_BYTES;
}
const uint32_t packetLen = *(uint32_t*)begin;
if (packetLen > FLOW_KNOBS->PACKET_LIMIT) {
TraceEvent(SevError, "Net2_PacketLimitExceeded").detail("FromPeer", peerAddress.toString()).detail("Length", (int)packetLen);
throw platform_error();
}
return std::max<uint32_t>(FLOW_KNOBS->MIN_PACKET_BUFFER_BYTES,
packetLen + sizeof(uint32_t) * (peerAddress.isTLS() ? 2 : 3));
}
ACTOR static Future<Void> connectionReader(
TransportData* transport,
Reference<IConnection> conn,
@ -741,12 +757,12 @@ ACTOR static Future<Void> connectionReader(
Promise<Peer*> onConnected)
{
// This actor exists whenever there is an open or opening connection, whether incoming or outgoing
// For incoming connections conn is set and peer is initially NULL; for outgoing connections it is the reverse
// For incoming connections conn is set and peer is initially nullptr; for outgoing connections it is the reverse
state Arena arena;
state uint8_t* unprocessed_begin = NULL;
state uint8_t* unprocessed_end = NULL;
state uint8_t* buffer_end = NULL;
state uint8_t* unprocessed_begin = nullptr;
state uint8_t* unprocessed_end = nullptr;
state uint8_t* buffer_end = nullptr;
state bool expectConnectPacket = true;
state bool compatible = false;
state bool incompatiblePeerCounted = false;
@ -761,12 +777,12 @@ ACTOR static Future<Void> connectionReader(
try {
loop {
loop {
int readAllBytes = buffer_end - unprocessed_end;
if (readAllBytes < 4096) {
state int readAllBytes = buffer_end - unprocessed_end;
if (readAllBytes < FLOW_KNOBS->MIN_PACKET_BUFFER_FREE_BYTES) {
Arena newArena;
int unproc_len = unprocessed_end - unprocessed_begin;
int len = std::max( 65536, unproc_len*2 );
uint8_t* newBuffer = new (newArena) uint8_t[ len ];
const int unproc_len = unprocessed_end - unprocessed_begin;
const int len = getNewBufferSize(unprocessed_begin, unprocessed_end, peerAddress);
uint8_t* const newBuffer = new (newArena) uint8_t[ len ];
memcpy( newBuffer, unprocessed_begin, unproc_len );
arena = newArena;
unprocessed_begin = newBuffer;
@ -775,13 +791,21 @@ ACTOR static Future<Void> connectionReader(
readAllBytes = buffer_end - unprocessed_end;
}
int readBytes = conn->read( unprocessed_end, buffer_end );
if(peer) {
peer->bytesReceived += readBytes;
}
if (!readBytes) break;
state bool readWillBlock = readBytes != readAllBytes;
state int totalReadBytes = 0;
while (true) {
const int len = std::min<int>(buffer_end - unprocessed_end, FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
if (len == 0) break;
state int readBytes = conn->read(unprocessed_end, unprocessed_end + len);
if (readBytes == 0) break;
wait(yield(TaskPriority::ReadSocket));
totalReadBytes += readBytes;
unprocessed_end += readBytes;
}
if (peer) {
peer->bytesReceived += totalReadBytes;
}
if (totalReadBytes == 0) break;
state bool readWillBlock = totalReadBytes != readAllBytes;
if (expectConnectPacket && unprocessed_end-unprocessed_begin>=CONNECT_PACKET_V0_SIZE) {
// At the beginning of a connection, we expect to receive a packet containing the protocol version and the listening port of the remote process
@ -884,11 +908,11 @@ ACTOR static Future<Void> connectionReader(
if (readWillBlock)
break;
wait(yield(TaskReadSocket));
wait(yield(TaskPriority::ReadSocket));
}
wait( conn->onReadable() );
wait(delay(0, TaskReadSocket)); // We don't want to call conn->read directly from the reactor - we could get stuck in the reactor reading 1 packet at a time
wait(delay(0, TaskPriority::ReadSocket)); // We don't want to call conn->read directly from the reactor - we could get stuck in the reactor reading 1 packet at a time
}
}
catch (Error& e) {
@ -932,7 +956,7 @@ ACTOR static Future<Void> listen( TransportData* self, NetworkAddress listenAddr
.detail("FromAddress", conn->getPeerAddress())
.detail("ListenAddress", listenAddr.toString());
incoming.add( connectionIncoming(self, conn) );
wait(delay(0) || delay(FLOW_KNOBS->CONNECTION_ACCEPT_DELAY, TaskWriteSocket));
wait(delay(0) || delay(FLOW_KNOBS->CONNECTION_ACCEPT_DELAY, TaskPriority::WriteSocket));
}
} catch (Error& e) {
TraceEvent(SevError, "ListenError").error(e);
@ -946,7 +970,7 @@ Peer* TransportData::getPeer( NetworkAddress const& address, bool openConnection
return peer->second;
}
if(!openConnection) {
return NULL;
return nullptr;
}
Peer* newPeer = new Peer(this, address);
peers[address] = newPeer;
@ -1054,7 +1078,7 @@ void FlowTransport::removePeerReference( const Endpoint& endpoint, NetworkMessag
}
}
void FlowTransport::addEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, uint32_t taskID ) {
void FlowTransport::addEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, TaskPriority taskID ) {
endpoint.token = deterministicRandom()->randomUniqueID();
if (receiver->isStream()) {
endpoint.addresses = self->localAddresses;
@ -1070,7 +1094,7 @@ void FlowTransport::removeEndpoint( const Endpoint& endpoint, NetworkMessageRece
self->endpoints.remove(endpoint.token, receiver);
}
void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, uint32_t taskID ) {
void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver* receiver, TaskPriority taskID ) {
endpoint.addresses = self->localAddresses;
ASSERT( ((endpoint.token.first() & TOKEN_STREAM_FLAG)!=0) == receiver->isStream() );
Endpoint::Token otoken = endpoint.token;
@ -1100,13 +1124,9 @@ static PacketID sendPacket( TransportData* self, ISerializeSource const& what, c
ASSERT(copy.size() > 0);
deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(currentProtocolVersion)), false);
return (PacketID)NULL;
return (PacketID)nullptr;
} else {
bool checksumEnabled = true;
if (destination.getPrimaryAddress().isTLS()) {
checksumEnabled = false;
}
const bool checksumEnabled = !destination.getPrimaryAddress().isTLS();
++self->countPacketsGenerated;
Peer* peer = self->getPeer(destination.getPrimaryAddress(), openConnection);
@ -1114,7 +1134,7 @@ static PacketID sendPacket( TransportData* self, ISerializeSource const& what, c
// If there isn't an open connection, a public address, or the peer isn't compatible, we can't send
if (!peer || (peer->outgoingConnectionIdle && !destination.getPrimaryAddress().isPublic()) || (peer->incompatibleProtocolVersionNewer && destination.token != WLTOKEN_PING_PACKET)) {
TEST(true); // Can't send to private address without a compatible open connection
return (PacketID)NULL;
return (PacketID)nullptr;
}
bool firstUnsent = peer->unsent.empty();

View File

@ -23,6 +23,7 @@
#pragma once
#include <algorithm>
#include "flow/genericactors.actor.h"
#include "flow/network.h"
#include "flow/FileIdentifier.h"
@ -109,7 +110,7 @@ public:
FlowTransport(uint64_t transportId);
~FlowTransport();
static void createInstance(bool isClient, uint64_t transportId = 0);
static void createInstance(bool isClient, uint64_t transportId);
// Creates a new FlowTransport and makes FlowTransport::transport() return it. This uses g_network->global() variables,
// so it will be private to a simulation.
@ -137,13 +138,13 @@ public:
void removePeerReference( const Endpoint&, NetworkMessageReceiver* );
// Signal that a peer connection is no longer being used
void addEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, uint32_t taskID );
void addEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, TaskPriority taskID );
// Sets endpoint to be a new local endpoint which delivers messages to the given receiver
void removeEndpoint( const Endpoint&, NetworkMessageReceiver* );
// The given local endpoint no longer delivers messages to the given receiver or uses resources
void addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, uint32_t taskID );
void addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageReceiver*, TaskPriority taskID );
// Sets endpoint to a new local endpoint (without changing its token) which delivers messages to the given receiver
// Implementations may have limitations on when this function is called and what endpoint.token may be!

View File

@ -179,7 +179,7 @@ Future< REPLY_TYPE(Request) > loadBalance(
Reference<MultiInterface<Multi>> alternatives,
RequestStream<Request> Interface::* channel,
Request request = Request(),
int taskID = TaskDefaultPromiseEndpoint,
TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
QueueModel* model = NULL)
{

View File

@ -70,6 +70,13 @@ struct IReplicationPolicy : public ReferenceCounted<IReplicationPolicy> {
return keys;
}
virtual void attributeKeys(std::set<std::string>*) const = 0;
// For flatbuffers, IReplicationPolicy is just encoded as a string using
// |serializeReplicationPolicy|. |writer| is a member of IReplicationPolicy
// so that this string outlives all calls to
// dynamic_size_traits<Reference<IReplicationPolicy>>::save
mutable BinaryWriter writer{ IncludeVersion() };
mutable bool alreadyWritten = false;
};
template <class Archive>
@ -276,12 +283,28 @@ void serializeReplicationPolicy(Ar& ar, Reference<IReplicationPolicy>& policy) {
template <>
struct dynamic_size_traits<Reference<IReplicationPolicy>> : std::true_type {
static WriteRawMemory save(const Reference<IReplicationPolicy>& value) {
BinaryWriter writer(IncludeVersion());
static Block save(const Reference<IReplicationPolicy>& value) {
if (value.getPtr() == nullptr) {
static BinaryWriter writer{ IncludeVersion() };
writer = BinaryWriter{ IncludeVersion() };
serializeReplicationPolicy(writer, const_cast<Reference<IReplicationPolicy>&>(value));
std::unique_ptr<uint8_t[]> memory(new uint8_t[writer.getLength()]);
memcpy(memory.get(), writer.getData(), writer.getLength());
return std::make_pair<OwnershipErasedPtr<const uint8_t>, size_t>(ownedPtr(const_cast<const uint8_t*>(memory.release())), writer.getLength());
return unownedPtr(const_cast<const uint8_t*>(reinterpret_cast<uint8_t*>(writer.getData())),
writer.getLength());
}
if (!value->alreadyWritten) {
serializeReplicationPolicy(value->writer, const_cast<Reference<IReplicationPolicy>&>(value));
value->alreadyWritten = true;
}
return unownedPtr(const_cast<const uint8_t*>(reinterpret_cast<uint8_t*>(value->writer.getData())),
value->writer.getLength());
}
static void serialization_done(const Reference<IReplicationPolicy>& value) {
if (value.getPtr() == nullptr) {
return;
}
value->alreadyWritten = false;
value->writer = BinaryWriter{ IncludeVersion() };
}
// Context is an arbitrary type that is plumbed by reference throughout the
@ -294,5 +317,6 @@ struct dynamic_size_traits<Reference<IReplicationPolicy>> : std::true_type {
}
};
static_assert(detail::has_serialization_done<dynamic_size_traits<Reference<IReplicationPolicy>>>::value);
#endif

View File

@ -47,7 +47,7 @@ bool firstInBatch(CommitTransactionRequest x) {
}
ACTOR template <class X>
Future<Void> batcher(PromiseStream<std::pair<std::vector<X>, int> > out, FutureStream<X> in, double avgMinDelay, double* avgMaxDelay, double emptyBatchTimeout, int maxCount, int desiredBytes, int maxBytes, Optional<PromiseStream<Void>> batchStartedStream, int64_t *commitBatchesMemBytesCount, int64_t commitBatchesMemBytesLimit, int taskID = TaskDefaultDelay, Counter* counter = 0)
Future<Void> batcher(PromiseStream<std::pair<std::vector<X>, int> > out, FutureStream<X> in, double avgMinDelay, double* avgMaxDelay, double emptyBatchTimeout, int maxCount, int desiredBytes, int maxBytes, Optional<PromiseStream<Void>> batchStartedStream, int64_t *commitBatchesMemBytesCount, int64_t commitBatchesMemBytesLimit, TaskPriority taskID = TaskPriority::DefaultDelay, Counter* counter = 0)
{
wait( delayJittered(*avgMaxDelay, taskID) ); // smooth out
// This is set up to deliver even zero-size batches if emptyBatchTimeout elapses, because that's what master proxy wants. The source control history

View File

@ -48,7 +48,7 @@ struct FlowReceiver : private NetworkMessageReceiver {
// If already a remote endpoint, returns that. Otherwise makes this
// a local endpoint and returns that.
const Endpoint& getEndpoint(int taskID) {
const Endpoint& getEndpoint(TaskPriority taskID) {
if (!endpoint.isValid()) {
m_isLocalEndpoint = true;
FlowTransport::transport().addEndpoint(endpoint, this, taskID);
@ -56,7 +56,7 @@ struct FlowReceiver : private NetworkMessageReceiver {
return endpoint;
}
void makeWellKnownEndpoint(Endpoint::Token token, int taskID) {
void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
ASSERT(!endpoint.isValid());
m_isLocalEndpoint = true;
endpoint.token = token;
@ -128,7 +128,7 @@ public:
~ReplyPromise() { if (sav) sav->delPromiseRef(); }
ReplyPromise(const Endpoint& endpoint) : sav(new NetSAV<T>(0, 1, endpoint)) {}
const Endpoint& getEndpoint(int taskID = TaskDefaultPromiseEndpoint) const { return sav->getEndpoint(taskID); }
const Endpoint& getEndpoint(TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint) const { return sav->getEndpoint(taskID); }
void operator=(const ReplyPromise& rhs) {
if (rhs.sav) rhs.sav->addPromiseRef();
@ -204,19 +204,19 @@ template <class Reply>
void resetReply(ReplyPromise<Reply> & p) { p.reset(); }
template <class Request>
void resetReply(Request& r, int taskID) { r.reply.reset(); r.reply.getEndpoint(taskID); }
void resetReply(Request& r, TaskPriority taskID) { r.reply.reset(); r.reply.getEndpoint(taskID); }
template <class Reply>
void resetReply(ReplyPromise<Reply> & p, int taskID) { p.reset(); p.getEndpoint(taskID); }
void resetReply(ReplyPromise<Reply> & p, TaskPriority taskID) { p.reset(); p.getEndpoint(taskID); }
template <class Request>
void setReplyPriority(Request& r, int taskID) { r.reply.getEndpoint(taskID); }
void setReplyPriority(Request& r, TaskPriority taskID) { r.reply.getEndpoint(taskID); }
template <class Reply>
void setReplyPriority(ReplyPromise<Reply> & p, int taskID) { p.getEndpoint(taskID); }
void setReplyPriority(ReplyPromise<Reply> & p, TaskPriority taskID) { p.getEndpoint(taskID); }
template <class Reply>
void setReplyPriority(const ReplyPromise<Reply> & p, int taskID) { p.getEndpoint(taskID); }
void setReplyPriority(const ReplyPromise<Reply> & p, TaskPriority taskID) { p.getEndpoint(taskID); }
@ -281,7 +281,7 @@ public:
return reportEndpointFailure(getReplyPromise(value).getFuture(), getEndpoint());
}
template <class X>
Future<REPLY_TYPE(X)> getReply(const X& value, int taskID) const {
Future<REPLY_TYPE(X)> getReply(const X& value, TaskPriority taskID) const {
setReplyPriority(value, taskID);
return getReply(value);
}
@ -290,7 +290,7 @@ public:
return getReply(ReplyPromise<X>());
}
template <class X>
Future<X> getReplyWithTaskID(int taskID) const {
Future<X> getReplyWithTaskID(TaskPriority taskID) const {
ReplyPromise<X> reply;
reply.getEndpoint(taskID);
return getReply(reply);
@ -302,7 +302,7 @@ public:
// If cancelled or returns failure, request was or will be delivered zero or one times.
// The caller must be capable of retrying if this request returns failure
template <class X>
Future<ErrorOr<REPLY_TYPE(X)>> tryGetReply(const X& value, int taskID) const {
Future<ErrorOr<REPLY_TYPE(X)>> tryGetReply(const X& value, TaskPriority taskID) const {
setReplyPriority(value, taskID);
if (queue->isRemoteEndpoint()) {
Future<Void> disc = makeDependent<T>(IFailureMonitor::failureMonitor()).onDisconnectOrFailure(getEndpoint(taskID));
@ -344,7 +344,7 @@ public:
// If it returns failure, the failure detector considers the endpoint failed permanently or for the given amount of time
// See IFailureMonitor::onFailedFor() for an explanation of the duration and slope parameters.
template <class X>
Future<ErrorOr<REPLY_TYPE(X)>> getReplyUnlessFailedFor(const X& value, double sustainedFailureDuration, double sustainedFailureSlope, int taskID) const {
Future<ErrorOr<REPLY_TYPE(X)>> getReplyUnlessFailedFor(const X& value, double sustainedFailureDuration, double sustainedFailureSlope, TaskPriority taskID) const {
// If it is local endpoint, no need for failure monitoring
return waitValueOrSignal(getReply(value, taskID),
makeDependent<T>(IFailureMonitor::failureMonitor()).onFailedFor(getEndpoint(taskID), sustainedFailureDuration, sustainedFailureSlope),
@ -388,8 +388,8 @@ public:
//queue = (NetNotifiedQueue<T>*)0xdeadbeef;
}
Endpoint getEndpoint(int taskID = TaskDefaultEndpoint) const { return queue->getEndpoint(taskID); }
void makeWellKnownEndpoint(Endpoint::Token token, int taskID) {
Endpoint getEndpoint(TaskPriority taskID = TaskPriority::DefaultEndpoint) const { return queue->getEndpoint(taskID); }
void makeWellKnownEndpoint(Endpoint::Token token, TaskPriority taskID) {
queue->makeWellKnownEndpoint(token, taskID);
}
@ -425,7 +425,10 @@ struct serializable_traits<RequestStream<T>> : std::true_type {
} else {
const auto& ep = stream.getEndpoint();
serializer(ar, ep);
UNSTOPPABLE_ASSERT(ep.getPrimaryAddress().isValid()); // No serializing PromiseStreams on a client with no public address
if constexpr (Archiver::isSerializing) { // Don't assert this when collecting vtable for flatbuffers
UNSTOPPABLE_ASSERT(ep.getPrimaryAddress()
.isValid()); // No serializing PromiseStreams on a client with no public address
}
}
}
};

View File

@ -50,7 +50,7 @@ Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request )
}
ACTOR template <class Req>
Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request, int taskID ) {
Future<REPLY_TYPE(Req)> retryBrokenPromise( RequestStream<Req> to, Req request, TaskPriority taskID ) {
// Like to.getReply(request), except that a broken_promise exception results in retrying request immediately.
// Suitable for use with well known endpoints, which are likely to return to existence after the other process restarts.
// Not normally useful for ordinary endpoints, which conventionally are permanently destroyed after replying with broken_promise.

View File

@ -423,7 +423,7 @@ public:
ACTOR static Future<Reference<IAsyncFile>> open( std::string filename, int flags, int mode,
Reference<DiskParameters> diskParameters = Reference<DiskParameters>(new DiskParameters(25000, 150000000)), bool delayOnWrite = true ) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
if(++openCount >= 3000) {
TraceEvent(SevError, "TooManyFiles");
@ -742,11 +742,11 @@ public:
// Everything actually network related is delegated to the Sim2Net class; Sim2 is only concerned with simulating machines and time
virtual double now() { return time; }
virtual Future<class Void> delay( double seconds, int taskID ) {
ASSERT(taskID >= TaskMinPriority && taskID <= TaskMaxPriority);
virtual Future<class Void> delay( double seconds, TaskPriority taskID ) {
ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
return delay( seconds, taskID, currentProcess );
}
Future<class Void> delay( double seconds, int taskID, ProcessInfo* machine ) {
Future<class Void> delay( double seconds, TaskPriority taskID, ProcessInfo* machine ) {
ASSERT( seconds >= -0.0001 );
seconds = std::max(0.0, seconds);
Future<Void> f;
@ -761,13 +761,13 @@ public:
return f;
}
ACTOR static Future<Void> checkShutdown(Sim2 *self, int taskID) {
ACTOR static Future<Void> checkShutdown(Sim2 *self, TaskPriority taskID) {
wait(success(self->getCurrentProcess()->shutdownSignal.getFuture()));
self->setCurrentTask(taskID);
return Void();
}
virtual Future<class Void> yield( int taskID ) {
if (taskID == TaskDefaultYield) taskID = currentTaskID;
virtual Future<class Void> yield( TaskPriority taskID ) {
if (taskID == TaskPriority::DefaultYield) taskID = currentTaskID;
if (check_yield(taskID)) {
// We want to check that yielders can handle actual time elapsing (it sometimes will outside simulation), but
// don't want to prevent instantaneous shutdown of "rebooted" machines.
@ -776,7 +776,7 @@ public:
setCurrentTask(taskID);
return Void();
}
virtual bool check_yield( int taskID ) {
virtual bool check_yield( TaskPriority taskID ) {
if (yielded) return true;
if (--yield_limit <= 0) {
yield_limit = deterministicRandom()->randomInt(1, 150); // If yield returns false *too* many times in a row, there could be a stack overflow, since we can't deterministically check stack size as the real network does
@ -784,10 +784,10 @@ public:
}
return yielded = BUGGIFY_WITH_PROB(0.01);
}
virtual int getCurrentTask() {
virtual TaskPriority getCurrentTask() {
return currentTaskID;
}
virtual void setCurrentTask(int taskID ) {
virtual void setCurrentTask(TaskPriority taskID ) {
currentTaskID = taskID;
}
// Sets the taskID/priority of the current task, without yielding
@ -924,7 +924,7 @@ public:
}
if ( mustBeDurable || deterministicRandom()->random01() < 0.5 ) {
state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
state int currentTaskID = g_network->getCurrentTask();
state TaskPriority currentTaskID = g_network->getCurrentTask();
wait( g_simulator.onMachine( currentProcess ) );
try {
wait( ::delay(0.05 * deterministicRandom()->random01()) );
@ -950,7 +950,7 @@ public:
ACTOR static Future<Void> runLoop(Sim2 *self) {
state ISimulator::ProcessInfo *callingMachine = self->currentProcess;
while ( !self->isStopped ) {
wait( self->net2->yield(TaskDefaultYield) );
wait( self->net2->yield(TaskPriority::DefaultYield) );
self->mutex.enter();
if( self->tasks.size() == 0 ) {
@ -1580,23 +1580,23 @@ public:
machines.erase(machineId);
}
Sim2(bool objSerializer) : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(-1) {
Sim2(bool objSerializer) : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
// Not letting currentProcess be NULL eliminates some annoying special cases
currentProcess = new ProcessInfo("NoMachine", LocalityData(Optional<Standalone<StringRef>>(), StringRef(), StringRef(), StringRef()), ProcessClass(), {NetworkAddress()}, this, "", "");
g_network = net2 = newNet2(false, true, objSerializer);
Net2FileSystem::newFileSystem();
check_yield(0);
check_yield(TaskPriority::Zero);
}
// Implementation
struct Task {
int taskID;
TaskPriority taskID;
double time;
uint64_t stable;
ProcessInfo* machine;
Promise<Void> action;
Task( double time, int taskID, uint64_t stable, ProcessInfo* machine, Promise<Void>&& action ) : time(time), taskID(taskID), stable(stable), machine(machine), action(std::move(action)) {}
Task( double time, int taskID, uint64_t stable, ProcessInfo* machine, Future<Void>& future ) : time(time), taskID(taskID), stable(stable), machine(machine) { future = action.getFuture(); }
Task( double time, TaskPriority taskID, uint64_t stable, ProcessInfo* machine, Promise<Void>&& action ) : time(time), taskID(taskID), stable(stable), machine(machine), action(std::move(action)) {}
Task( double time, TaskPriority taskID, uint64_t stable, ProcessInfo* machine, Future<Void>& future ) : time(time), taskID(taskID), stable(stable), machine(machine) { future = action.getFuture(); }
Task(Task&& rhs) BOOST_NOEXCEPT : time(rhs.time), taskID(rhs.taskID), stable(rhs.stable), machine(rhs.machine), action(std::move(rhs.action)) {}
void operator= ( Task const& rhs ) { taskID = rhs.taskID; time = rhs.time; stable = rhs.stable; machine = rhs.machine; action = rhs.action; }
Task( Task const& rhs ) : taskID(rhs.taskID), time(rhs.time), stable(rhs.stable), machine(rhs.machine), action(rhs.action) {}
@ -1643,20 +1643,23 @@ public:
}
}
virtual void onMainThread( Promise<Void>&& signal, int taskID ) {
virtual void onMainThread( Promise<Void>&& signal, TaskPriority taskID ) {
// This is presumably coming from either a "fake" thread pool thread, i.e. it is actually on this thread
// or a thread created with g_network->startThread
ASSERT(getCurrentProcess());
mutex.enter();
ASSERT(taskID >= TaskMinPriority && taskID <= TaskMaxPriority);
ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
tasks.push( Task( time, taskID, taskCount++, getCurrentProcess(), std::move(signal) ) );
mutex.leave();
}
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, int taskID ) {
bool isOnMainThread() const override {
return net2->isOnMainThread();
}
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID ) {
return delay( 0, taskID, process );
}
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, int taskID ) {
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID ) {
if( process->machine == 0 )
return Void();
return delay( 0, taskID, process->machine->machineProcess );
@ -1665,7 +1668,7 @@ public:
//time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because
//time should only be modified from the main thread.
double time;
int currentTaskID;
TaskPriority currentTaskID;
//taskCount is guarded by ISimulator::mutex
uint64_t taskCount;
@ -1695,9 +1698,9 @@ void startNewSimulator(bool objSerializer) {
}
ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
TraceEvent("RebootingProcessAttempt").detail("ZoneId", p->locality.zoneId()).detail("KillType", kt).detail("Process", p->toString()).detail("StartingClass", p->startingClass.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).detail("Rebooting", p->rebooting).detail("TaskDefaultDelay", TaskDefaultDelay);
TraceEvent("RebootingProcessAttempt").detail("ZoneId", p->locality.zoneId()).detail("KillType", kt).detail("Process", p->toString()).detail("StartingClass", p->startingClass.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).detail("Rebooting", p->rebooting).detail("TaskPriorityDefaultDelay", TaskPriority::DefaultDelay);
wait( g_sim2.delay( 0, TaskDefaultDelay, p ) ); // Switch to the machine in question
wait( g_sim2.delay( 0, TaskPriority::DefaultDelay, p ) ); // Switch to the machine in question
try {
ASSERT( kt == ISimulator::RebootProcess || kt == ISimulator::Reboot || kt == ISimulator::RebootAndDelete || kt == ISimulator::RebootProcessAndDelete );

View File

@ -137,8 +137,8 @@ public:
ProcessInfo* getProcess( Endpoint const& endpoint ) { return getProcessByAddress(endpoint.getPrimaryAddress()); }
ProcessInfo* getCurrentProcess() { return currentProcess; }
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, int taskID = -1 ) = 0;
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, int taskID = -1 ) = 0;
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;
virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder,

View File

@ -107,6 +107,7 @@ set(FDBSERVER_SRCS
workloads/ChangeConfig.actor.cpp
workloads/ClientTransactionProfileCorrectness.actor.cpp
workloads/TriggerRecovery.actor.cpp
workloads/SuspendProcesses.actor.cpp
workloads/CommitBugCheck.actor.cpp
workloads/ConfigureDatabase.actor.cpp
workloads/ConflictRange.actor.cpp
@ -118,6 +119,7 @@ set(FDBSERVER_SRCS
workloads/DiskDurability.actor.cpp
workloads/DiskDurabilityTest.actor.cpp
workloads/DummyWorkload.actor.cpp
workloads/ExternalWorkload.actor.cpp
workloads/FastTriggeredWatches.actor.cpp
workloads/FileSystem.actor.cpp
workloads/Fuzz.cpp
@ -191,26 +193,15 @@ if(NOT WIN32)
target_compile_options(fdb_sqlite BEFORE PRIVATE -w) # disable warnings for third party
endif()
set(java_workload_docstring "Build the Java workloads (makes fdbserver link against JNI)")
set(WITH_JAVA_WORKLOAD OFF CACHE BOOL "${java_workload_docstring}")
if(WITH_JAVA_WORKLOAD)
list(APPEND FDBSERVER_SRCS workloads/JavaWorkload.actor.cpp)
endif()
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/workloads)
add_flow_target(EXECUTABLE NAME fdbserver SRCS ${FDBSERVER_SRCS})
target_include_directories(fdbserver PRIVATE
${CMAKE_SOURCE_DIR}/bindings/c
${CMAKE_BINARY_DIR}/bindings/c
${CMAKE_CURRENT_BINARY_DIR}/workloads
${CMAKE_CURRENT_SOURCE_DIR}/workloads)
target_link_libraries(fdbserver PRIVATE fdbclient fdb_sqlite)
if(WITH_JAVA_WORKLOAD)
if(NOT JNI_FOUND)
message(SEND_ERROR "Trying to build Java workload but couldn't find JNI")
endif()
target_include_directories(fdbserver PRIVATE "${JNI_INCLUDE_DIRS}")
target_link_libraries(fdbserver PRIVATE "${JNI_LIBRARIES}")
endif()
if (GPERFTOOLS_FOUND)
add_compile_definitions(USE_GPERFTOOLS)
target_link_libraries(fdbserver PRIVATE gperftools)

View File

@ -107,7 +107,7 @@ public:
DBInfo() : masterRegistrationCount(0), recoveryStalled(false), forceRecovery(false), unfinishedRecoveries(0), logGenerations(0),
clientInfo( new AsyncVar<ClientDBInfo>( ClientDBInfo() ) ),
serverInfo( new AsyncVar<ServerDBInfo>( ServerDBInfo() ) ),
db( DatabaseContext::create( clientInfo, Future<Void>(), LocalityData(), true, TaskDefaultEndpoint, true ) ) // SOMEDAY: Locality!
db( DatabaseContext::create( clientInfo, Future<Void>(), LocalityData(), true, TaskPriority::DefaultEndpoint, true ) ) // SOMEDAY: Locality!
{
}
@ -1171,7 +1171,7 @@ public:
serverInfo.clusterInterface = ccInterface;
serverInfo.myLocality = locality;
db.serverInfo->set( serverInfo );
cx = openDBOnServer(db.serverInfo, TaskDefaultEndpoint, true, true);
cx = openDBOnServer(db.serverInfo, TaskPriority::DefaultEndpoint, true, true);
}
~ClusterControllerData() {

View File

@ -63,13 +63,13 @@ struct ClusterControllerFullInterface {
void initEndpoints() {
clientInterface.initEndpoints();
recruitFromConfiguration.getEndpoint( TaskClusterController );
recruitRemoteFromConfiguration.getEndpoint( TaskClusterController );
recruitStorage.getEndpoint( TaskClusterController );
registerWorker.getEndpoint( TaskClusterController );
getWorkers.getEndpoint( TaskClusterController );
registerMaster.getEndpoint( TaskClusterController );
getServerDBInfo.getEndpoint( TaskClusterController );
recruitFromConfiguration.getEndpoint( TaskPriority::ClusterController );
recruitRemoteFromConfiguration.getEndpoint( TaskPriority::ClusterController );
recruitStorage.getEndpoint( TaskPriority::ClusterController );
registerWorker.getEndpoint( TaskPriority::ClusterController );
getWorkers.getEndpoint( TaskPriority::ClusterController );
registerMaster.getEndpoint( TaskPriority::ClusterController );
getServerDBInfo.getEndpoint( TaskPriority::ClusterController );
}
template <class Ar>

View File

@ -20,8 +20,9 @@
#include "fdbserver/CoordinationInterface.h"
#include "fdbserver/IKeyValueStore.h"
#include "flow/ActorCollection.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/WorkerInterface.actor.h"
#include "flow/ActorCollection.h"
#include "flow/UnitTest.h"
#include "flow/IndexedSet.h"
#include "flow/actorcompiler.h" // This must be the last #include.
@ -52,8 +53,8 @@ GenerationRegInterface::GenerationRegInterface( NetworkAddress remote )
GenerationRegInterface::GenerationRegInterface( INetwork* local )
{
read.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_READ, TaskCoordination );
write.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_WRITE, TaskCoordination );
read.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_READ, TaskPriority::Coordination );
write.makeWellKnownEndpoint( WLTOKEN_GENERATIONREG_WRITE, TaskPriority::Coordination );
}
LeaderElectionRegInterface::LeaderElectionRegInterface(NetworkAddress remote)
@ -67,9 +68,9 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(NetworkAddress remote)
LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local)
: ClientLeaderRegInterface(local)
{
candidacy.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_CANDIDACY, TaskCoordination );
leaderHeartbeat.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT, TaskCoordination );
forward.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_FORWARD, TaskCoordination );
candidacy.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_CANDIDACY, TaskPriority::Coordination );
leaderHeartbeat.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT, TaskPriority::Coordination );
forward.makeWellKnownEndpoint( WLTOKEN_LEADERELECTIONREG_FORWARD, TaskPriority::Coordination );
}
ServerCoordinators::ServerCoordinators( Reference<ClusterConnectionFile> cf )
@ -159,7 +160,7 @@ ACTOR Future<Void> localGenerationReg( GenerationRegInterface interf, OnDemandSt
}
}
}
};
}
TEST_CASE("/fdbserver/Coordination/localGenerationReg/simple") {
state GenerationRegInterface reg;
@ -360,11 +361,11 @@ struct LeaderRegisterCollection {
return Void();
}
LeaderElectionRegInterface& getInterface(KeyRef key) {
LeaderElectionRegInterface& getInterface(KeyRef key, UID id) {
auto i = registerInterfaces.find( key );
if (i == registerInterfaces.end()) {
Key k = key;
Future<Void> a = wrap(this, k, leaderRegister(registerInterfaces[k], k) );
Future<Void> a = wrap(this, k, leaderRegister(registerInterfaces[k], k), id);
if (a.isError()) throw a.getError();
ASSERT( !a.isReady() );
actors.add( a );
@ -374,11 +375,15 @@ struct LeaderRegisterCollection {
return i->value;
}
ACTOR static Future<Void> wrap( LeaderRegisterCollection* self, Key key, Future<Void> actor ) {
ACTOR static Future<Void> wrap( LeaderRegisterCollection* self, Key key, Future<Void> actor, UID id ) {
state Error e;
try {
// FIXME: Get worker ID here
startRole(Role::COORDINATOR, id, UID());
wait(actor);
endRole(Role::COORDINATOR, id, "Coordinator changed");
} catch (Error& err) {
endRole(Role::COORDINATOR, id, err.what(), err.code() == error_code_actor_cancelled, err);
if (err.code() == error_code_actor_cancelled)
throw;
e = err;
@ -392,7 +397,7 @@ struct LeaderRegisterCollection {
// leaderServer multiplexes multiple leaderRegisters onto a single LeaderElectionRegInterface,
// creating and destroying them on demand.
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore *pStore) {
ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore *pStore, UID id) {
state LeaderRegisterCollection regs( pStore );
state ActorCollection forwarders(false);
@ -404,21 +409,21 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
if( forward.present() )
req.reply.send( forward.get() );
else
regs.getInterface(req.key).getLeader.send( req );
regs.getInterface(req.key, id).getLeader.send( req );
}
when ( CandidacyRequest req = waitNext( interf.candidacy.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send( forward.get() );
else
regs.getInterface(req.key).candidacy.send(req);
regs.getInterface(req.key, id).candidacy.send(req);
}
when ( LeaderHeartbeatRequest req = waitNext( interf.leaderHeartbeat.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send( false );
else
regs.getInterface(req.key).leaderHeartbeat.send(req);
regs.getInterface(req.key, id).leaderHeartbeat.send(req);
}
when ( ForwardRequest req = waitNext( interf.forward.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
@ -426,7 +431,7 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
req.reply.send( Void() );
else {
forwarders.add( LeaderRegisterCollection::setForward( &regs, req.key, ClusterConnectionString(req.conn.toString()) ) );
regs.getInterface(req.key).forward.send(req);
regs.getInterface(req.key, id).forward.send(req);
}
}
when( wait( forwarders.getResult() ) ) { ASSERT(false); throw internal_error(); }
@ -442,7 +447,7 @@ ACTOR Future<Void> coordinationServer(std::string dataFolder) {
TraceEvent("CoordinationServer", myID).detail("MyInterfaceAddr", myInterface.read.getEndpoint().getPrimaryAddress()).detail("Folder", dataFolder);
try {
wait( localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store) || store.getError() );
wait( localGenerationReg(myInterface, &store) || leaderServer(myLeaderInterface, &store, myID) || store.getError() );
throw internal_error();
} catch (Error& e) {
TraceEvent("CoordinationServerError", myID).error(e, true);

View File

@ -263,7 +263,7 @@ typedef WorkPool<Coroutine, ThreadUnsafeSpinLock, true> CoroPool;
ACTOR void coroSwitcher( Future<Void> what, int taskID, Coro* coro ) {
ACTOR void coroSwitcher( Future<Void> what, TaskPriority taskID, Coro* coro ) {
try {
// state double t = now();
wait(what);

View File

@ -88,7 +88,7 @@ struct TCMachineInfo : public ReferenceCounted<TCMachineInfo> {
ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
state StorageServerInterface ssi = server->lastKnownInterface;
state Future<ErrorOr<GetPhysicalMetricsReply>> metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskDataDistributionLaunch );
state Future<ErrorOr<GetPhysicalMetricsReply>> metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskPriority::DataDistributionLaunch );
state Future<Void> resetRequest = Never();
state Future<std::pair<StorageServerInterface, ProcessClass>> interfaceChanged( server->onInterfaceChanged );
state Future<Void> serverRemoved( server->onRemoved );
@ -104,7 +104,7 @@ ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
return Void();
}
metricsRequest = Never();
resetRequest = delay( SERVER_KNOBS->METRIC_DELAY, TaskDataDistributionLaunch );
resetRequest = delay( SERVER_KNOBS->METRIC_DELAY, TaskPriority::DataDistributionLaunch );
}
when( std::pair<StorageServerInterface,ProcessClass> _ssi = wait( interfaceChanged ) ) {
ssi = _ssi.first;
@ -120,7 +120,7 @@ ACTOR Future<Void> updateServerMetrics( TCServerInfo *server ) {
}
else {
resetRequest = Never();
metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskDataDistributionLaunch );
metricsRequest = ssi.getPhysicalMetrics.tryGetReply( GetPhysicalMetricsRequest(), TaskPriority::DataDistributionLaunch );
}
}
}
@ -635,9 +635,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), teamBuilder(Void()),
badTeamRemover(Void()), redundantTeamRemover(Void()), configuration(configuration),
readyToStart(readyToStart), clearHealthyZoneFuture(Void()),
checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskDataDistribution)),
checkTeamDelay(delay(SERVER_KNOBS->CHECK_TEAM_DELAY, TaskPriority::DataDistribution)),
initialFailureReactionDelay(
delayed(readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskDataDistribution)),
delayed(readyToStart, SERVER_KNOBS->INITIAL_FAILURE_REACTION_DELAY, TaskPriority::DataDistribution)),
healthyTeamCount(0), storageServerSet(new LocalityMap<UID>()),
initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),
@ -671,7 +671,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
ACTOR static Future<Void> logOnCompletion( Future<Void> signal, DDTeamCollection* self ) {
wait(signal);
wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskDataDistribution));
wait(delay(SERVER_KNOBS->LOG_ON_COMPLETION_DELAY, TaskPriority::DataDistribution));
if(!self->primary || self->configuration.usableRegions == 1) {
TraceEvent("DDTrackerStarting", self->distributorId)
@ -1919,7 +1919,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
//Building teams can cause servers to become undesired, which can make teams unhealthy.
//Let all of these changes get worked out before responding to the get team request
wait( delay(0, TaskDataDistributionLaunch) );
wait( delay(0, TaskPriority::DataDistributionLaunch) );
return Void();
}
@ -2232,7 +2232,7 @@ ACTOR Future<Void> waitUntilHealthy(DDTeamCollection* self) {
TraceEvent("WaitUntilHealthyStalled", self->distributorId).detail("Primary", self->primary).detail("ZeroHealthy", self->zeroHealthyTeams->get()).detail("ProcessingUnhealthy", self->processingUnhealthy->get());
wait(self->zeroHealthyTeams->onChange() || self->processingUnhealthy->onChange());
}
wait(delay(SERVER_KNOBS->DD_STALL_CHECK_DELAY, TaskLowPriority)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
wait(delay(SERVER_KNOBS->DD_STALL_CHECK_DELAY, TaskPriority::Low)); //After the team trackers wait on the initial failure reaction delay, they yield. We want to make sure every tracker has had the opportunity to send their relocations to the queue.
if(!self->zeroHealthyTeams->get() && !self->processingUnhealthy->get()) {
return Void();
}
@ -2638,7 +2638,7 @@ ACTOR Future<Void> trackExcludedServers( DDTeamCollection* self ) {
if (nchid != lastChangeID)
break;
wait( delay( SERVER_KNOBS->SERVER_LIST_DELAY, TaskDataDistribution ) ); // FIXME: make this tr.watch( excludedServersVersionKey ) instead
wait( delay( SERVER_KNOBS->SERVER_LIST_DELAY, TaskPriority::DataDistribution ) ); // FIXME: make this tr.watch( excludedServersVersionKey ) instead
tr = Transaction(self->cx);
} catch (Error& e) {
wait( tr.onError(e) );
@ -2757,14 +2757,14 @@ ACTOR Future<Void> serverMetricsPolling( TCServerInfo *server) {
state double lastUpdate = now();
loop {
wait( updateServerMetrics( server ) );
wait( delayUntil( lastUpdate + SERVER_KNOBS->STORAGE_METRICS_POLLING_DELAY + SERVER_KNOBS->STORAGE_METRICS_RANDOM_DELAY * deterministicRandom()->random01(), TaskDataDistributionLaunch ) );
wait( delayUntil( lastUpdate + SERVER_KNOBS->STORAGE_METRICS_POLLING_DELAY + SERVER_KNOBS->STORAGE_METRICS_RANDOM_DELAY * deterministicRandom()->random01(), TaskPriority::DataDistributionLaunch ) );
lastUpdate = now();
}
}
//Returns the KeyValueStoreType of server if it is different from self->storeType
ACTOR Future<KeyValueStoreType> keyValueStoreTypeTracker(DDTeamCollection* self, TCServerInfo *server) {
state KeyValueStoreType type = wait(brokenPromiseToNever(server->lastKnownInterface.getKeyValueStoreType.getReplyWithTaskID<KeyValueStoreType>(TaskDataDistribution)));
state KeyValueStoreType type = wait(brokenPromiseToNever(server->lastKnownInterface.getKeyValueStoreType.getReplyWithTaskID<KeyValueStoreType>(TaskPriority::DataDistribution)));
if(type == self->configuration.storageServerStoreType && (self->includedDCs.empty() || std::find(self->includedDCs.begin(), self->includedDCs.end(), server->lastKnownInterface.locality.dcId()) != self->includedDCs.end()) )
wait(Future<Void>(Never()));
@ -2787,7 +2787,7 @@ ACTOR Future<Void> waitForAllDataRemoved( Database cx, UID serverID, Version add
}
// Wait for any change to the serverKeys for this server
wait( delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskDataDistribution) );
wait( delay(SERVER_KNOBS->ALL_DATA_REMOVED_DELAY, TaskPriority::DataDistribution) );
tr.reset();
} catch (Error& e) {
wait( tr.onError(e) );
@ -2830,7 +2830,7 @@ ACTOR Future<Void> storageServerFailureTracker(
ASSERT(!inHealthyZone);
healthChanged = IFailureMonitor::failureMonitor().onStateEqual( interf.waitFailure.getEndpoint(), FailureStatus(false));
} else if(!inHealthyZone) {
healthChanged = waitFailureClientStrict(interf.waitFailure, SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME, TaskDataDistribution);
healthChanged = waitFailureClientStrict(interf.waitFailure, SERVER_KNOBS->DATA_DISTRIBUTION_FAILURE_REACTION_TIME, TaskPriority::DataDistribution);
}
choose {
when ( wait(healthChanged) ) {
@ -3120,7 +3120,7 @@ ACTOR Future<Void> monitorStorageServerRecruitment(DDTeamCollection* self) {
loop {
choose {
when( wait( self->recruitingStream.onChange() ) ) {}
when( wait( self->recruitingStream.get() == 0 ? delay(SERVER_KNOBS->RECRUITMENT_IDLE_DELAY, TaskDataDistribution) : Future<Void>(Never()) ) ) { break; }
when( wait( self->recruitingStream.get() == 0 ? delay(SERVER_KNOBS->RECRUITMENT_IDLE_DELAY, TaskPriority::DataDistribution) : Future<Void>(Never()) ) ) { break; }
}
}
TraceEvent("StorageServerRecruitment", self->distributorId)
@ -3147,12 +3147,12 @@ ACTOR Future<Void> initializeStorage( DDTeamCollection* self, RecruitStorageRepl
self->recruitingIds.insert(interfaceId);
self->recruitingLocalities.insert(candidateWorker.worker.address());
state ErrorOr<InitializeStorageReply> newServer = wait( candidateWorker.worker.storage.tryGetReply( isr, TaskDataDistribution ) );
state ErrorOr<InitializeStorageReply> newServer = wait( candidateWorker.worker.storage.tryGetReply( isr, TaskPriority::DataDistribution ) );
if(newServer.isError()) {
TraceEvent(SevWarn, "DDRecruitmentError").error(newServer.getError());
if( !newServer.isError( error_code_recruitment_failed ) && !newServer.isError( error_code_request_maybe_delivered ) )
throw newServer.getError();
wait( delay(SERVER_KNOBS->STORAGE_RECRUITMENT_DELAY, TaskDataDistribution) );
wait( delay(SERVER_KNOBS->STORAGE_RECRUITMENT_DELAY, TaskPriority::DataDistribution) );
}
self->recruitingIds.erase(interfaceId);
self->recruitingLocalities.erase(candidateWorker.worker.address());
@ -3217,7 +3217,7 @@ ACTOR Future<Void> storageRecruiter( DDTeamCollection* self, Reference<AsyncVar<
if(!fCandidateWorker.isValid() || fCandidateWorker.isReady() || rsr.excludeAddresses != lastRequest.excludeAddresses || rsr.criticalRecruitment != lastRequest.criticalRecruitment) {
lastRequest = rsr;
fCandidateWorker = brokenPromiseToNever( db->get().clusterInterface.recruitStorage.getReply( rsr, TaskDataDistribution ) );
fCandidateWorker = brokenPromiseToNever( db->get().clusterInterface.recruitStorage.getReply( rsr, TaskPriority::DataDistribution ) );
}
choose {
@ -3388,7 +3388,7 @@ ACTOR Future<Void> dataDistributionTeamCollection(
ACTOR Future<Void> waitForDataDistributionEnabled( Database cx ) {
state Transaction tr(cx);
loop {
wait(delay(SERVER_KNOBS->DD_ENABLED_CHECK_DELAY, TaskDataDistribution));
wait(delay(SERVER_KNOBS->DD_ENABLED_CHECK_DELAY, TaskPriority::DataDistribution));
try {
Optional<Value> mode = wait( tr.get( dataDistributionModeKey ) );
@ -3516,7 +3516,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
state double lastLimited = 0;
self->addActor.send( monitorBatchLimitedTime(self->dbInfo, &lastLimited) );
state Database cx = openDBOnServer(self->dbInfo, TaskDataDistributionLaunch, true, true);
state Database cx = openDBOnServer(self->dbInfo, TaskPriority::DataDistributionLaunch, true, true);
cx->locationCacheSize = SERVER_KNOBS->DD_LOCATION_CACHE_SIZE;
//cx->setOption( FDBDatabaseOptions::LOCATION_CACHE_SIZE, StringRef((uint8_t*) &SERVER_KNOBS->DD_LOCATION_CACHE_SIZE, 8) );
@ -3646,7 +3646,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
}
output.send( RelocateShard( keys, unhealthy ? PRIORITY_TEAM_UNHEALTHY : PRIORITY_RECOVER_MOVE ) );
}
wait( yield(TaskDataDistribution) );
wait( yield(TaskPriority::DataDistribution) );
}
vector<TeamCollectionInterface> tcis;

View File

@ -512,9 +512,9 @@ struct DDQueueData {
// FIXME: is the merge case needed
if( input.priority == PRIORITY_MERGE_SHARD ) {
wait( delay( 0.5, TaskDataDistribution - 2 ) );
wait( delay( 0.5, decrementPriority(decrementPriority(TaskPriority::DataDistribution )) ) );
} else {
wait( delay( 0.0001, TaskDataDistributionLaunch ) );
wait( delay( 0.0001, TaskPriority::DataDistributionLaunch ) );
}
loop {
@ -933,7 +933,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
.detail("Count", stuckCount)
.detail("TeamCollectionId", tciIndex)
.detail("NumOfTeamCollections", self->teamCollections.size());
wait( delay( SERVER_KNOBS->BEST_TEAM_STUCK_DELAY, TaskDataDistributionLaunch ) );
wait( delay( SERVER_KNOBS->BEST_TEAM_STUCK_DELAY, TaskPriority::DataDistributionLaunch ) );
}
state std::vector<UID> destIds;
@ -993,7 +993,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
state Error error = success();
state Promise<Void> dataMovementComplete;
state Future<Void> doMoveKeys = moveKeys(self->cx, rd.keys, destIds, healthyIds, self->lock, dataMovementComplete, &self->startMoveKeysParallelismLock, &self->finishMoveKeysParallelismLock, self->teamCollections.size() > 1, relocateShardInterval.pairID );
state Future<Void> pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskDataDistributionLaunch );
state Future<Void> pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch );
try {
loop {
choose {
@ -1016,7 +1016,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
self->dataTransferComplete.send(rd);
}
}
pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskDataDistributionLaunch );
pollHealth = signalledTransferComplete ? Never() : delay( SERVER_KNOBS->HEALTH_POLL_TIME, TaskPriority::DataDistributionLaunch );
}
when( wait( signalledTransferComplete ? Never() : dataMovementComplete.getFuture() ) ) {
self->fetchKeysComplete.insert( rd );
@ -1066,7 +1066,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
} else {
TEST(true); // move to removed server
healthyDestinations.addDataInFlightToTeam( -metrics.bytes );
wait( delay( SERVER_KNOBS->RETRY_RELOCATESHARD_DELAY, TaskDataDistributionLaunch ) );
wait( delay( SERVER_KNOBS->RETRY_RELOCATESHARD_DELAY, TaskPriority::DataDistributionLaunch ) );
}
}
} catch (Error& e) {
@ -1125,7 +1125,7 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
state double checkDelay = SERVER_KNOBS->BG_DD_POLLING_INTERVAL;
state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
loop {
wait( delay(checkDelay, TaskDataDistributionLaunch) );
wait( delay(checkDelay, TaskPriority::DataDistributionLaunch) );
if (self->priority_relocations[PRIORITY_REBALANCE_OVERUTILIZED_TEAM] < SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait( brokenPromiseToNever( self->teamCollections[teamCollectionIndex].getTeam.getReply( GetTeamRequest( true, false, true ) ) ) );
if( randomTeam.present() ) {
@ -1160,7 +1160,7 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
state double checkDelay = SERVER_KNOBS->BG_DD_POLLING_INTERVAL;
state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
loop {
wait( delay(checkDelay, TaskDataDistributionLaunch) );
wait( delay(checkDelay, TaskPriority::DataDistributionLaunch) );
if (self->priority_relocations[PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] < SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait( brokenPromiseToNever( self->teamCollections[teamCollectionIndex].getTeam.getReply( GetTeamRequest( true, false, false ) ) ) );
if( randomTeam.present() ) {
@ -1244,7 +1244,7 @@ ACTOR Future<Void> dataDistributionQueue(
bool wasEmpty = serversToLaunchFrom.empty();
self.queueRelocation( rs, serversToLaunchFrom );
if(wasEmpty && !serversToLaunchFrom.empty())
launchQueuedWorkTimeout = delay(0, TaskDataDistributionLaunch);
launchQueuedWorkTimeout = delay(0, TaskPriority::DataDistributionLaunch);
}
when ( wait(launchQueuedWorkTimeout) ) {
self.launchQueuedWork( serversToLaunchFrom );
@ -1258,7 +1258,7 @@ ACTOR Future<Void> dataDistributionQueue(
when ( RelocateData done = waitNext( self.dataTransferComplete.getFuture() ) ) {
complete( done, self.busymap );
if(serversToLaunchFrom.empty() && !done.src.empty())
launchQueuedWorkTimeout = delay(0, TaskDataDistributionLaunch);
launchQueuedWorkTimeout = delay(0, TaskPriority::DataDistributionLaunch);
serversToLaunchFrom.insert(done.src.begin(), done.src.end());
}
when ( RelocateData done = waitNext( self.relocationComplete.getFuture() ) ) {
@ -1266,7 +1266,7 @@ ACTOR Future<Void> dataDistributionQueue(
self.finishRelocation(done.priority);
self.fetchKeysComplete.erase( done );
//self.logRelocation( done, "ShardRelocatorDone" );
actors.add( tag( delay(0, TaskDataDistributionLaunch), done.keys, rangesComplete ) );
actors.add( tag( delay(0, TaskPriority::DataDistributionLaunch), done.keys, rangesComplete ) );
if( g_network->isSimulated() && debug_isCheckRelocationDuration() && now() - done.startTime > 60 ) {
TraceEvent(SevWarnAlways, "RelocationDurationTooLong").detail("Duration", now() - done.startTime);
debug_setCheckRelocationDuration(false);

View File

@ -140,7 +140,7 @@ ACTOR Future<Void> trackShardBytes(
Reference<AsyncVar<Optional<StorageMetrics>>> shardSize,
bool addToSizeEstimate = true)
{
wait( delay( 0, TaskDataDistribution ) );
wait( delay( 0, TaskPriority::DataDistribution ) );
/*TraceEvent("TrackShardBytesStarting")
.detail("TrackerID", trackerID)
@ -260,7 +260,7 @@ ACTOR Future<Void> changeSizes( DataDistributionTracker* self, KeyRangeRef keys,
}
wait( waitForAll( sizes ) );
wait( yield(TaskDataDistribution) );
wait( yield(TaskPriority::DataDistribution) );
int64_t newShardsStartingSize = 0;
for ( int i = 0; i < sizes.size(); i++ )
@ -281,7 +281,7 @@ struct HasBeenTrueFor : NonCopyable {
Future<Void> set() {
if( !trigger.isValid() ) {
cleared = Promise<Void>();
trigger = delayJittered( SERVER_KNOBS->DD_MERGE_COALESCE_DELAY, TaskDataDistribution - 1 ) || cleared.getFuture();
trigger = delayJittered( SERVER_KNOBS->DD_MERGE_COALESCE_DELAY, decrementPriority(TaskPriority::DataDistribution) ) || cleared.getFuture();
}
return trigger;
}
@ -361,7 +361,7 @@ ACTOR Future<Void> shardSplitter(
self->sizeChanges.add( changeSizes( self, keys, shardSize->get().get().bytes ) );
} else {
wait( delay(1.0, TaskDataDistribution) ); //In case the reason the split point was off was due to a discrepancy between storage servers
wait( delay(1.0, TaskPriority::DataDistribution) ); //In case the reason the split point was off was due to a discrepancy between storage servers
}
return Void();
}
@ -529,7 +529,7 @@ ACTOR Future<Void> shardTracker(
wait( yieldedFuture(self->maxShardSize->onChange()) );
// Since maxShardSize will become present for all shards at once, avoid slow tasks with a short delay
wait( delay( 0, TaskDataDistribution ) );
wait( delay( 0, TaskPriority::DataDistribution ) );
/*TraceEvent("ShardTracker", self->distributorId)
.detail("Begin", keys.begin)
@ -546,7 +546,7 @@ ACTOR Future<Void> shardTracker(
// We could have a lot of actors being released from the previous wait at the same time. Immediately calling
// delay(0) mitigates the resulting SlowTask
wait( delay(0, TaskDataDistribution) );
wait( delay(0, TaskPriority::DataDistribution) );
}
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)
@ -593,12 +593,12 @@ ACTOR Future<Void> trackInitialShards(DataDistributionTracker *self, Reference<I
//This line reduces the priority of shard initialization to prevent interference with failure monitoring.
//SOMEDAY: Figure out what this priority should actually be
wait( delay( 0.0, TaskDataDistribution ) );
wait( delay( 0.0, TaskPriority::DataDistribution ) );
state int s;
for(s=0; s<initData->shards.size()-1; s++) {
restartShardTrackers( self, KeyRangeRef( initData->shards[s].key, initData->shards[s+1].key ) );
wait( yield( TaskDataDistribution ) );
wait( yield( TaskPriority::DataDistribution ) );
}
Future<Void> initialSize = changeSizes( self, KeyRangeRef(allKeys.begin, allKeys.end), 0 );

412
fdbserver/DeltaTree.h Normal file
View File

@ -0,0 +1,412 @@
/*
 * DeltaTree.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "flow/flow.h"
#include "flow/Arena.h"
#include "fdbclient/FDBTypes.h"
#include "fdbserver/Knobs.h"
#include "fdbserver/PrefixTree.h"
#include <string.h>
// Delta Tree is a memory mappable binary tree of T objects such that each node's item is
// stored as a Delta which can reproduce the node's T item given the node's greatest
// lesser ancestor and the node's least greater ancestor.
//
// The Delta type is intended to make use of ordered prefix compression and borrow all
// available prefix bytes from the ancestor T which shares the most prefix bytes with
// the item T being encoded.
//
// T requirements
//
// Must be compatible with Standalone<T> and must implement the following additional methods:
//
// // Writes to d a delta which can create *this from base
// // commonPrefix can be passed in if known
// void writeDelta(dT &d, const T &base, int commonPrefix = -1) const;
//
// // Compare *this to t, returns < 0 for less than, 0 for equal, > 0 for greater than
// int compare(const T &rhs) const;
//
// // Get the common prefix bytes between *this and base
// // skip is a hint of how many prefix bytes are already known to be the same
// int getCommonPrefixLen(const T &base, int skip) const;
//
// // Returns the size of the delta object needed to make *this from base
// // TODO: Explain contract required for deltaSize to be used to predict final
// // balanced tree size incrementally while adding sorted items to a build set
// int deltaSize(const T &base) const;
//
// DeltaT requirements
//
// // Returns the size of this dT instance
// int size();
//
// // Returns the T created by applying the delta to prev or next
// T apply(const T &base, Arena &localStorage) const;
//
// // Stores a boolean which DeltaTree will later use to determine the base node for a node's delta
// void setPrefixSource(bool val);
//
// // Retrieves the previously stored boolean
// bool getPrefixSource() const;
//
#pragma pack(push,1)
template <typename T, typename DeltaT = typename T::Delta, typename OffsetT = uint16_t>
struct DeltaTree {
	// Largest allowed total tree size in bytes: offsets within the tree are stored
	// as OffsetT, so nothing beyond its max value is addressable.
	static int MaximumTreeSize() {
		return std::numeric_limits<OffsetT>::max();
	};

	// Serialized node layout.  A Node header is immediately followed by its DeltaT,
	// and child offsets are measured from the start of that DeltaT (see leftChild /
	// rightChild below), not from the Node itself.
	struct Node {
		OffsetT leftChildOffset;   // 0 means no left child
		OffsetT rightChildOffset;  // 0 means no right child

		// The DeltaT is stored contiguously right after this Node header.
		inline DeltaT & delta() {
			return *(DeltaT *)(this + 1);
		};

		inline const DeltaT & delta() const {
			return *(const DeltaT *)(this + 1);
		};

		Node * rightChild() const {
			//printf("Node(%p): leftOffset=%d rightOffset=%d deltaSize=%d\n", this, (int)leftChildOffset, (int)rightChildOffset, (int)delta().size());
			return rightChildOffset == 0 ? nullptr : (Node *)((uint8_t *)&delta() + rightChildOffset);
		}

		Node * leftChild() const {
			//printf("Node(%p): leftOffset=%d rightOffset=%d deltaSize=%d\n", this, (int)leftChildOffset, (int)rightChildOffset, (int)delta().size());
			return leftChildOffset == 0 ? nullptr : (Node *)((uint8_t *)&delta() + leftChildOffset);
		}

		// Total serialized size of this node: fixed header plus its variable-size delta.
		int size() const {
			return sizeof(Node) + delta().size();
		}
	};

	// Tree header fields; the anonymous struct keeps them packed under the pragma above.
	struct {
		OffsetT nodeBytes; // Total size of all Nodes including the root
		uint8_t initialDepth; // Levels in the tree as of the last rebuild
	};
#pragma pack(pop)

	// The root Node is stored immediately after the tree header.
	inline Node & root() {
		return *(Node *)(this + 1);
	}

	inline const Node & root() const {
		return *(const Node *)(this + 1);
	}

	// Total serialized size of the tree: header plus all node bytes.
	int size() const {
		return sizeof(DeltaTree) + nodeBytes;
	}

public:
	// Get count of total overhead bytes (everything but the user-formatted Delta) for a tree given size n
	static inline int GetTreeOverhead(int n = 0) {
		return sizeof(DeltaTree) + (n * sizeof(Node));
	}

	// A DecodedNode caches the reconstructed item T for a raw Node, along with links
	// to its decoded parent/children and pointers to the ancestor items (prev/next)
	// that its delta may be applied against.
	struct DecodedNode {
		// Decode the root node: prev/next are the tree's lower/upper bound items.
		// The delta's prefix-source flag selects which of the two is the base.
		DecodedNode(Node *raw, const T *prev, const T *next, Arena &arena)
		  : raw(raw), parent(nullptr), left(nullptr), right(nullptr), prev(prev), next(next),
			item(raw->delta().apply(raw->delta().getPrefixSource() ? *prev : *next, arena))
		{
			//printf("DecodedNode1 raw=%p delta=%s\n", raw, raw->delta().toString().c_str());
		}

		// Decode a child of an already-decoded parent.  'left' selects which child this
		// is, which determines the prev/next ancestors: a left child keeps the parent's
		// prev and gains the parent's item as next; a right child is the mirror image.
		DecodedNode(Node *raw, DecodedNode *parent, bool left, Arena &arena)
		  : parent(parent), raw(raw), left(nullptr), right(nullptr),
			prev(left ? parent->prev : &parent->item),
			next(left ? &parent->item : parent->next),
			item(raw->delta().apply(raw->delta().getPrefixSource() ? *prev : *next, arena))
		{
			//printf("DecodedNode2 raw=%p delta=%s\n", raw, raw->delta().toString().c_str());
		}

		Node *raw;
		DecodedNode *parent;
		DecodedNode *left;   // lazily decoded cache, see getLeft()
		DecodedNode *right;  // lazily decoded cache, see getRight()
		const T *prev; // greatest ancestor to the left
		const T *next; // least ancestor to the right
		T item;        // fully reconstructed item for this node

		// Lazily decode and cache the right child; returns nullptr if there isn't one.
		DecodedNode *getRight(Arena &arena) {
			if(right == nullptr) {
				Node *n = raw->rightChild();
				if(n != nullptr) {
					right = new (arena) DecodedNode(n, this, false, arena);
				}
			}
			return right;
		}

		// Lazily decode and cache the left child; returns nullptr if there isn't one.
		DecodedNode *getLeft(Arena &arena) {
			if(left == nullptr) {
				Node *n = raw->leftChild();
				if(n != nullptr) {
					left = new (arena) DecodedNode(n, this, true, arena);
				}
			}
			return left;
		}
	};

	struct Cursor;

	// A Reader is used to read a Tree by getting cursors into it.
	// Any node decoded by any cursor is placed in cache for use
	// by other cursors.
	struct Reader : FastAllocated<Reader> {
		Reader(const void *treePtr = nullptr, const T *lowerBound = nullptr, const T *upperBound = nullptr)
		  : tree((DeltaTree *)treePtr), lower(lowerBound), upper(upperBound) {
			// TODO: Remove these copies into arena and require users of Reader to keep prev and next alive during its lifetime
			lower = new(arena) T(arena, *lower);
			upper = new(arena) T(arena, *upper);

			// An empty tree (no node bytes) has no root node to decode.
			root = (tree->nodeBytes == 0) ? nullptr : new (arena) DecodedNode(&tree->root(), lower, upper, arena);
		}

		const T *lowerBound() const {
			return lower;
		}

		const T *upperBound() const {
			return upper;
		}

		Arena arena;        // owns the bound copies and all DecodedNodes
		DeltaTree *tree;
		DecodedNode *root;  // nullptr for an empty tree
		const T *lower;     // arena copy of the tree's lower bound item
		const T *upper;     // arena copy of the tree's upper bound item

		Cursor getCursor() {
			return Cursor(this);
		}
	};

	// Cursor provides a way to seek into a PrefixTree and iterate over its contents
	// All Cursors from a Reader share the same decoded node 'cache' (tree of DecodedNodes)
	struct Cursor {
		Cursor() : reader(nullptr), node(nullptr) {
		}

		Cursor(Reader *r) : reader(r), node(reader->root) {
		}

		Reader *reader;
		DecodedNode *node;  // current position; nullptr means the cursor is invalid

		bool valid() const {
			return node != nullptr;
		}

		const T & get() const {
			return node->item;
		}

		// Current item, or the tree's upper bound if the cursor is invalid.
		const T & getOrUpperBound() const {
			return valid() ? node->item : *reader->upperBound();
		}

		// Moves the cursor to the node with the greatest key less than or equal to s. If successful,
		// returns true, otherwise returns false and the cursor will be at the node with the next key
		// greater than s.
		bool seekLessThanOrEqual(const T &s) {
			node = nullptr;
			DecodedNode *n = reader->root;

			// Standard BST descent, remembering the best (greatest) candidate <= s.
			while(n != nullptr) {
				int cmp = s.compare(n->item);

				if(cmp == 0) {
					node = n;
					return true;
				}

				if(cmp < 0) {
					n = n->getLeft(reader->arena);
				}
				else {
					// n < s so store it in node as a potential result
					node = n;
					n = n->getRight(reader->arena);
				}
			}

			return node != nullptr;
		}

		// Move to the least item; returns false if the tree is empty.
		bool moveFirst() {
			DecodedNode *n = reader->root;
			node = n;
			// Descend left as far as possible.
			while(n != nullptr) {
				n = n->getLeft(reader->arena);
				if(n != nullptr)
					node = n;
			}
			return node != nullptr;
		}

		// Move to the greatest item; returns false if the tree is empty.
		bool moveLast() {
			DecodedNode *n = reader->root;
			node = n;
			// Descend right as far as possible.
			while(n != nullptr) {
				n = n->getRight(reader->arena);
				if(n != nullptr)
					node = n;
			}
			return node != nullptr;
		}

		// Move to the in-order successor; returns false and invalidates the cursor
		// when already at the last item.  Requires a valid cursor (node != nullptr).
		bool moveNext() {
			// Try to go right
			DecodedNode *n = node->getRight(reader->arena);
			if(n != nullptr) {
				// Go left as far as possible
				while(n != nullptr) {
					node = n;
					n = n->getLeft(reader->arena);
				}
				return true;
			}

			// Follow parent links until a greater parent is found
			while(node->parent != nullptr) {
				bool greaterParent = node->parent->left == node;
				node = node->parent;
				if(greaterParent) {
					return true;
				}
			}

			node = nullptr;
			return false;
		}

		// Move to the in-order predecessor; returns false and invalidates the cursor
		// when already at the first item.  Requires a valid cursor (node != nullptr).
		bool movePrev() {
			// Try to go left
			DecodedNode *n = node->getLeft(reader->arena);
			if(n != nullptr) {
				// Go right as far as possible
				while(n != nullptr) {
					node = n;
					n = n->getRight(reader->arena);
				}
				return true;
			}

			// Follow parent links until a lesser parent is found
			while(node->parent != nullptr) {
				bool lesserParent = node->parent->right == node;
				node = node->parent;
				if(lesserParent) {
					return true;
				}
			}

			node = nullptr;
			return false;
		}
	};

	// Builds a balanced tree from the sorted items in [begin, end), delta-encoding
	// each item against its enclosing prev/next ancestors.  Returns number of bytes
	// written (the full serialized tree size, header included).
	// NOTE(review): when begin == end, initialDepth is still computed from log2(0)
	// before the empty-tree branch — confirm callers never rely on initialDepth of
	// an empty tree.
	int build(const T *begin, const T *end, const T *prev, const T *next) {
		//printf("tree size: %d node size: %d\n", sizeof(DeltaTree), sizeof(Node));
		int count = end - begin;
		initialDepth = (uint8_t)log2(count) + 1;

		// The boundary leading to the new page acts as the last time we branched right
		if(begin != end) {
			nodeBytes = build(root(), begin, end, prev, next);
		}
		else {
			nodeBytes = 0;
		}
		return size();
	}

private:
	// Recursively serialize the subtree for [begin, end) into 'root', with prev/next
	// as the enclosing ancestor items.  Children are written contiguously after the
	// root's delta: left child first, then right.  Returns the byte count of the
	// entire serialized subtree.
	static OffsetT build(Node &root, const T *begin, const T *end, const T *prev, const T *next) {
		//printf("build: %s to %s\n", begin->toString().c_str(), (end - 1)->toString().c_str());
		//printf("build: root at %p sizeof(Node) %d delta at %p \n", &root, sizeof(Node), &root.delta());
		ASSERT(end != begin);
		int count = end - begin;

		// Find key to be stored in root
		int mid = perfectSubtreeSplitPointCached(count);
		const T &item = begin[mid];

		// Get the common prefix length between next and prev
		// Since mid is between them, we can skip that length to determine the common prefix length
		// between mid and prev and between mid and next.
		int nextPrevCommon = prev->getCommonPrefixLen(*next, 0);
		int commonWithPrev = item.getCommonPrefixLen(*prev, nextPrevCommon);
		int commonWithNext = item.getCommonPrefixLen(*next, nextPrevCommon);

		// Delta-encode against whichever neighbor shares the longer prefix with item.
		bool prefixSourcePrev;
		int commonPrefix;
		const T *base;
		if(commonWithPrev >= commonWithNext) {
			prefixSourcePrev = true;
			commonPrefix = commonWithPrev;
			base = prev;
		}
		else {
			prefixSourcePrev = false;
			commonPrefix = commonWithNext;
			base = next;
		}

		// NOTE(review): writeDelta is documented in this file's header comment as
		// returning void, but its return value is used here as the delta's size —
		// confirm T's actual contract.
		int deltaSize = item.writeDelta(root.delta(), *base, commonPrefix);
		root.delta().setPrefixSource(prefixSourcePrev);
		//printf("Serialized %s to %p\n", item.toString().c_str(), &root.delta());

		// Continue writing after the serialized Delta.
		uint8_t *wptr = (uint8_t *)&root.delta() + deltaSize;

		// Serialize left child
		if(count > 1) {
			wptr += build(*(Node *)wptr, begin, begin + mid, prev, &item);
			root.leftChildOffset = deltaSize;  // left child begins right after the delta
		}
		else {
			root.leftChildOffset = 0;
		}

		// Serialize right child
		if(count > 2) {
			root.rightChildOffset = wptr - (uint8_t *)&root.delta();
			wptr += build(*(Node *)wptr, begin + mid + 1, end, &item, next);
		}
		else {
			root.rightChildOffset = 0;
		}

		return wptr - (uint8_t *)&root;
	}
};

View File

@ -31,8 +31,7 @@
#define debug_printf_always(...) { fprintf(stdout, "%s %f ", g_network->getLocalAddress().toString().c_str(), now()), fprintf(stdout, __VA_ARGS__); fflush(stdout); }
template <class... T>
void debug_printf_noop(T&&...) {}
#define debug_printf_noop(...)
#if REDWOOD_DEBUG
#define debug_printf debug_printf_always
@ -42,11 +41,18 @@ void debug_printf_noop(T&&...) {}
#define BEACON fprintf(stderr, "%s: %s line %d \n", __FUNCTION__, __FILE__, __LINE__)
#ifndef VALGRIND
#define VALGRIND_MAKE_MEM_UNDEFINED(x, y)
#define VALGRIND_MAKE_MEM_DEFINED(x, y)
#endif
typedef uint32_t LogicalPageID; // uint64_t?
static const int invalidLogicalPageID = LogicalPageID(-1);
class IPage {
public:
IPage() : userData(nullptr) {}
virtual uint8_t const* begin() const = 0;
virtual uint8_t* mutate() = 0;
@ -57,10 +63,17 @@ public:
return StringRef(begin(), size());
}
virtual ~IPage() {}
virtual ~IPage() {
if(userData != nullptr && userDataDestructor != nullptr) {
userDataDestructor(userData);
}
}
virtual void addref() const = 0;
virtual void delref() const = 0;
mutable void *userData;
mutable void (*userDataDestructor)(void *);
};
class IPagerSnapshot {

View File

@ -44,6 +44,8 @@ public:
virtual void addref() = 0;
virtual void delref() = 0;
virtual std::string toString() const = 0;
};
class IVersionedStore : public IClosable {

View File

@ -23,12 +23,12 @@
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h"
#include "fdbrpc/crc32c.h"
struct SumType {
bool operator==(const SumType &rhs) const { return part1 == rhs.part1 && part2 == rhs.part2; }
uint32_t part1;
uint32_t part2;
std::string toString() { return format("0x%08x%08x", part1, part2); }
bool operator==(const SumType &rhs) const { return crc == rhs.crc; }
uint32_t crc;
std::string toString() { return format("0x%08x", crc); }
};
bool checksum(IAsyncFile *file, uint8_t *page, int pageSize, LogicalPageID logical, PhysicalPageID physical, bool write) {
@ -41,15 +41,17 @@ bool checksum(IAsyncFile *file, uint8_t *page, int pageSize, LogicalPageID logic
pageSize -= IndirectShadowPage::PAGE_OVERHEAD_BYTES;
SumType sum;
SumType *pSumInPage = (SumType *)(page + pageSize);
// Write sum directly to page or to sum variable based on mode
SumType *sumOut = write ? pSumInPage : &sum;
sumOut->part1 = physical;
sumOut->part2 = logical;
hashlittle2(page, pageSize, &sumOut->part1, &sumOut->part2);
sumOut->crc = crc32c_append(logical, page, pageSize);
VALGRIND_MAKE_MEM_DEFINED(sumOut, sizeof(SumType));
debug_printf("checksum %s%s logical %d physical %d size %d checksums page %s calculated %s data at %p %s\n",
write ? "write" : "read", (!write && sum != *pSumInPage) ? " MISMATCH" : "", logical, physical, pageSize, write ? "NA" : pSumInPage->toString().c_str(), sumOut->toString().c_str(), page, "" /*StringRef((uint8_t *)page, pageSize).toHexString().c_str()*/);
write ? "write" : "read",
(!write && sum != *pSumInPage) ? " MISMATCH" : "",
logical, physical, pageSize,
write ? "NA" : pSumInPage->toString().c_str(),
sumOut->toString().c_str(), page, "");
// Verify if not in write mode
if(!write && sum != *pSumInPage) {
@ -75,10 +77,6 @@ inline void checksumWrite(IAsyncFile *file, uint8_t *page, int pageSize, Logical
IndirectShadowPage::IndirectShadowPage() : fastAllocated(true) {
data = (uint8_t*)FastAllocator<4096>::allocate();
#if VALGRIND
// Prevent valgrind errors caused by writing random unneeded bytes to disk.
memset(data, 0, size());
#endif
}
IndirectShadowPage::~IndirectShadowPage() {
@ -276,7 +274,7 @@ ACTOR Future<Void> recover(IndirectShadowPager *pager) {
ACTOR Future<Void> housekeeper(IndirectShadowPager *pager) {
wait(pager->recovery);
wait(Never());
loop {
state LogicalPageID pageID = 0;
for(; pageID < pager->pageTable.size(); ++pageID) {

View File

@ -400,7 +400,7 @@ private:
bool ok = count < 1e6;
if( !ok ) {
TraceEvent(/*ok ? SevInfo : */SevWarnAlways, "KVSMemCommit_queue", id)
TraceEvent(/*ok ? SevInfo : */SevWarnAlways, "KVSMemCommitQueue", id)
.detail("Bytes", total)
.detail("Log", log)
.detail("Ops", count)
@ -715,7 +715,7 @@ KeyValueStoreMemory::KeyValueStoreMemory( IDiskQueue* log, UID id, int64_t memor
IKeyValueStore* keyValueStoreMemory( std::string const& basename, UID logID, int64_t memoryLimit, std::string ext ) {
TraceEvent("KVSMemOpening", logID).detail("Basename", basename).detail("MemoryLimit", memoryLimit);
IDiskQueue *log = openDiskQueue( basename, ext, logID, DiskQueueVersion::V0 );
IDiskQueue *log = openDiskQueue( basename, ext, logID, DiskQueueVersion::V1 );
return new KeyValueStoreMemory( log, logID, memoryLimit, false, false, false );
}

View File

@ -1937,8 +1937,8 @@ KeyValueStoreSQLite::KeyValueStoreSQLite(std::string const& filename, UID id, Ke
readCursors.resize(64); //< number of read threads
sqlite3_soft_heap_limit64( SERVER_KNOBS->SOFT_HEAP_LIMIT ); // SOMEDAY: Is this a performance issue? Should we drop the cache sizes for individual threads?
int taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskDiskWrite);
TaskPriority taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskPriority::DiskWrite);
writeThread->addThread( new Writer(filename, type==KeyValueStoreType::SSD_BTREE_V2, checkChecksums, checkIntegrity, writesComplete, springCleaningStats, diskBytesUsed, freeListPages, id, &readCursors) );
g_network->setCurrentTask(taskId);
auto p = new Writer::InitAction();
@ -1963,8 +1963,8 @@ StorageBytes KeyValueStoreSQLite::getStorageBytes() {
void KeyValueStoreSQLite::startReadThreads() {
int nReadThreads = readCursors.size();
int taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskDiskRead);
TaskPriority taskId = g_network->getCurrentTask();
g_network->setCurrentTask(TaskPriority::DiskRead);
for(int i=0; i<nReadThreads; i++)
readThreads->addThread( new Reader(filename, type==KeyValueStoreType::SSD_BTREE_V2, readsComplete, logID, &readCursors[i]) );
g_network->setCurrentTask(taskId);

View File

@ -100,7 +100,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
// Data distribution
init( RETRY_RELOCATESHARD_DELAY, 0.1 );
init( DATA_DISTRIBUTION_FAILURE_REACTION_TIME, 10.0 ); if( randomize && BUGGIFY ) DATA_DISTRIBUTION_FAILURE_REACTION_TIME = 1.0;
init( DATA_DISTRIBUTION_FAILURE_REACTION_TIME, 60.0 ); if( randomize && BUGGIFY ) DATA_DISTRIBUTION_FAILURE_REACTION_TIME = 1.0;
bool buggifySmallShards = randomize && BUGGIFY;
init( MIN_SHARD_BYTES, 200000 ); if( buggifySmallShards ) MIN_SHARD_BYTES = 40000; //FIXME: data distribution tracker (specifically StorageMetrics) relies on this number being larger than the maximum size of a key value pair
init( SHARD_BYTES_RATIO, 4 );
@ -371,8 +371,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( TARGET_BYTES_PER_STORAGE_SERVER_BATCH, 500e6 ); if( smallStorageTarget ) TARGET_BYTES_PER_STORAGE_SERVER_BATCH = 1500e3;
init( SPRING_BYTES_STORAGE_SERVER_BATCH, 50e6 ); if( smallStorageTarget ) SPRING_BYTES_STORAGE_SERVER_BATCH = 150e3;
init( STORAGE_HARD_LIMIT_BYTES, 1500e6 ); if( smallStorageTarget ) STORAGE_HARD_LIMIT_BYTES = 4500e3;
init( STORAGE_DURABILITY_LAG_SOFT_MAX, 20e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_SOFT_MAX = 10e6;
init( STORAGE_DURABILITY_LAG_HARD_MAX, 200e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_HARD_MAX = 100e6;
init( STORAGE_DURABILITY_LAG_HARD_MAX, 2000e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_HARD_MAX = 100e6;
init( STORAGE_DURABILITY_LAG_SOFT_MAX, 200e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_SOFT_MAX = 10e6;
bool smallTlogTarget = randomize && BUGGIFY;
init( TARGET_BYTES_PER_TLOG, 2400e6 ); if( smallTlogTarget ) TARGET_BYTES_PER_TLOG = 2000e3;
@ -410,6 +410,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( FETCH_KEYS_PARALLELISM_BYTES, 4e6 ); if( randomize && BUGGIFY ) FETCH_KEYS_PARALLELISM_BYTES = 3e6;
init( BUGGIFY_BLOCK_BYTES, 10000 );
init( STORAGE_COMMIT_BYTES, 10000000 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_BYTES = 2000000;
init( STORAGE_DURABILITY_LAG_REJECT_THRESHOLD, 0.25 );
init( STORAGE_DURABILITY_LAG_MIN_RATE, 0.1 );
init( STORAGE_COMMIT_INTERVAL, 0.5 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_INTERVAL = 2.0;
init( UPDATE_SHARD_VERSION_INTERVAL, 0.25 ); if( randomize && BUGGIFY ) UPDATE_SHARD_VERSION_INTERVAL = 1.0;
init( BYTE_SAMPLING_FACTOR, 250 ); //cannot buggify because of differences in restarting tests
@ -419,7 +421,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( LONG_BYTE_SAMPLE_RECOVERY_DELAY, 60.0 );
init( BYTE_SAMPLE_LOAD_PARALLELISM, 8 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_PARALLELISM = 1;
init( BYTE_SAMPLE_LOAD_DELAY, 0.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.1;
init( BYTE_SAMPLE_START_DELAY, 1.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_LOAD_DELAY = 0.0;
init( BYTE_SAMPLE_START_DELAY, 1.0 ); if( randomize && BUGGIFY ) BYTE_SAMPLE_START_DELAY = 0.0;
init( UPDATE_STORAGE_PROCESS_STATS_INTERVAL, 5.0 );
//Wait Failure

View File

@ -346,8 +346,10 @@ public:
int FETCH_KEYS_PARALLELISM_BYTES;
int BUGGIFY_BLOCK_BYTES;
int64_t STORAGE_HARD_LIMIT_BYTES;
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
int64_t STORAGE_DURABILITY_LAG_HARD_MAX;
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
double STORAGE_DURABILITY_LAG_REJECT_THRESHOLD;
double STORAGE_DURABILITY_LAG_MIN_RATE;
int STORAGE_COMMIT_BYTES;
double STORAGE_COMMIT_INTERVAL;
double UPDATE_SHARD_VERSION_INTERVAL;

View File

@ -30,7 +30,7 @@ Optional<std::pair<LeaderInfo, bool>> getLeader( const vector<Optional<LeaderInf
ACTOR Future<Void> submitCandidacy( Key key, LeaderElectionRegInterface coord, LeaderInfo myInfo, UID prevChangeID, Reference<AsyncVar<vector<Optional<LeaderInfo>>>> nominees, int index ) {
loop {
auto const& nom = nominees->get()[index];
Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.candidacy, CandidacyRequest( key, myInfo, nom.present() ? nom.get().changeID : UID(), prevChangeID ), TaskCoordinationReply ) );
Optional<LeaderInfo> li = wait( retryBrokenPromise( coord.candidacy, CandidacyRequest( key, myInfo, nom.present() ? nom.get().changeID : UID(), prevChangeID ), TaskPriority::CoordinationReply ) );
if (li != nominees->get()[index]) {
vector<Optional<LeaderInfo>> v = nominees->get();
@ -150,7 +150,7 @@ ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators, Valu
// we might be breaking the leader election process for someone with better communications but lower ID, so change IDs.
if ((!leader.present() || !leader.get().second) && std::count( nominees->get().begin(), nominees->get().end(), myInfo )) {
if (!badCandidateTimeout.isValid())
badCandidateTimeout = delay( SERVER_KNOBS->POLLING_FREQUENCY*2, TaskCoordinationReply );
badCandidateTimeout = delay( SERVER_KNOBS->POLLING_FREQUENCY*2, TaskPriority::CoordinationReply );
} else
badCandidateTimeout = Future<Void>();
@ -183,12 +183,12 @@ ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators, Valu
state vector<Future<Void>> true_heartbeats;
state vector<Future<Void>> false_heartbeats;
for(int i=0; i<coordinators.leaderElectionServers.size(); i++) {
Future<bool> hb = retryBrokenPromise( coordinators.leaderElectionServers[i].leaderHeartbeat, LeaderHeartbeatRequest( coordinators.clusterKey, myInfo, prevChangeID ), TaskCoordinationReply );
Future<bool> hb = retryBrokenPromise( coordinators.leaderElectionServers[i].leaderHeartbeat, LeaderHeartbeatRequest( coordinators.clusterKey, myInfo, prevChangeID ), TaskPriority::CoordinationReply );
true_heartbeats.push_back( onEqual(hb, true) );
false_heartbeats.push_back( onEqual(hb, false) );
}
state Future<Void> rate = delay( SERVER_KNOBS->HEARTBEAT_FREQUENCY, TaskCoordinationReply ) || asyncPriorityInfo->onChange(); // SOMEDAY: Move to server side?
state Future<Void> rate = delay( SERVER_KNOBS->HEARTBEAT_FREQUENCY, TaskPriority::CoordinationReply ) || asyncPriorityInfo->onChange(); // SOMEDAY: Move to server side?
choose {
when ( wait( quorum( true_heartbeats, true_heartbeats.size()/2+1 ) ) ) {

View File

@ -51,7 +51,7 @@ struct LogRouterData {
}
// Erase messages not needed to update *from* versions >= before (thus, messages with toversion <= before)
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, LogRouterData *tlogData, int taskID ) {
ACTOR Future<Void> eraseMessagesBefore( TagData *self, Version before, LogRouterData *tlogData, TaskPriority taskID ) {
while(!self->version_messages.empty() && self->version_messages.front().first < before) {
Version version = self->version_messages.front().first;
int64_t messagesErased = 0;
@ -68,7 +68,7 @@ struct LogRouterData {
return Void();
}
Future<Void> eraseMessagesBefore(Version before, LogRouterData *tlogData, int taskID) {
Future<Void> eraseMessagesBefore(Version before, LogRouterData *tlogData, TaskPriority taskID) {
return eraseMessagesBefore(this, before, tlogData, taskID);
}
};
@ -197,7 +197,7 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
while(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS < ver) {
if(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS > self->version.get()) {
self->version.set( self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
} else {
wait(self->minPopped.whenAtLeast((self->minPopped.get()+1)));
}
@ -220,7 +220,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
loop {
loop {
choose {
when(wait( r ? r->getMore(TaskTLogCommit) : Never() ) ) {
when(wait( r ? r->getMore(TaskPriority::TLogCommit) : Never() ) ) {
break;
}
when( wait( dbInfoChange ) ) { //FIXME: does this actually happen?
@ -247,7 +247,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
commitMessages(self, ver, messages);
self->version.set( ver );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
//TraceEvent("LogRouterVersion").detail("Ver",ver);
}
lastVer = ver;
@ -260,7 +260,7 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
wait( waitForVersion(self, ver) );
self->version.set( ver );
wait(yield(TaskTLogCommit));
wait(yield(TaskPriority::TLogCommit));
}
break;
}
@ -357,6 +357,7 @@ ACTOR Future<Void> logRouterPeekMessages( LogRouterData* self, TLogPeekRequest r
reply.messages = messages.toValue();
reply.popped = self->minPopped.get() >= self->startVersion ? self->minPopped.get() : 0;
reply.end = endVersion;
reply.onlySpilled = false;
req.reply.send( reply );
//TraceEvent("LogRouterPeek4", self->dbgid);
@ -370,7 +371,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
} else if (req.to > tagData->popped) {
tagData->popped = req.to;
tagData->durableKnownCommittedVersion = req.durableKnownCommittedVersion;
wait(tagData->eraseMessagesBefore( req.to, self, TaskTLogPop ));
wait(tagData->eraseMessagesBefore( req.to, self, TaskPriority::TLogPop ));
}
state Version minPopped = std::numeric_limits<Version>::max();
@ -384,7 +385,7 @@ ACTOR Future<Void> logRouterPop( LogRouterData* self, TLogPopRequest req ) {
while(!self->messageBlocks.empty() && self->messageBlocks.front().first < minPopped) {
self->messageBlocks.pop_front();
wait(yield(TaskTLogPop));
wait(yield(TaskPriority::TLogPop));
}
self->poppedVersion = std::min(minKnownCommittedVersion, self->minKnownCommittedVersion);

View File

@ -340,8 +340,8 @@ struct ILogSystem {
virtual void advanceTo(LogMessageVersion n) = 0;
//returns immediately if hasMessage() returns true.
//returns when either the result of hasMessage() or version() has changed.
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply) = 0;
//returns when either the result of hasMessage() or version() has changed, or a cursor has internally been exhausted.
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply) = 0;
//returns when the failure monitor detects that the servers associated with the cursor are failed
virtual Future<Void> onFailed() = 0;
@ -388,6 +388,7 @@ struct ILogSystem {
UID randomID;
bool returnIfBlocked;
bool onlySpilled;
bool parallelGetMore;
int sequence;
Deque<Future<TLogPeekReply>> futureResults;
@ -406,7 +407,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -454,7 +455,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -499,7 +500,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -534,7 +535,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();
@ -595,7 +596,7 @@ struct ILogSystem {
virtual StringRef getMessageWithTags();
virtual const std::vector<Tag>& getTags();
virtual void advanceTo(LogMessageVersion n);
virtual Future<Void> getMore(int taskID = TaskTLogPeekReply);
virtual Future<Void> getMore(TaskPriority taskID = TaskPriority::TLogPeekReply);
virtual Future<Void> onFailed();
virtual bool isActive();
virtual bool isExhausted();

View File

@ -25,14 +25,14 @@
#include "flow/actorcompiler.h" // has to be last include
ILogSystem::ServerPeekCursor::ServerPeekCursor( Reference<AsyncVar<OptionalInterface<TLogInterface>>> const& interf, Tag tag, Version begin, Version end, bool returnIfBlocked, bool parallelGetMore )
: interf(interf), tag(tag), messageVersion(begin), end(end), hasMsg(false), rd(results.arena, results.messages, Unversioned()), randomID(deterministicRandom()->randomUniqueID()), poppedVersion(0), returnIfBlocked(returnIfBlocked), sequence(0), parallelGetMore(parallelGetMore) {
: interf(interf), tag(tag), messageVersion(begin), end(end), hasMsg(false), rd(results.arena, results.messages, Unversioned()), randomID(deterministicRandom()->randomUniqueID()), poppedVersion(0), returnIfBlocked(returnIfBlocked), sequence(0), onlySpilled(false), parallelGetMore(parallelGetMore) {
this->results.maxKnownVersion = 0;
this->results.minKnownCommittedVersion = 0;
//TraceEvent("SPC_Starting", randomID).detail("Tag", tag.toString()).detail("Begin", begin).detail("End", end).backtrace();
}
ILogSystem::ServerPeekCursor::ServerPeekCursor( TLogPeekReply const& results, LogMessageVersion const& messageVersion, LogMessageVersion const& end, int32_t messageLength, int32_t rawLength, bool hasMsg, Version poppedVersion, Tag tag )
: results(results), tag(tag), rd(results.arena, results.messages, Unversioned()), messageVersion(messageVersion), end(end), messageLength(messageLength), rawLength(rawLength), hasMsg(hasMsg), randomID(deterministicRandom()->randomUniqueID()), poppedVersion(poppedVersion), returnIfBlocked(false), sequence(0), parallelGetMore(false)
: results(results), tag(tag), rd(results.arena, results.messages, Unversioned()), messageVersion(messageVersion), end(end), messageLength(messageLength), rawLength(rawLength), hasMsg(hasMsg), randomID(deterministicRandom()->randomUniqueID()), poppedVersion(poppedVersion), returnIfBlocked(false), sequence(0), onlySpilled(false), parallelGetMore(false)
{
//TraceEvent("SPC_Clone", randomID);
this->results.maxKnownVersion = 0;
@ -133,7 +133,7 @@ void ILogSystem::ServerPeekCursor::advanceTo(LogMessageVersion n) {
}
}
ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self, int taskID ) {
ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self, TaskPriority taskID ) {
if( !self->interf || self->messageVersion >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();
@ -146,8 +146,12 @@ ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self
loop {
state Version expectedBegin = self->messageVersion.version;
try {
if (self->parallelGetMore || self->onlySpilled) {
while(self->futureResults.size() < SERVER_KNOBS->PARALLEL_GET_MORE_REQUESTS && self->interf->get().present()) {
self->futureResults.push_back( brokenPromiseToNever( self->interf->get().interf().peekMessages.getReply(TLogPeekRequest(self->messageVersion.version,self->tag,self->returnIfBlocked, std::make_pair(self->randomID, self->sequence++)), taskID) ) );
self->futureResults.push_back( brokenPromiseToNever( self->interf->get().interf().peekMessages.getReply(TLogPeekRequest(self->messageVersion.version,self->tag,self->returnIfBlocked, self->onlySpilled, std::make_pair(self->randomID, self->sequence++)), taskID) ) );
}
} else if (self->futureResults.size() == 0) {
return Void();
}
choose {
@ -158,6 +162,7 @@ ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self
expectedBegin = res.end;
self->futureResults.pop_front();
self->results = res;
self->onlySpilled = res.onlySpilled;
if(res.popped.present())
self->poppedVersion = std::min( std::max(self->poppedVersion, res.popped.get()), self->end.version );
self->rd = ArenaReader( self->results.arena, self->results.messages, Unversioned() );
@ -172,6 +177,7 @@ ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self
self->interfaceChanged = self->interf->onChange();
self->randomID = deterministicRandom()->randomUniqueID();
self->sequence = 0;
self->onlySpilled = false;
self->futureResults.clear();
}
}
@ -192,7 +198,7 @@ ACTOR Future<Void> serverPeekParallelGetMore( ILogSystem::ServerPeekCursor* self
}
}
ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, int taskID ) {
ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, TaskPriority taskID ) {
if( !self->interf || self->messageVersion >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();
@ -201,8 +207,9 @@ ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, int ta
loop {
choose {
when( TLogPeekReply res = wait( self->interf->get().present() ?
brokenPromiseToNever( self->interf->get().interf().peekMessages.getReply(TLogPeekRequest(self->messageVersion.version,self->tag,self->returnIfBlocked), taskID) ) : Never() ) ) {
brokenPromiseToNever( self->interf->get().interf().peekMessages.getReply(TLogPeekRequest(self->messageVersion.version,self->tag,self->returnIfBlocked, self->onlySpilled), taskID) ) : Never() ) ) {
self->results = res;
self->onlySpilled = res.onlySpilled;
if(res.popped.present())
self->poppedVersion = std::min( std::max(self->poppedVersion, res.popped.get()), self->end.version );
self->rd = ArenaReader( self->results.arena, self->results.messages, Unversioned() );
@ -213,7 +220,9 @@ ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, int ta
//TraceEvent("SPC_GetMoreB", self->randomID).detail("Has", self->hasMessage()).detail("End", res.end).detail("Popped", res.popped.present() ? res.popped.get() : 0);
return Void();
}
when( wait( self->interf->onChange() ) ) {}
when( wait( self->interf->onChange() ) ) {
self->onlySpilled = false;
}
}
}
} catch( Error &e ) {
@ -225,12 +234,16 @@ ACTOR Future<Void> serverPeekGetMore( ILogSystem::ServerPeekCursor* self, int ta
}
}
Future<Void> ILogSystem::ServerPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) {
//TraceEvent("SPC_GetMore", randomID).detail("HasMessage", hasMessage()).detail("More", !more.isValid() || more.isReady()).detail("MessageVersion", messageVersion.toString()).detail("End", end.toString());
if( hasMessage() )
return Void();
if( !more.isValid() || more.isReady() ) {
more = parallelGetMore ? serverPeekParallelGetMore(this, taskID) : serverPeekGetMore(this, taskID);
if (parallelGetMore || onlySpilled || futureResults.size()) {
more = serverPeekParallelGetMore(this, taskID);
} else {
more = serverPeekGetMore(this, taskID);
}
}
return more;
}
@ -431,7 +444,7 @@ void ILogSystem::MergedPeekCursor::advanceTo(LogMessageVersion n) {
}
}
ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMessageVersion startVersion, int taskID) {
ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMessageVersion startVersion, TaskPriority taskID) {
loop {
//TraceEvent("MPC_GetMoreA", self->randomID).detail("Start", startVersion.toString());
if(self->bestServer >= 0 && self->serverCursors[self->bestServer]->isActive()) {
@ -452,7 +465,7 @@ ACTOR Future<Void> mergedPeekGetMore(ILogSystem::MergedPeekCursor* self, LogMess
}
}
Future<Void> ILogSystem::MergedPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::MergedPeekCursor::getMore(TaskPriority taskID) {
if(!serverCursors.size())
return Never();
@ -692,7 +705,7 @@ void ILogSystem::SetPeekCursor::advanceTo(LogMessageVersion n) {
}
}
ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVersion startVersion, int taskID) {
ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVersion startVersion, TaskPriority taskID) {
loop {
//TraceEvent("LPC_GetMore1", self->randomID).detail("Start", startVersion.toString()).detail("Tag", self->tag);
if(self->bestServer >= 0 && self->bestSet >= 0 && self->serverCursors[self->bestSet][self->bestServer]->isActive()) {
@ -753,7 +766,7 @@ ACTOR Future<Void> setPeekGetMore(ILogSystem::SetPeekCursor* self, LogMessageVer
}
}
Future<Void> ILogSystem::SetPeekCursor::getMore(int taskID) {
Future<Void> ILogSystem::SetPeekCursor::getMore(TaskPriority taskID) {
auto startVersion = version();
calcHasMessage();
if( hasMessage() )
@ -848,7 +861,7 @@ void ILogSystem::MultiCursor::advanceTo(LogMessageVersion n) {
cursors.back()->advanceTo(n);
}
Future<Void> ILogSystem::MultiCursor::getMore(int taskID) {
Future<Void> ILogSystem::MultiCursor::getMore(TaskPriority taskID) {
LogMessageVersion startVersion = cursors.back()->version();
while( cursors.size() > 1 && cursors.back()->version() >= epochEnds.back() ) {
if(needsPopped) poppedVersion = std::max(poppedVersion, cursors.back()->popped());
@ -967,7 +980,7 @@ void ILogSystem::BufferedCursor::advanceTo(LogMessageVersion n) {
ASSERT(false);
}
ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Reference<ILogSystem::IPeekCursor> cursor, Version maxVersion, int taskID ) {
ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Reference<ILogSystem::IPeekCursor> cursor, Version maxVersion, TaskPriority taskID ) {
loop {
wait(yield());
if(cursor->version().version >= maxVersion) {
@ -984,7 +997,7 @@ ACTOR Future<Void> bufferedGetMoreLoader( ILogSystem::BufferedCursor* self, Refe
}
}
ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, int taskID ) {
ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, TaskPriority taskID ) {
if( self->messageVersion.version >= self->end ) {
wait( Future<Void>(Never()));
throw internal_error();
@ -1022,7 +1035,7 @@ ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, int taskID
return Void();
}
Future<Void> ILogSystem::BufferedCursor::getMore(int taskID) {
Future<Void> ILogSystem::BufferedCursor::getMore(TaskPriority taskID) {
if( hasMessage() )
return Void();
return bufferedGetMore(this, taskID);

View File

@ -50,7 +50,7 @@ struct MasterInterface {
}
void initEndpoints() {
getCommitVersion.getEndpoint( TaskProxyGetConsistentReadVersion );
getCommitVersion.getEndpoint( TaskPriority::ProxyGetConsistentReadVersion );
}
};

View File

@ -158,7 +158,7 @@ ACTOR Future<Void> queueTransactionStartRequests(
if (now() - *lastGRVTime > *GRVBatchTime)
*lastGRVTime = now() - *GRVBatchTime;
forwardPromise(GRVTimer, delayJittered(*GRVBatchTime - (now() - *lastGRVTime), TaskProxyGRVTimer));
forwardPromise(GRVTimer, delayJittered(*GRVBatchTime - (now() - *lastGRVTime), TaskPriority::ProxyGRVTimer));
}
transactionQueue->push(std::make_pair(req, counter--));
@ -263,7 +263,7 @@ struct ProxyCommitData {
lastVersionTime(0), commitVersionRequestNumber(1), mostRecentProcessedRequestNumber(0),
getConsistentReadVersion(getConsistentReadVersion), commit(commit), lastCoalesceTime(0),
localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN),
firstProxy(firstProxy), cx(openDBOnServer(db, TaskDefaultEndpoint, true, true)), db(db),
firstProxy(firstProxy), cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true)), db(db),
singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0)
{}
};
@ -350,7 +350,7 @@ struct ResolutionRequestBuilder {
};
ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int> > out, FutureStream<CommitTransactionRequest> in, int desiredBytes, int64_t memBytesLimit) {
wait(delayJittered(commitData->commitBatchInterval, TaskProxyCommitBatcher));
wait(delayJittered(commitData->commitBatchInterval, TaskPriority::ProxyCommitBatcher));
state double lastBatch = 0;
@ -363,7 +363,7 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
timeout = Never();
}
else {
timeout = delayJittered(SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL, TaskProxyCommitBatcher);
timeout = delayJittered(SERVER_KNOBS->MAX_COMMIT_BATCH_INTERVAL, TaskPriority::ProxyCommitBatcher);
}
while(!timeout.isReady() && !(batch.size() == SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_COUNT_MAX || batchBytes >= desiredBytes)) {
@ -387,10 +387,10 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
if(!batch.size()) {
commitData->commitBatchStartNotifications.send(Void());
if(now() - lastBatch > commitData->commitBatchInterval) {
timeout = delayJittered(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE, TaskProxyCommitBatcher);
timeout = delayJittered(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_FROM_IDLE, TaskPriority::ProxyCommitBatcher);
}
else {
timeout = delayJittered(commitData->commitBatchInterval - (now() - lastBatch), TaskProxyCommitBatcher);
timeout = delayJittered(commitData->commitBatchInterval - (now() - lastBatch), TaskPriority::ProxyCommitBatcher);
}
}
@ -398,7 +398,7 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData *commitData, PromiseStream<std:
out.send({ batch, batchBytes });
lastBatch = now();
commitData->commitBatchStartNotifications.send(Void());
timeout = delayJittered(commitData->commitBatchInterval, TaskProxyCommitBatcher);
timeout = delayJittered(commitData->commitBatchInterval, TaskPriority::ProxyCommitBatcher);
batch = std::vector<CommitTransactionRequest>();
batchBytes = 0;
}
@ -457,7 +457,7 @@ ACTOR Future<Void> commitBatch(
ASSERT(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS <= SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT); // since we are using just the former to limit the number of versions actually in flight!
// Active load balancing runs at a very high priority (to obtain accurate estimate of memory used by commit batches) so we need to downgrade here
wait(delay(0, TaskProxyCommit));
wait(delay(0, TaskPriority::ProxyCommit));
self->lastVersionTime = t1;
@ -534,7 +534,7 @@ ACTOR Future<Void> commitBatch(
vector< Future<ResolveTransactionBatchReply> > replies;
for (int r = 0; r<self->resolvers.size(); r++) {
requests.requests[r].debugID = debugID;
replies.push_back(brokenPromiseToNever(self->resolvers[r].resolve.getReply(requests.requests[r], TaskProxyResolverReply)));
replies.push_back(brokenPromiseToNever(self->resolvers[r].resolve.getReply(requests.requests[r], TaskPriority::ProxyResolverReply)));
}
state vector<vector<int>> transactionResolverMap = std::move( requests.transactionResolverMap );
@ -1135,7 +1135,7 @@ ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(ProxyCommitData* commi
state vector<Future<GetReadVersionReply>> proxyVersions;
for (auto const& p : *otherProxies)
proxyVersions.push_back(brokenPromiseToNever(p.getRawCommittedVersion.getReply(GetRawCommittedVersionRequest(debugID), TaskTLogConfirmRunningReply)));
proxyVersions.push_back(brokenPromiseToNever(p.getRawCommittedVersion.getReply(GetRawCommittedVersionRequest(debugID), TaskPriority::TLogConfirmRunningReply)));
if (!(flags&GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY))
{
@ -1292,7 +1292,7 @@ ACTOR static Future<Void> transactionStarter(
}
if (!transactionQueue.empty())
forwardPromise(GRVTimer, delayJittered(SERVER_KNOBS->START_TRANSACTION_BATCH_QUEUE_CHECK_INTERVAL, TaskProxyGRVTimer));
forwardPromise(GRVTimer, delayJittered(SERVER_KNOBS->START_TRANSACTION_BATCH_QUEUE_CHECK_INTERVAL, TaskPriority::ProxyGRVTimer));
/*TraceEvent("GRVBatch", proxy.id())
.detail("Elapsed", elapsed)

View File

@ -130,12 +130,12 @@ ACTOR Future<vector<UID>> addReadWriteDestinations(KeyRangeRef shard, vector<Sto
state vector< Future<Optional<UID>> > srcChecks;
for(int s=0; s<srcInterfs.size(); s++) {
srcChecks.push_back( checkReadWrite( srcInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskMoveKeys ), srcInterfs[s].id(), 0 ) );
srcChecks.push_back( checkReadWrite( srcInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskPriority::MoveKeys ), srcInterfs[s].id(), 0 ) );
}
state vector< Future<Optional<UID>> > destChecks;
for(int s=0; s<destInterfs.size(); s++) {
destChecks.push_back( checkReadWrite( destInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskMoveKeys ), destInterfs[s].id(), version ) );
destChecks.push_back( checkReadWrite( destInterfs[s].getShardState.getReplyUnlessFailedFor( GetShardStateRequest( shard, GetShardStateRequest::NO_WAIT), SERVER_KNOBS->SERVER_READY_QUORUM_INTERVAL, 0, TaskPriority::MoveKeys ), destInterfs[s].id(), version ) );
}
wait( waitForAll(srcChecks) && waitForAll(destChecks) );
@ -225,7 +225,7 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
state TraceInterval interval("RelocateShard_StartMoveKeys");
//state TraceInterval waitInterval("");
wait( startMoveKeysLock->take( TaskDataDistributionLaunch ) );
wait( startMoveKeysLock->take( TaskPriority::DataDistributionLaunch ) );
state FlowLock::Releaser releaser( *startMoveKeysLock );
TraceEvent(SevDebug, interval.begin(), relocationIntervalId);
@ -255,7 +255,7 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
//Keep track of shards for all src servers so that we can preserve their values in serverKeys
state Map<UID, VectorRef<KeyRangeRef>> shardMap;
tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
wait( checkMoveKeysLock(&tr, lock) );
@ -394,11 +394,11 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
ACTOR Future<Void> waitForShardReady( StorageServerInterface server, KeyRange keys, Version minVersion, GetShardStateRequest::waitMode mode ) {
loop {
try {
std::pair<Version,Version> rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskMoveKeys ) );
std::pair<Version,Version> rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskPriority::MoveKeys ) );
if (rep.first >= minVersion) {
return Void();
}
wait( delayJittered( SERVER_KNOBS->SHARD_READY_DELAY, TaskMoveKeys ) );
wait( delayJittered( SERVER_KNOBS->SHARD_READY_DELAY, TaskPriority::MoveKeys ) );
}
catch (Error& e) {
if( e.code() != error_code_timed_out ) {
@ -419,7 +419,7 @@ ACTOR Future<Void> checkFetchingState( Database cx, vector<UID> dest, KeyRange k
try {
if (BUGGIFY) wait(delay(5));
tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
vector< Future< Optional<Value> > > serverListEntries;
@ -439,7 +439,7 @@ ACTOR Future<Void> checkFetchingState( Database cx, vector<UID> dest, KeyRange k
}
wait( timeoutError( waitForAll( requests ),
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, TaskMoveKeys ) );
SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, TaskPriority::MoveKeys ) );
dataMovementComplete.send(Void());
return Void();
@ -480,11 +480,11 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
//printf("finishMoveKeys( '%s'-'%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str());
loop {
try {
tr.info.taskID = TaskMoveKeys;
tr.info.taskID = TaskPriority::MoveKeys;
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
releaser.release();
wait( finishMoveKeysParallelismLock->take( TaskDataDistributionLaunch ) );
wait( finishMoveKeysParallelismLock->take( TaskPriority::DataDistributionLaunch ) );
releaser = FlowLock::Releaser( *finishMoveKeysParallelismLock );
wait( checkMoveKeysLock(&tr, lock) );
@ -632,7 +632,7 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
for(int s=0; s<storageServerInterfaces.size(); s++)
serverReady.push_back( waitForShardReady( storageServerInterfaces[s], keys, tr.getReadVersion().get(), GetShardStateRequest::READABLE) );
wait( timeout( waitForAll( serverReady ), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, Void(), TaskMoveKeys ) );
wait( timeout( waitForAll( serverReady ), SERVER_KNOBS->SERVER_READY_QUORUM_TIMEOUT, Void(), TaskPriority::MoveKeys ) );
int count = dest.size() - newDestinations.size();
for(int s=0; s<serverReady.size(); s++)
count += serverReady[s].isReady() && !serverReady[s].isError();
@ -808,7 +808,7 @@ ACTOR Future<Void> removeStorageServer( Database cx, UID serverID, MoveKeysLock
if (!canRemove) {
TEST(true); // The caller had a transaction in flight that assigned keys to the server. Wait for it to reverse its mistake.
TraceEvent(SevWarn,"NoCanRemove").detail("Count", noCanRemoveCount++).detail("ServerID", serverID);
wait( delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskDataDistributionLaunch) );
wait( delayJittered(SERVER_KNOBS->REMOVE_RETRY_DELAY, TaskPriority::DataDistributionLaunch) );
tr.reset();
TraceEvent("RemoveStorageServerRetrying").detail("CanRemove", canRemove);
} else {

Some files were not shown because too many files have changed in this diff Show More