Merge branch 'main' into speedup_tail_latency

This commit is contained in:
Josh Slocum 2022-04-29 16:05:12 -05:00
commit e5840d3a38
97 changed files with 6049 additions and 1920 deletions

View File

@ -10,3 +10,4 @@ set(SRCS
add_library(FDBLibTLS STATIC ${SRCS})
target_link_libraries(FDBLibTLS PUBLIC OpenSSL::SSL boost_target PRIVATE flow)
target_include_directories(FDBLibTLS INTERFACE OpenSSL::SSL boost_target PRIVATE flow)

View File

@ -22,6 +22,9 @@
#include "FDBLibTLS/FDBLibTLSSession.h"
#include "flow/Trace.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/bio.h>
#include <openssl/err.h>
#include <openssl/evp.h>

View File

@ -23,6 +23,9 @@
#include "flow/flow.h"
#include "flow/Trace.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/bio.h>
#include <openssl/err.h>
#include <openssl/pem.h>

View File

@ -20,6 +20,9 @@
#include "FDBLibTLS/FDBLibTLSVerify.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/objects.h>
#include <algorithm>

View File

@ -25,6 +25,9 @@
#include <string.h>
#include <boost/lexical_cast.hpp>
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/objects.h>
#include "fdbrpc/ITLSPlugin.h"

View File

@ -37,55 +37,71 @@ private:
enum OpType { OP_INSERT, OP_CLEAR, OP_CLEAR_RANGE, OP_READ, OP_GET_RANGES, OP_LAST = OP_GET_RANGES };
std::vector<OpType> excludedOpTypes;
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
// FIXME: should still guarantee a read succeeds eventually somehow
bool seenReadSuccess = false;
void randomReadOp(TTaskFct cont) {
std::string begin = randomKeyName();
std::string end = randomKeyName();
auto results = std::make_shared<std::vector<KeyValue>>();
auto tooOld = std::make_shared<bool>(false);
if (begin > end) {
std::swap(begin, end);
}
execTransaction(
[begin, end, results](auto ctx) {
[this, begin, end, results, tooOld](auto ctx) {
ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
KeyValuesResult res = ctx->tx()->readBlobGranules(begin, end, ctx->getBGBasePath());
bool more;
(*results) = res.getKeyValues(&more);
ASSERT(!more);
if (res.getError() != error_code_success) {
if (res.getError() == error_code_blob_granule_transaction_too_old) {
info("BlobGranuleCorrectness::randomReadOp bg too old\n");
ASSERT(!seenReadSuccess);
*tooOld = true;
ctx->done();
} else if (res.getError() != error_code_success) {
ctx->onError(res.getError());
} else {
if (!seenReadSuccess) {
info("BlobGranuleCorrectness::randomReadOp first success\n");
}
seenReadSuccess = true;
ctx->done();
}
},
[this, begin, end, results, cont]() {
std::vector<KeyValue> expected = store.getRange(begin, end, store.size(), false);
if (results->size() != expected.size()) {
error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}",
expected.size(),
results->size()));
}
ASSERT(results->size() == expected.size());
for (int i = 0; i < results->size(); i++) {
if ((*results)[i].key != expected[i].key) {
error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}",
i,
results->size(),
expected[i].key,
(*results)[i].key));
[this, begin, end, results, tooOld, cont]() {
if (!*tooOld) {
std::vector<KeyValue> expected = store.getRange(begin, end, store.size(), false);
if (results->size() != expected.size()) {
error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}",
expected.size(),
results->size()));
}
ASSERT((*results)[i].key == expected[i].key);
ASSERT(results->size() == expected.size());
if ((*results)[i].value != expected[i].value) {
error(
fmt::format("randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}",
i,
results->size(),
expected[i].key,
expected[i].value,
(*results)[i].value));
for (int i = 0; i < results->size(); i++) {
if ((*results)[i].key != expected[i].key) {
error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}",
i,
results->size(),
expected[i].key,
(*results)[i].key));
}
ASSERT((*results)[i].key == expected[i].key);
if ((*results)[i].value != expected[i].value) {
error(fmt::format(
"randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}",
i,
results->size(),
expected[i].key,
expected[i].value,
(*results)[i].value));
}
ASSERT((*results)[i].value == expected[i].value);
}
ASSERT((*results)[i].value == expected[i].value);
}
schedule(cont);
});
@ -110,9 +126,11 @@ private:
true);
},
[this, begin, end, results, cont]() {
ASSERT(results->size() > 0);
ASSERT(results->front().key <= begin);
ASSERT(results->back().value >= end);
if (seenReadSuccess) {
ASSERT(results->size() > 0);
ASSERT(results->front().key <= begin);
ASSERT(results->back().value >= end);
}
for (int i = 0; i < results->size(); i++) {
// no empty or inverted ranges

View File

@ -21,22 +21,39 @@ endif()
include(CheckSymbolExists)
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support")
set(USE_WOLFSSL OFF CACHE BOOL "Build against WolfSSL instead of OpenSSL")
set(USE_OPENSSL ON CACHE BOOL "Build against OpenSSL")
if(DISABLE_TLS)
set(WITH_TLS OFF)
else()
set(OPENSSL_USE_STATIC_LIBS TRUE)
if(WIN32)
set(OPENSSL_MSVC_STATIC_RT ON)
endif()
find_package(OpenSSL)
if(OPENSSL_FOUND)
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL)
else()
message(STATUS "OpenSSL was not found - Will compile without TLS Support")
message(STATUS "You can set OPENSSL_ROOT_DIR to help cmake find it")
set(WITH_TLS OFF)
if(USE_WOLFSSL)
set(WOLFSSL_USE_STATIC_LIBS TRUE)
find_package(WolfSSL)
if(WOLFSSL_FOUND)
set(CMAKE_REQUIRED_INCLUDES ${WOLFSSL_INCLUDE_DIR})
set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL)
add_compile_options(-DHAVE_WOLFSSL)
else()
message(STATUS "WolfSSL was not found - Will compile without TLS Support")
message(STATUS "You can set WOLFSSL_ROOT_DIR to help cmake find it")
set(WITH_TLS OFF)
endif()
elseif(USE_OPENSSL)
set(OPENSSL_USE_STATIC_LIBS TRUE)
if(WIN32)
set(OPENSSL_MSVC_STATIC_RT ON)
endif()
find_package(OpenSSL)
if(OPENSSL_FOUND)
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL)
else()
message(STATUS "OpenSSL was not found - Will compile without TLS Support")
message(STATUS "You can set OPENSSL_ROOT_DIR to help cmake find it")
set(WITH_TLS OFF)
endif()
endif()
endif()

63
cmake/FindWolfSSL.cmake Normal file
View File

@ -0,0 +1,63 @@
# FindWolfSSL
# Locates the wolfSSL library and headers and exposes them through the
# OpenSSL::SSL / OpenSSL::CRYPTO target names so the rest of the build can
# link against wolfSSL transparently.
#
# Input variables:
#   WOLFSSL_ROOT_DIR        - installation prefix hint (also detected below)
#   WOLFSSL_USE_STATIC_LIBS - prefer static libraries when searching
# Result variables:
#   WOLFSSL_FOUND, WOLFSSL_INCLUDE_DIR(S), WOLFSSL_LIBRARY / WOLFSSL_LIBRARIES

# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES.
# Save the current value so the global search behavior can be restored after
# the find_library() call instead of leaking into the rest of the configure run.
if(WOLFSSL_USE_STATIC_LIBS)
  set(_wolfssl_saved_find_suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES})
  if(WIN32)
    set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
  else()
    set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
  endif()
endif()

find_path(WOLFSSL_ROOT_DIR
  NAMES
    include/wolfssl/options.h
)

find_path(WOLFSSL_INCLUDE_DIR
  NAMES
    wolfssl/ssl.h
  PATHS
    ${WOLFSSL_ROOT_DIR}/include
)

find_library(WOLFSSL_LIBRARY
  NAMES
    wolfssl
  PATHS
    ${WOLFSSL_ROOT_DIR}/lib
)

# Restore the suffix list mutated above; find modules must not change global
# search behavior for unrelated find_library() calls later in the configure.
if(WOLFSSL_USE_STATIC_LIBS)
  set(CMAKE_FIND_LIBRARY_SUFFIXES ${_wolfssl_saved_find_suffixes})
  unset(_wolfssl_saved_find_suffixes)
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(WolfSSL
  REQUIRED_VARS
    WOLFSSL_LIBRARY
    WOLFSSL_INCLUDE_DIR
  FAIL_MESSAGE
    "Could NOT find WolfSSL"
)

mark_as_advanced(
  WOLFSSL_ROOT_DIR
  WOLFSSL_LIBRARY
  WOLFSSL_INCLUDE_DIR
)

if(WOLFSSL_FOUND)
  message(STATUS "Found wolfssl library: ${WOLFSSL_LIBRARY}")
  message(STATUS "Found wolfssl includes: ${WOLFSSL_INCLUDE_DIR}")
  set(WOLFSSL_INCLUDE_DIRS ${WOLFSSL_INCLUDE_DIR})
  set(WOLFSSL_LIBRARIES ${WOLFSSL_LIBRARY})
  # Present wolfSSL under the OpenSSL target names the build already consumes,
  # so callers can keep linking OpenSSL::SSL unchanged.
  add_library(WolfSSL UNKNOWN IMPORTED GLOBAL)
  add_library(OpenSSL::SSL ALIAS WolfSSL)
  add_library(OpenSSL::CRYPTO ALIAS WolfSSL)
  target_include_directories(WolfSSL INTERFACE "${WOLFSSL_INCLUDE_DIR}")
  # NOTE(review): the original called
  #   target_link_libraries(WolfSSL INTERFACE "${WOLFSSL_TLS_LIBRARY}"
  #                         "${WOLFSSL_SSL_LIBRARY}" "${WOLFSSL_CRYPTO_LIBRARY}")
  # but none of those variables are defined anywhere in this module, so the
  # call expanded to empty strings and was a no-op. The actual library is
  # carried by IMPORTED_LOCATION below; the dead call has been removed.
  set_target_properties(WolfSSL PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${WOLFSSL_INCLUDE_DIR}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${WOLFSSL_LIBRARY}")
endif()

View File

@ -1189,7 +1189,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
try {
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
wait(ccf->resolveHostnames());
} catch (Error& e) {
if (e.code() == error_code_operation_cancelled) {
throw;

View File

@ -28,28 +28,46 @@
#include "fdbclient/CoordinationInterface.h"
// Determine public IP address by calling the first coordinator.
// Determine public IP address by calling the first available coordinator.
// If fail connecting all coordinators, throw bind_failed().
IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
try {
using namespace boost::asio;
int size = ccs.coordinators().size() + ccs.hostnames.size();
int index = 0;
loop {
try {
using namespace boost::asio;
io_service ioService;
ip::udp::socket socket(ioService);
io_service ioService;
ip::udp::socket socket(ioService);
ccs.resolveHostnamesBlocking();
const auto& coordAddr = ccs.coordinators()[0];
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
: ip::address(ip::address_v4(coordAddr.ip.toV4()));
NetworkAddress coordAddr;
// Try coords first, because they don't need to be resolved.
if (index < ccs.coordinators().size()) {
coordAddr = ccs.coordinators()[index];
} else {
Hostname& h = ccs.hostnames[index - ccs.coordinators().size()];
Optional<NetworkAddress> resolvedAddr = h.resolveBlocking();
if (!resolvedAddr.present()) {
throw lookup_failed();
}
coordAddr = resolvedAddr.get();
}
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
: ip::address(ip::address_v4(coordAddr.ip.toV4()));
ip::udp::endpoint endpoint(boostIp, coordAddr.port);
socket.connect(endpoint);
IPAddress ip = coordAddr.ip.isV6() ? IPAddress(socket.local_endpoint().address().to_v6().to_bytes())
: IPAddress(socket.local_endpoint().address().to_v4().to_ulong());
socket.close();
ip::udp::endpoint endpoint(boostIp, coordAddr.port);
socket.connect(endpoint);
IPAddress ip = coordAddr.ip.isV6() ? IPAddress(socket.local_endpoint().address().to_v6().to_bytes())
: IPAddress(socket.local_endpoint().address().to_v4().to_ulong());
socket.close();
return ip;
} catch (boost::system::system_error e) {
fprintf(stderr, "Error determining public address: %s\n", e.what());
throw bind_failed();
return ip;
} catch (...) {
++index;
if (index == size) {
fprintf(stderr, "Error determining public address.\n");
throw bind_failed();
}
}
}
}

View File

@ -21,6 +21,7 @@
#include "fdbclient/Knobs.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/Tenant.h"
#include "flow/UnitTest.h"
#define init(...) KNOB_FN(__VA_ARGS__, INIT_ATOMIC_KNOB, INIT_KNOB)(__VA_ARGS__)
@ -82,6 +83,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( CHANGE_FEED_CACHE_SIZE, 100000 ); if( randomize && BUGGIFY ) CHANGE_FEED_CACHE_SIZE = 1;
init( CHANGE_FEED_POP_TIMEOUT, 5.0 );
init( CHANGE_FEED_STREAM_MIN_BYTES, 1e4 ); if( randomize && BUGGIFY ) CHANGE_FEED_STREAM_MIN_BYTES = 1;
init( TENANT_PREFIX_SIZE_LIMIT, 28 ); ASSERT(TENANT_PREFIX_SIZE_LIMIT >= TenantMapEntry::ROOT_PREFIX_SIZE); // includes 8-byte ID and optional tenant subspace
init( MAX_BATCH_SIZE, 1000 ); if( randomize && BUGGIFY ) MAX_BATCH_SIZE = 1;
init( GRV_BATCH_TIMEOUT, 0.005 ); if( randomize && BUGGIFY ) GRV_BATCH_TIMEOUT = 0.1;

View File

@ -81,6 +81,7 @@ public:
int64_t CHANGE_FEED_CACHE_SIZE;
double CHANGE_FEED_POP_TIMEOUT;
int64_t CHANGE_FEED_STREAM_MIN_BYTES;
int64_t TENANT_PREFIX_SIZE_LIMIT;
int MAX_BATCH_SIZE;
double GRV_BATCH_TIMEOUT;

View File

@ -61,61 +61,31 @@ struct ClientLeaderRegInterface {
// - There is no address present more than once
class ClusterConnectionString {
public:
enum ConnectionStringStatus { RESOLVED, RESOLVING, UNRESOLVED };
ClusterConnectionString() {}
ClusterConnectionString(const std::string& connStr);
ClusterConnectionString(const std::string& connectionString);
ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key);
ClusterConnectionString(const std::vector<Hostname>& hosts, Key key);
ClusterConnectionString(const ClusterConnectionString& rhs) { operator=(rhs); }
ClusterConnectionString& operator=(const ClusterConnectionString& rhs) {
// Copy everything except AsyncTrigger resolveFinish.
status = rhs.status;
coords = rhs.coords;
hostnames = rhs.hostnames;
networkAddressToHostname = rhs.networkAddressToHostname;
key = rhs.key;
keyDesc = rhs.keyDesc;
connectionString = rhs.connectionString;
return *this;
}
std::vector<NetworkAddress> const& coordinators() const { return coords; }
void addResolved(const Hostname& hostname, const NetworkAddress& address) {
coords.push_back(address);
networkAddressToHostname.emplace(address, hostname);
}
Key clusterKey() const { return key; }
Key clusterKeyName() const {
return keyDesc;
} // Returns the "name" or "description" part of the clusterKey (the part before the ':')
std::string toString() const;
static std::string getErrorString(std::string const& source, Error const& e);
Future<Void> resolveHostnames();
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
// should be preferred.
void resolveHostnamesBlocking();
// This function derives the member connectionString from the current key, coordinators and hostnames.
void resetConnectionString();
void resetToUnresolved();
void parseKey(const std::string& key);
ConnectionStringStatus status = RESOLVED;
AsyncTrigger resolveFinish;
// This function tries to resolve all hostnames once, and return them with coords.
// Best effort, does not guarantee that the resolves succeed.
Future<std::vector<NetworkAddress>> tryResolveHostnames();
std::vector<NetworkAddress> coords;
std::vector<Hostname> hostnames;
std::unordered_map<NetworkAddress, Hostname> networkAddressToHostname;
private:
void parseConnString();
Key key, keyDesc;
std::string connectionString;
};
FDB_DECLARE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted);
@ -165,12 +135,6 @@ public:
// Signals to the connection record that it was successfully used to connect to a cluster.
void notifyConnected();
ClusterConnectionString::ConnectionStringStatus connectionStringStatus() const;
Future<Void> resolveHostnames();
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
// should be preferred.
void resolveHostnamesBlocking();
virtual void addref() = 0;
virtual void delref() = 0;
@ -275,12 +239,21 @@ struct OpenDatabaseCoordRequest {
Standalone<VectorRef<ClientVersionRef>> supportedVersions;
UID knownClientInfoID;
Key clusterKey;
std::vector<Hostname> hostnames;
std::vector<NetworkAddress> coordinators;
ReplyPromise<CachedSerialization<struct ClientDBInfo>> reply;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, issues, supportedVersions, traceLogGroup, knownClientInfoID, clusterKey, coordinators, reply);
serializer(ar,
issues,
supportedVersions,
traceLogGroup,
knownClientInfoID,
clusterKey,
hostnames,
coordinators,
reply);
}
};

View File

@ -20,6 +20,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/NativeAPI.actor.h"
KeyRef keyBetween(const KeyRangeRef& keys) {
int pos = 0; // will be the position of the first difference between keys.begin and keys.end
@ -40,16 +41,14 @@ KeyRef keyBetween(const KeyRangeRef& keys) {
}
void KeySelectorRef::setKey(KeyRef const& key) {
// There are no keys in the database with size greater than KEY_SIZE_LIMIT, so if this key selector has a key
// There are no keys in the database with size greater than the max key size, so if this key selector has a key
// which is large, then we can translate it to an equivalent key selector with a smaller key
if (key.size() >
(key.startsWith(LiteralStringRef("\xff")) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
this->key = key.substr(0,
(key.startsWith(LiteralStringRef("\xff")) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
else
int64_t maxKeySize = getMaxKeySize(key);
if (key.size() > maxKeySize) {
this->key = key.substr(0, maxKeySize + 1);
} else {
this->key = key;
}
}
void KeySelectorRef::setKeyUnlimited(KeyRef const& key) {

View File

@ -746,6 +746,17 @@ Future<Optional<TenantMapEntry>> createTenantTransaction(Transaction tr, TenantN
state Optional<Value> lastIdVal = wait(safeThreadFutureToFuture(lastIdFuture));
Optional<Value> tenantDataPrefix = wait(safeThreadFutureToFuture(tenantDataPrefixFuture));
if (tenantDataPrefix.present() &&
tenantDataPrefix.get().size() + TenantMapEntry::ROOT_PREFIX_SIZE > CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT) {
TraceEvent(SevWarnAlways, "TenantPrefixTooLarge")
.detail("TenantSubspace", tenantDataPrefix.get())
.detail("TenantSubspaceLength", tenantDataPrefix.get().size())
.detail("RootPrefixLength", TenantMapEntry::ROOT_PREFIX_SIZE)
.detail("MaxTenantPrefixSize", CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT);
throw client_invalid_operation();
}
state TenantMapEntry newTenant(lastIdVal.present() ? TenantMapEntry::prefixToId(lastIdVal.get()) + 1 : 0,
tenantDataPrefix.present() ? (KeyRef)tenantDataPrefix.get() : ""_sr);

View File

@ -782,7 +782,7 @@ ACTOR Future<std::vector<ProcessData>> getWorkers(Database cx) {
}
}
ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
ACTOR Future<Optional<ClusterConnectionString>> getConnectionString(Database cx) {
state Transaction tr(cx);
loop {
try {
@ -790,9 +790,8 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
Optional<Value> currentKey = wait(tr.get(coordinatorsKey));
if (!currentKey.present())
return std::vector<NetworkAddress>();
return ClusterConnectionString(currentKey.get().toString()).coordinators();
return Optional<ClusterConnectionString>();
return ClusterConnectionString(currentKey.get().toString());
} catch (Error& e) {
wait(tr.onError(e));
}
@ -801,7 +800,7 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
Reference<IQuorumChange> change,
ClusterConnectionString* conn) {
std::vector<NetworkAddress> desiredCoordinators) {
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
@ -812,47 +811,45 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely?
state ClusterConnectionString old(currentKey.get().toString());
wait(old.resolveHostnames());
if (tr->getDatabase()->getConnectionRecord() &&
old.clusterKeyName().toString() !=
tr->getDatabase()->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!conn->coords.size()) {
std::vector<NetworkAddress> desiredCoordinatorAddresses = wait(change->getDesiredCoordinators(
if (!desiredCoordinators.size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
tr,
old.coordinators(),
oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));
conn->coords = desiredCoordinatorAddresses;
desiredCoordinators = _desiredCoordinators;
}
if (result != CoordinatorsResult::SUCCESS)
return result;
if (!conn->coordinators().size())
if (!desiredCoordinators.size())
return CoordinatorsResult::INVALID_NETWORK_ADDRESSES;
std::sort(conn->coords.begin(), conn->coords.end());
std::sort(conn->hostnames.begin(), conn->hostnames.end());
std::sort(desiredCoordinators.begin(), desiredCoordinators.end());
std::string newName = change->getDesiredClusterKeyName();
if (newName.empty())
newName = old.clusterKeyName().toString();
if (old.coordinators() == conn->coordinators() && old.clusterKeyName() == newName)
if (oldCoordinators == desiredCoordinators && old.clusterKeyName() == newName)
return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
std::string key(newName + ':' + deterministicRandom()->randomAlphaNumeric(32));
conn->parseKey(key);
conn->resetConnectionString();
state ClusterConnectionString conn(desiredCoordinators,
StringRef(newName + ':' + deterministicRandom()->randomAlphaNumeric(32)));
if (g_network->isSimulated()) {
int i = 0;
int protectedCount = 0;
while ((protectedCount < ((conn->coordinators().size() / 2) + 1)) && (i < conn->coordinators().size())) {
auto process = g_simulator.getProcessByAddress(conn->coordinators()[i]);
while ((protectedCount < ((desiredCoordinators.size() / 2) + 1)) && (i < desiredCoordinators.size())) {
auto process = g_simulator.getProcessByAddress(desiredCoordinators[i]);
auto addresses = process->addresses;
if (!process->isReliable()) {
@ -864,14 +861,14 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
if (addresses.secondaryAddress.present()) {
g_simulator.protectedAddresses.insert(process->addresses.secondaryAddress.get());
}
TraceEvent("ProtectCoordinator").detail("Address", conn->coordinators()[i]).backtrace();
TraceEvent("ProtectCoordinator").detail("Address", desiredCoordinators[i]).backtrace();
protectedCount++;
i++;
}
}
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(*conn)));
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(conn)));
leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
@ -883,7 +880,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
when(wait(waitForAll(leaderServers))) {}
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
}
tr->set(coordinatorsKey, conn->toString());
tr->set(coordinatorsKey, conn.toString());
return Optional<CoordinatorsResult>();
}
@ -909,11 +906,12 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
old.clusterKeyName().toString() != cx->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!desiredCoordinators.size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
&tr,
old.coordinators(),
oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));
desiredCoordinators = _desiredCoordinators;
@ -937,7 +935,7 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
if (newName.empty())
newName = old.clusterKeyName().toString();
if (old.coordinators() == desiredCoordinators && old.clusterKeyName() == newName)
if (oldCoordinators == desiredCoordinators && old.clusterKeyName() == newName)
return retries ? CoordinatorsResult::SUCCESS : CoordinatorsResult::SAME_NETWORK_ADDRESSES;
state ClusterConnectionString conn(
@ -1075,9 +1073,16 @@ struct AutoQuorumChange final : IQuorumChange {
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply));
if (coord.clientLeaderServers[i].hostname.present()) {
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coord.clusterKey, UID()),
coord.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
}
Optional<std::vector<Optional<LeaderInfo>>> results =
wait(timeout(getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY));

View File

@ -56,7 +56,7 @@ struct IQuorumChange : ReferenceCounted<IQuorumChange> {
// Change to use the given set of coordination servers
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
Reference<IQuorumChange> change,
ClusterConnectionString* conn);
std::vector<NetworkAddress> desiredCoordinators);
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
Reference<IQuorumChange> autoQuorumChange(int desired = -1);
Reference<IQuorumChange> noQuorumChange();
@ -146,7 +146,7 @@ ACTOR Future<bool> setHealthyZone(Database cx, StringRef zoneId, double seconds,
ACTOR Future<Void> waitForPrimaryDC(Database cx, StringRef dcId);
// Gets the cluster connection string
ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx);
ACTOR Future<Optional<ClusterConnectionString>> getConnectionString(Database cx);
void schemaCoverage(std::string const& spath, bool covered = true);
bool schemaMatch(json_spirit::mValue const& schema,

View File

@ -77,18 +77,6 @@ void IClusterConnectionRecord::setPersisted() {
connectionStringNeedsPersisted = false;
}
ClusterConnectionString::ConnectionStringStatus IClusterConnectionRecord::connectionStringStatus() const {
return cs.status;
}
Future<Void> IClusterConnectionRecord::resolveHostnames() {
return cs.resolveHostnames();
}
void IClusterConnectionRecord::resolveHostnamesBlocking() {
cs.resolveHostnamesBlocking();
}
std::string ClusterConnectionString::getErrorString(std::string const& source, Error const& e) {
if (e.code() == error_code_connection_string_invalid) {
return format("Invalid connection string `%s: %d %s", source.c_str(), e.code(), e.what());
@ -97,101 +85,19 @@ std::string ClusterConnectionString::getErrorString(std::string const& source, E
}
}
ACTOR Future<Void> resolveHostnamesImpl(ClusterConnectionString* self) {
loop {
if (self->status == ClusterConnectionString::UNRESOLVED) {
self->status = ClusterConnectionString::RESOLVING;
std::vector<Future<Void>> fs;
for (auto const& hostname : self->hostnames) {
fs.push_back(map(INetworkConnections::net()->resolveTCPEndpoint(hostname.host, hostname.service),
[=](std::vector<NetworkAddress> const& addresses) -> Void {
NetworkAddress address =
addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (hostname.isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
self->addResolved(hostname, address);
return Void();
}));
}
wait(waitForAll(fs));
std::sort(self->coords.begin(), self->coords.end());
if (std::unique(self->coords.begin(), self->coords.end()) != self->coords.end()) {
self->status = ClusterConnectionString::UNRESOLVED;
self->resolveFinish.trigger();
throw connection_string_invalid();
}
self->status = ClusterConnectionString::RESOLVED;
self->resolveFinish.trigger();
break;
} else if (self->status == ClusterConnectionString::RESOLVING) {
wait(self->resolveFinish.onTrigger());
if (self->status == ClusterConnectionString::RESOLVED) {
break;
}
// Otherwise, this means other threads failed on resolve, so here we go back to the loop and try to resolve
// again.
} else {
// status is RESOLVED, nothing to do.
break;
}
}
return Void();
}
Future<Void> ClusterConnectionString::resolveHostnames() {
return resolveHostnamesImpl(this);
}
void ClusterConnectionString::resolveHostnamesBlocking() {
if (status != RESOLVED) {
status = RESOLVING;
for (auto const& hostname : hostnames) {
std::vector<NetworkAddress> addresses =
INetworkConnections::net()->resolveTCPEndpointBlocking(hostname.host, hostname.service);
NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (hostname.isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
addResolved(hostname, address);
}
std::sort(coords.begin(), coords.end());
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
status = UNRESOLVED;
throw connection_string_invalid();
}
status = RESOLVED;
}
}
void ClusterConnectionString::resetToUnresolved() {
if (status == RESOLVED && hostnames.size() > 0) {
coords.clear();
hostnames.clear();
networkAddressToHostname.clear();
status = UNRESOLVED;
parseConnString();
}
}
void ClusterConnectionString::resetConnectionString() {
connectionString = toString();
}
void ClusterConnectionString::parseConnString() {
ClusterConnectionString::ClusterConnectionString(const std::string& connectionString) {
auto trimmed = trim(connectionString);
// Split on '@' into key@addrs
int pAt = connectionString.find_first_of('@');
if (pAt == connectionString.npos) {
int pAt = trimmed.find_first_of('@');
if (pAt == trimmed.npos) {
throw connection_string_invalid();
}
std::string key = connectionString.substr(0, pAt);
std::string addrs = connectionString.substr(pAt + 1);
std::string key = trimmed.substr(0, pAt);
std::string addrs = trimmed.substr(pAt + 1);
parseKey(key);
std::set<Hostname> hostnameSet;
std::set<NetworkAddress> addressSet;
std::string curAddr;
for (int p = 0; p <= addrs.size();) {
int pComma = addrs.find_first_of(',', p);
@ -199,31 +105,29 @@ void ClusterConnectionString::parseConnString() {
pComma = addrs.size();
curAddr = addrs.substr(p, pComma - p);
if (Hostname::isHostname(curAddr)) {
Hostname h = Hostname::parse(curAddr);
// Check that there are no duplicate hostnames
if (hostnameSet.find(h) != hostnameSet.end()) {
throw connection_string_invalid();
}
hostnames.push_back(Hostname::parse(curAddr));
hostnameSet.insert(h);
} else {
coords.push_back(NetworkAddress::parse(curAddr));
NetworkAddress n = NetworkAddress::parse(curAddr);
// Check that there are no duplicate addresses
if (addressSet.find(n) != addressSet.end()) {
throw connection_string_invalid();
}
coords.push_back(n);
addressSet.insert(n);
}
p = pComma + 1;
}
if (hostnames.size() > 0) {
status = UNRESOLVED;
}
ASSERT((coords.size() + hostnames.size()) > 0);
std::sort(coords.begin(), coords.end());
// Check that there are no duplicate addresses
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
throw connection_string_invalid();
}
}
ClusterConnectionString::ClusterConnectionString(const std::string& connStr) {
connectionString = trim(connStr);
parseConnString();
}
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
std::string input;
state std::string input;
{
input = "asdf:2345@1.1.1.1:345";
@ -231,6 +135,15 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
ASSERT(input == cs.toString());
}
{
input = "asdf:2345@1.1.1.1:345,1.1.1.1:345";
try {
ClusterConnectionString cs(input);
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
}
}
{
input = "0xxdeadbeef:100100100@1.1.1.1:34534,5.1.5.3:23443";
ClusterConnectionString cs(input);
@ -274,20 +187,27 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
}
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
std::string input;
state std::string input;
{
input = "asdf:2345@localhost:1234";
ClusterConnectionString cs(input);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 1);
ASSERT(input == cs.toString());
}
{
input = "asdf:2345@localhost:1234,localhost:1234";
try {
ClusterConnectionString cs(input);
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
}
}
{
input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443";
ClusterConnectionString cs(input);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@ -300,7 +220,6 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
commented += "# asdfasdf ##";
ClusterConnectionString cs(commented);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@ -313,7 +232,6 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
commented += "# asdfasdf ##";
ClusterConnectionString cs(commented);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@ -321,44 +239,30 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
return Void();
}
TEST_CASE("/fdbclient/MonitorLeader/ConnectionString") {
state std::string connectionString = "TestCluster:0@localhost:1234,host-name:5678";
std::string hn1 = "localhost", port1 = "1234";
state std::string hn2 = "host-name";
state std::string port2 = "5678";
state std::vector<Hostname> hostnames;
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
hostnames.push_back(Hostname::parse(hn2 + ":" + port2));
TEST_CASE("/fdbclient/MonitorLeader/ConnectionString/hostname") {
std::string connectionString = "TestCluster:0@localhost:1234,host-name:5678";
std::string hn1 = "localhost", port1 = "1234", hn2 = "host-name", port2 = "5678";
std::vector<Hostname> hostnames;
NetworkAddress address1 = NetworkAddress::parse("127.0.0.0:1234");
NetworkAddress address2 = NetworkAddress::parse("127.0.0.1:5678");
{
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
hostnames.push_back(Hostname::parse(hn2 + ":" + port2));
INetworkConnections::net()->addMockTCPEndpoint(hn1, port1, { address1 });
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address2 });
ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
ASSERT(cs.toString() == connectionString);
}
state ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
wait(cs.resolveHostnames());
ASSERT(cs.status == ClusterConnectionString::RESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 2);
ASSERT(cs.toString() == connectionString);
cs.resetToUnresolved();
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
ASSERT(cs.toString() == connectionString);
INetworkConnections::net()->removeMockTCPEndpoint(hn2, port2);
NetworkAddress address3 = NetworkAddress::parse("127.0.0.0:5678");
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address3 });
try {
wait(cs.resolveHostnames());
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
{
hostnames.clear();
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
try {
ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
}
}
return Void();
@ -380,6 +284,7 @@ ACTOR Future<std::vector<NetworkAddress>> tryResolveHostnamesImpl(ClusterConnect
allCoordinatorsSet.insert(coord);
}
std::vector<NetworkAddress> allCoordinators(allCoordinatorsSet.begin(), allCoordinatorsSet.end());
std::sort(allCoordinators.begin(), allCoordinators.end());
return allCoordinators;
}
@ -484,17 +389,22 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/fuzz") {
}
ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key)
: status(RESOLVED), coords(servers) {
: coords(servers) {
std::set<NetworkAddress> s(servers.begin(), servers.end());
if (s.size() != servers.size()) {
throw connection_string_invalid();
}
std::string keyString = key.toString();
parseKey(keyString);
resetConnectionString();
}
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key)
: status(UNRESOLVED), hostnames(hosts) {
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key) : hostnames(hosts) {
std::set<Hostname> h(hosts.begin(), hosts.end());
if (h.size() != hosts.size()) {
throw connection_string_invalid();
}
std::string keyString = key.toString();
parseKey(keyString);
resetConnectionString();
}
void ClusterConnectionString::parseKey(const std::string& key) {
@ -529,13 +439,11 @@ void ClusterConnectionString::parseKey(const std::string& key) {
std::string ClusterConnectionString::toString() const {
std::string s = key.toString();
s += '@';
for (int i = 0; i < coords.size(); i++) {
if (networkAddressToHostname.find(coords[i]) == networkAddressToHostname.end()) {
if (s.find('@') != s.length() - 1) {
s += ',';
}
s += coords[i].toString();
for (auto const& coord : coords) {
if (s.find('@') != s.length() - 1) {
s += ',';
}
s += coord.toString();
}
for (auto const& host : hostnames) {
if (s.find('@') != s.length() - 1) {
@ -547,11 +455,14 @@ std::string ClusterConnectionString::toString() const {
}
ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s)
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
clusterKey = cs.clusterKey();
for (auto h : cs.hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (auto s : cs.coordinators()) {
clientLeaderServers.push_back(ClientLeaderRegInterface(s));
}
}
ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddress> coordinators)
@ -576,49 +487,32 @@ ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {
// Nominee is the worker among all workers that are considered as leader by one coordinator
// This function contacts a coordinator coord to ask who is its nominee.
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
// to throw `coordinators_changed()` error
ACTOR Future<Void> monitorNominee(Key key,
ClientLeaderRegInterface coord,
AsyncTrigger* nomineeChange,
Optional<LeaderInfo>* info,
Optional<Hostname> hostname = Optional<Hostname>()) {
Optional<LeaderInfo>* info) {
loop {
state Optional<LeaderInfo> li;
if (coord.getLeader.getEndpoint().getPrimaryAddress().fromHostname) {
state ErrorOr<Optional<LeaderInfo>> rep =
wait(coord.getLeader.tryGetReply(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply));
if (rep.isError()) {
// Connecting to nominee failed, most likely due to connection failed.
TraceEvent("MonitorNomineeError")
.error(rep.getError())
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
.detail("OldAddr", coord.getLeader.getEndpoint().getPrimaryAddress().toString());
if (rep.getError().code() == error_code_request_maybe_delivered) {
// Delay to prevent tight resolving loop due to outdated DNS cache
wait(delay(FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL));
throw coordinators_changed();
} else {
throw rep.getError();
}
} else if (rep.present()) {
li = rep.get();
}
if (coord.hostname.present()) {
wait(store(li,
retryGetReplyFromHostname(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
coord.hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply)));
} else {
Optional<LeaderInfo> tmp =
wait(retryBrokenPromise(coord.getLeader,
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply));
li = tmp;
wait(store(li,
retryBrokenPromise(coord.getLeader,
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply)));
}
wait(Future<Void>(Void())); // Make sure we weren't cancelled
TraceEvent("GetLeaderReply")
.suppressFor(1.0)
.detail("Coordinator", coord.getLeader.getEndpoint().getPrimaryAddress())
.detail("Coordinator",
coord.hostname.present() ? coord.hostname.get().toString()
: coord.getLeader.getEndpoint().getPrimaryAddress().toString())
.detail("Nominee", li.present() ? li.get().changeID : UID())
.detail("ClusterKey", key.printable());
@ -687,74 +581,54 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<Leade
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo,
MonitorLeaderInfo info) {
state ClientCoordinators coordinators(info.intermediateConnRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
state Future<Void> allActors;
nominees.resize(coordinators.clientLeaderServers.size());
state std::vector<Future<Void>> actors;
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
actors.reserve(coordinators.clientLeaderServers.size());
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
actors.push_back(
monitorNominee(coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i]));
}
allActors = waitForAll(actors);
loop {
wait(connRecord->resolveHostnames());
wait(info.intermediateConnRecord->resolveHostnames());
state ClientCoordinators coordinators(info.intermediateConnRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
state Future<Void> allActors;
nominees.resize(coordinators.clientLeaderServers.size());
state std::vector<Future<Void>> actors;
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
actors.reserve(coordinators.clientLeaderServers.size());
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
Optional<Hostname> hostname;
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
coordinators.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress());
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
hostname = r->second;
}
actors.push_back(monitorNominee(
coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i], hostname));
}
allActors = waitForAll(actors);
loop {
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
TraceEvent("MonitorLeaderChange")
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
if (leader.present()) {
if (leader.get().first.forward) {
TraceEvent("MonitorLeaderForwarding")
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
.trackLatest("MonitorLeaderForwarding");
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
return info;
}
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnRecord->getConnectionString().toString());
}
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}
info.hasConnected = true;
connRecord->notifyConnected();
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
}
try {
wait(nomineeChange.onTrigger() || allActors);
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorLeaderCoordinatorsChanged").suppressFor(1.0);
connRecord->getConnectionString().resetToUnresolved();
break;
} else {
throw e;
}
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
TraceEvent("MonitorLeaderChange")
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
if (leader.present()) {
if (leader.get().first.forward) {
TraceEvent("MonitorLeaderForwarding")
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
.trackLatest("MonitorLeaderForwarding");
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
return info;
}
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnRecord->getConnectionString().toString());
}
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}
info.hasConnected = true;
connRecord->notifyConnected();
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
}
wait(nomineeChange.onTrigger() || allActors);
}
}
@ -885,10 +759,10 @@ ACTOR Future<Void> getClientInfoFromLeader(Reference<AsyncVar<Optional<ClusterCo
}
ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
std::vector<Hostname> hostnames,
std::vector<NetworkAddress> coordinators,
ClientData* clientData,
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo,
Reference<AsyncVar<Void>> coordinatorsChanged) {
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo) {
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
@ -896,8 +770,12 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
state Reference<AsyncVar<Optional<ClusterControllerClientInterface>>> knownLeader(
new AsyncVar<Optional<ClusterControllerClientInterface>>{});
for (auto s = coordinators.begin(); s != coordinators.end(); ++s) {
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
clientLeaderServers.reserve(hostnames.size() + coordinators.size());
for (auto h : hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (auto s : coordinators) {
clientLeaderServers.push_back(ClientLeaderRegInterface(s));
}
nominees.resize(clientLeaderServers.size());
@ -936,14 +814,7 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
leaderInfo->set(leader.get().first);
}
}
try {
wait(nomineeChange.onTrigger() || allActors);
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
coordinatorsChanged->trigger();
}
throw e;
}
wait(nomineeChange.onTrigger() || allActors);
}
}
@ -995,7 +866,7 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) {
state ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
state std::vector<NetworkAddress> addrs = cs.coordinators();
state int coordinatorsSize = cs.hostnames.size() + cs.coordinators().size();
state int index = 0;
state int successIndex = 0;
state Optional<double> incorrectTime;
@ -1003,15 +874,26 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
state std::vector<CommitProxyInterface> lastCommitProxies;
state std::vector<UID> lastGrvProxyUIDs;
state std::vector<GrvProxyInterface> lastGrvProxies;
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
clientLeaderServers.reserve(coordinatorsSize);
for (const auto& h : cs.hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (const auto& c : cs.coordinators()) {
clientLeaderServers.push_back(ClientLeaderRegInterface(c));
}
deterministicRandom()->randomShuffle(clientLeaderServers);
deterministicRandom()->randomShuffle(addrs);
loop {
state ClientLeaderRegInterface clientLeaderServer(addrs[index]);
state ClientLeaderRegInterface clientLeaderServer = clientLeaderServers[index];
state OpenDatabaseCoordRequest req;
coordinator->set(clientLeaderServer);
req.clusterKey = cs.clusterKey();
req.hostnames = cs.hostnames;
req.coordinators = cs.coordinators();
req.knownClientInfoID = clientInfo->get().id;
req.supportedVersions = supportedVersions->get();
@ -1040,8 +922,16 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
incorrectTime = Optional<double>();
}
state ErrorOr<CachedSerialization<ClientDBInfo>> rep =
wait(clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply));
state ErrorOr<CachedSerialization<ClientDBInfo>> rep;
if (clientLeaderServer.hostname.present()) {
wait(store(rep,
tryGetReplyFromHostname(req,
clientLeaderServer.hostname.get(),
WLTOKEN_CLIENTLEADERREG_OPENDATABASE,
TaskPriority::CoordinationReply)));
} else {
wait(store(rep, clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply)));
}
if (rep.present()) {
if (rep.get().read().forward.present()) {
TraceEvent("MonitorProxiesForwarding")
@ -1072,15 +962,10 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
successIndex = index;
} else {
TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller
if (rep.getError().code() == error_code_coordinators_changed) {
throw coordinators_changed();
}
index = (index + 1) % addrs.size();
TEST(rep.getError().code() == error_code_lookup_failed); // Coordinator hostname resolving failure
index = (index + 1) % coordinatorsSize;
if (index == successIndex) {
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
// When the client fails talking to all coordinators, we throw coordinators_changed() and let the caller
// re-resolve the connection string and retry.
throw coordinators_changed();
}
}
}
@ -1092,27 +977,16 @@ ACTOR Future<Void> monitorProxies(
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) {
wait(connRecord->get()->resolveHostnames());
state MonitorLeaderInfo info(connRecord->get());
loop {
try {
wait(info.intermediateConnRecord->resolveHostnames());
choose {
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
info = _info;
}
when(wait(connRecord->onChange())) {
info.hasConnected = false;
info.intermediateConnRecord = connRecord->get();
}
choose {
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
info = _info;
}
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorProxiesCoordinatorsChanged").suppressFor(1.0);
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
} else {
throw e;
when(wait(connRecord->onChange())) {
info.hasConnected = false;
info.intermediateConnRecord = connRecord->get();
}
}
}

View File

@ -75,10 +75,10 @@ Future<Void> monitorLeader(Reference<IClusterConnectionRecord> const& connFile,
// nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function
// also monitors the change of the leader.
Future<Void> monitorLeaderAndGetClientInfo(Key const& clusterKey,
std::vector<Hostname> const& hostnames,
std::vector<NetworkAddress> const& coordinators,
ClientData* const& clientData,
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo,
Reference<AsyncVar<Void>> const& coordinatorsChanged);
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo);
Future<Void> monitorProxies(
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,

View File

@ -18,6 +18,10 @@
* limitations under the License.
*/
#ifdef ADDRESS_SANITIZER
#include <sanitizer/lsan_interface.h>
#endif
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GenericManagementAPI.actor.h"
@ -2763,6 +2767,11 @@ template <class T>
THREAD_FUNC runSingleAssignmentVarTest(void* arg) {
noUnseed = true;
// This test intentionally leaks memory
#ifdef ADDRESS_SANITIZER
__lsan::ScopedDisabler disableLeakChecks;
#endif
volatile bool* done = (volatile bool*)arg;
try {
for (int i = 0; i < 25; ++i) {

View File

@ -5100,10 +5100,10 @@ Future<Optional<Value>> Transaction::get(const Key& key, Snapshot snapshot) {
++trState->cx->transactionGetValueRequests;
// ASSERT (key < allKeys.end);
// There are no keys in the database with size greater than KEY_SIZE_LIMIT
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
// There are no keys in the database with size greater than the max key size
if (key.size() > getMaxReadKeySize(key)) {
return Optional<Value>();
}
auto ver = getReadVersion();
@ -5484,23 +5484,19 @@ Future<Void> Transaction::getRangeStream(const PromiseStream<RangeResult>& resul
void Transaction::addReadConflictRange(KeyRangeRef const& keys) {
ASSERT(!keys.empty());
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin;
KeyRef end = keys.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxReadKeySize(begin);
int64_t endMaxSize = getMaxReadKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);
@ -5522,8 +5518,7 @@ void Transaction::makeSelfConflicting() {
void Transaction::set(const KeyRef& key, const ValueRef& value, AddConflictRange addConflictRange) {
++trState->cx->transactionSetMutations;
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, trState->options.rawAccess))
throw key_too_large();
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large();
@ -5544,8 +5539,7 @@ void Transaction::atomicOp(const KeyRef& key,
MutationRef::Type operationType,
AddConflictRange addConflictRange) {
++trState->cx->transactionAtomicMutations;
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, trState->options.rawAccess))
throw key_too_large();
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large();
@ -5578,20 +5572,16 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
KeyRef begin = range.begin;
KeyRef end = range.end;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxClearKeySize(begin);
int64_t endMaxSize = getMaxClearKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
auto r = KeyRangeRef(req.arena, KeyRangeRef(begin, end));
if (r.empty())
@ -5604,10 +5594,10 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
}
void Transaction::clear(const KeyRef& key, AddConflictRange addConflictRange) {
++trState->cx->transactionClearMutations;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
// There aren't any keys in the database with size larger than the max key size
if (key.size() > getMaxClearKeySize(key)) {
return;
}
auto& req = tr;
auto& t = req.transaction;
@ -5626,24 +5616,19 @@ void Transaction::addWriteConflictRange(const KeyRangeRef& keys) {
auto& req = tr;
auto& t = req.transaction;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin;
KeyRef end = keys.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxKeySize(begin);
int64_t endMaxSize = getMaxKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);
if (r.empty()) {
@ -6942,11 +6927,18 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
}
// Gets the protocol version reported by a coordinator via the protocol info interface
ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordinatorAddresses) {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown({ coordinatorAddresses },
WLTOKEN_PROTOCOL_INFO) };
ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
ACTOR Future<ProtocolVersion> getCoordinatorProtocol(
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator) {
state ProtocolInfoReply reply;
if (coordinator->get().get().hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(
ProtocolInfoRequest{}, coordinator->get().get().hostname.get(), WLTOKEN_PROTOCOL_INFO)));
} else {
RequestStream<ProtocolInfoRequest> requestStream(
Endpoint::wellKnown({ coordinator->get().get().getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO));
wait(store(reply, retryBrokenPromise(requestStream, ProtocolInfoRequest{})));
}
return reply.version;
}
@ -6955,8 +6947,16 @@ ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordina
// function will return with an unset result.
// If an expected version is given, this future won't return if the actual protocol version matches the expected version
ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
NetworkAddress coordinatorAddress,
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator,
Optional<ProtocolVersion> expectedVersion) {
state NetworkAddress coordinatorAddress;
if (coordinator->get().get().hostname.present()) {
Hostname h = coordinator->get().get().hostname.get();
wait(store(coordinatorAddress, h.resolveWithRetry()));
} else {
coordinatorAddress = coordinator->get().get().getLeader.getEndpoint().getPrimaryAddress();
}
state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion =
FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);
@ -6991,11 +6991,10 @@ ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
if (!coordinator->get().present()) {
wait(coordinator->onChange());
} else {
Endpoint coordinatorEndpoint = coordinator->get().get().getLeader.getEndpoint();
if (needToConnect) {
// Even though we typically rely on the connect packet to get the protocol version, we need to send some
// request in order to start a connection. This protocol version request serves that purpose.
protocolVersion = getCoordinatorProtocol(coordinatorEndpoint.addresses);
protocolVersion = getCoordinatorProtocol(coordinator);
needToConnect = false;
}
choose {
@ -7011,8 +7010,8 @@ ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
// Older versions of FDB don't have an endpoint to return the protocol version, so we get this info from
// the connect packet
when(Optional<ProtocolVersion> pv = wait(getCoordinatorProtocolFromConnectPacket(
coordinatorEndpoint.getPrimaryAddress(), expectedVersion))) {
when(Optional<ProtocolVersion> pv =
wait(getCoordinatorProtocolFromConnectPacket(coordinator, expectedVersion))) {
if (pv.present()) {
return pv.get();
} else {
@ -8186,14 +8185,20 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion
throw;
}
TraceEvent("ExclusionSafetyCheckCoordinators").log();
wait(cx->getConnectionRecord()->resolveHostnames());
state ClientCoordinators coordinatorList(cx->getConnectionRecord());
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coordinatorList.clientLeaderServers.size());
for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
leaderServers.push_back(retryBrokenPromise(coordinatorList.clientLeaderServers[i].getLeader,
GetLeaderRequest(coordinatorList.clusterKey, UID()),
TaskPriority::CoordinationReply));
if (coordinatorList.clientLeaderServers[i].hostname.present()) {
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coordinatorList.clusterKey, UID()),
coordinatorList.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coordinatorList.clientLeaderServers[i].getLeader,
GetLeaderRequest(coordinatorList.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
}
// Wait for quorum so we don't dismiss live coordinators as unreachable by acting too fast
choose {
@ -9395,3 +9400,21 @@ ACTOR Future<Void> waitPurgeGranulesCompleteActor(Reference<DatabaseContext> db,
Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) {
return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey);
}
int64_t getMaxKeySize(KeyRef const& key) {
return getMaxWriteKeySize(key, true);
}
int64_t getMaxReadKeySize(KeyRef const& key) {
return getMaxKeySize(key);
}
int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess) {
int64_t tenantSize = hasRawAccess ? CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT : 0;
return key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT + tenantSize;
}
int64_t getMaxClearKeySize(KeyRef const& key) {
return getMaxKeySize(key);
}

View File

@ -539,5 +539,19 @@ ACTOR Future<std::vector<std::pair<UID, StorageWiggleValue>>> readStorageWiggleV
bool primary,
bool use_system_priority);
// Returns the maximum legal size of a key. This size will be determined by the prefix of the passed in key
// (system keys have a larger maximum size). This should be used for generic max key size requests.
int64_t getMaxKeySize(KeyRef const& key);
// Returns the maximum legal size of a key that can be read. Keys larger than this will be assumed not to exist.
int64_t getMaxReadKeySize(KeyRef const& key);
// Returns the maximum legal size of a key that can be written. If using raw access, writes to normal keys will
// be allowed to be slightly larger to accommodate the prefix.
int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess);
// Returns the maximum legal size of a key that can be cleared. Keys larger than this will be assumed not to exist.
int64_t getMaxClearKeySize(KeyRef const& key);
#include "flow/unactorcompiler.h"
#endif

View File

@ -59,8 +59,14 @@ class CommitQuorum {
ConfigGeneration generation,
ConfigTransactionInterface cti) {
try {
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)),
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
if (cti.hostname.present()) {
wait(timeoutError(retryGetReplyFromHostname(
self->getCommitRequest(generation), cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT),
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
} else {
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)),
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
}
++self->successful;
} catch (Error& e) {
// self might be destroyed if this actor is cancelled
@ -122,9 +128,20 @@ class GetGenerationQuorum {
ACTOR static Future<Void> addRequestActor(GetGenerationQuorum* self, ConfigTransactionInterface cti) {
loop {
try {
ConfigTransactionGetGenerationReply reply = wait(timeoutError(
cti.getGeneration.getReply(ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion }),
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
state ConfigTransactionGetGenerationReply reply;
if (cti.hostname.present()) {
wait(timeoutError(store(reply,
retryGetReplyFromHostname(
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion },
cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETGENERATION)),
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
} else {
wait(timeoutError(store(reply,
cti.getGeneration.getReply(
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion })),
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
}
++self->totalRepliesReceived;
auto gen = reply.generation;
@ -225,9 +242,18 @@ class PaxosConfigTransactionImpl {
state ConfigKey configKey = ConfigKey::decodeKey(key);
loop {
try {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes(
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state std::vector<ConfigTransactionInterface> readReplicas =
self->getGenerationQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.get, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GET));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetReply reply =
wait(timeoutError(basicLoadBalance(configNodes,
&ConfigTransactionInterface::get,
@ -248,9 +274,17 @@ class PaxosConfigTransactionImpl {
}
ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes(
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.getClasses, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETCLASSES));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetConfigClassesReply reply =
wait(basicLoadBalance(configNodes,
&ConfigTransactionInterface::getClasses,
@ -264,9 +298,17 @@ class PaxosConfigTransactionImpl {
}
ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes(
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.getKnobs, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETKNOBS));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetKnobsReply reply =
wait(basicLoadBalance(configNodes,
&ConfigTransactionInterface::getKnobs,
@ -366,10 +408,13 @@ public:
Future<Void> commit() { return commit(this); }
PaxosConfigTransactionImpl(Database const& cx) : cx(cx) {
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators();
ctis.reserve(coordinators.size());
for (const auto& coordinator : coordinators) {
ctis.emplace_back(coordinator);
const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
ctis.reserve(cs.hostnames.size() + cs.coordinators().size());
for (const auto& h : cs.hostnames) {
ctis.emplace_back(h);
}
for (const auto& c : cs.coordinators()) {
ctis.emplace_back(c);
}
getGenerationQuorum = GetGenerationQuorum{ ctis };
commitQuorum = CommitQuorum{ ctis };

View File

@ -19,6 +19,7 @@
*/
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/Atomic.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/SpecialKeySpace.actor.h"
@ -1578,10 +1579,10 @@ Future<Optional<Value>> ReadYourWritesTransaction::get(const Key& key, Snapshot
if (key >= getMaxReadKey() && key != metadataVersionKey)
return key_outside_legal_range();
// There are no keys in the database with size greater than KEY_SIZE_LIMIT
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
// There are no keys in the database with size greater than the max key size
if (key.size() > getMaxReadKeySize(key)) {
return Optional<Value>();
}
Future<Optional<Value>> result = RYWImpl::readWithConflictRange(this, RYWImpl::GetValueReq(key), snapshot);
reading.add(success(result));
@ -1822,23 +1823,19 @@ void ReadYourWritesTransaction::addReadConflictRange(KeyRangeRef const& keys) {
}
}
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin;
KeyRef end = keys.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxReadKeySize(begin);
int64_t endMaxSize = getMaxReadKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);
@ -2111,9 +2108,9 @@ void ReadYourWritesTransaction::atomicOp(const KeyRef& key, const ValueRef& oper
if (!isValidMutationType(operationType) || !isAtomicOp((MutationRef::Type)operationType))
throw invalid_mutation_type();
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
throw key_too_large();
}
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large();
@ -2218,9 +2215,9 @@ void ReadYourWritesTransaction::set(const KeyRef& key, const ValueRef& value) {
}
// TODO: check transaction size here
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
throw key_too_large();
}
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large();
@ -2254,23 +2251,19 @@ void ReadYourWritesTransaction::clear(const KeyRangeRef& range) {
return tr.clear(range, addWriteConflict);
}
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = range.begin;
KeyRef end = range.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxClearKeySize(begin);
int64_t endMaxSize = getMaxClearKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);
@ -2300,9 +2293,9 @@ void ReadYourWritesTransaction::clear(const KeyRef& key) {
if (key >= getMaxWriteKey())
throw key_outside_legal_range();
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxClearKeySize(key)) {
return;
}
if (options.readYourWritesDisabled) {
return tr.clear(key, addWriteConflict);
@ -2332,9 +2325,9 @@ Future<Void> ReadYourWritesTransaction::watch(const Key& key) {
if (key >= allKeys.end || (key >= getMaxReadKey() && key != metadataVersionKey && tr.apiVersionAtLeast(300)))
return key_outside_legal_range();
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
return key_too_large();
}
return RYWImpl::watch(this, key);
}
@ -2350,23 +2343,19 @@ void ReadYourWritesTransaction::addWriteConflictRange(KeyRangeRef const& keys) {
}
}
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin;
KeyRef end = keys.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxKeySize(begin);
int64_t endMaxSize = getMaxKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);

View File

@ -25,9 +25,15 @@
#include "fdbclient/sha1/SHA1.h"
#include <time.h>
#include <iomanip>
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/sha.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>
#if defined(HAVE_WOLFSSL)
#undef SHA1 // wolfSSL will shadow FDB SHA1.h
#endif
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string.hpp>

View File

@ -450,6 +450,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
init( MAX_PROXY_COMPUTE, 2.0 );
init( MAX_COMPUTE_PER_OPERATION, 0.1 );
init( MAX_COMPUTE_DURATION_LOG_CUTOFF, 0.05 );
init( PROXY_COMPUTE_BUCKETS, 20000 );
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
init( TXN_STATE_SEND_AMOUNT, 4 );
@ -541,6 +542,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( CC_ENABLE_ENTIRE_SATELLITE_MONITORING, false );
init( CC_SATELLITE_DEGRADATION_MIN_COMPLAINER, 3 );
init( CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER, 3 );
init( CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL, 0.5 );
init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );

View File

@ -374,6 +374,7 @@ public:
int MAX_COMMIT_UPDATES;
double MAX_PROXY_COMPUTE;
double MAX_COMPUTE_PER_OPERATION;
double MAX_COMPUTE_DURATION_LOG_CUTOFF;
int PROXY_COMPUTE_BUCKETS;
double PROXY_COMPUTE_GROWTH_RATE;
int TXN_STATE_SEND_AMOUNT;
@ -480,6 +481,8 @@ public:
// be determined as degraded worker.
int CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER; // The minimum amount of degraded server in satellite DC to be
// determined as degraded satellite.
double CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL; // The interval to prevent re-recruiting the same singleton if a
// recruiting fight between two cluster controllers occurs.
// Knobs used to select the best policy (via monte carlo)
int POLICY_RATING_TESTS; // number of tests per policy (in order to compare)

View File

@ -41,9 +41,15 @@ class SimpleConfigTransactionImpl {
if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGettingReadVersion", self->dID.get());
}
ConfigTransactionGetGenerationRequest req;
ConfigTransactionGetGenerationReply reply =
wait(retryBrokenPromise(self->cti.getGeneration, ConfigTransactionGetGenerationRequest{}));
state ConfigTransactionGetGenerationReply reply;
if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetGenerationRequest{},
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETGENERATION)));
} else {
wait(store(reply, retryBrokenPromise(self->cti.getGeneration, ConfigTransactionGetGenerationRequest{})));
}
if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGotReadVersion", self->dID.get())
.detail("Version", reply.generation.liveVersion);
@ -62,8 +68,15 @@ class SimpleConfigTransactionImpl {
.detail("ConfigClass", configKey.configClass)
.detail("KnobName", configKey.knobName);
}
ConfigTransactionGetReply reply =
wait(retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey }));
state ConfigTransactionGetReply reply;
if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetRequest{ generation, configKey },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GET)));
} else {
wait(store(reply, retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey })));
}
if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGotValue", self->dID.get())
.detail("Value", reply.value.get().toString());
@ -80,8 +93,17 @@ class SimpleConfigTransactionImpl {
self->getGenerationFuture = getGeneration(self);
}
ConfigGeneration generation = wait(self->getGenerationFuture);
ConfigTransactionGetConfigClassesReply reply =
wait(retryBrokenPromise(self->cti.getClasses, ConfigTransactionGetConfigClassesRequest{ generation }));
state ConfigTransactionGetConfigClassesReply reply;
if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetConfigClassesRequest{ generation },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETCLASSES)));
} else {
wait(store(
reply,
retryBrokenPromise(self->cti.getClasses, ConfigTransactionGetConfigClassesRequest{ generation })));
}
RangeResult result;
for (const auto& configClass : reply.configClasses) {
result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr));
@ -94,8 +116,17 @@ class SimpleConfigTransactionImpl {
self->getGenerationFuture = getGeneration(self);
}
ConfigGeneration generation = wait(self->getGenerationFuture);
ConfigTransactionGetKnobsReply reply =
wait(retryBrokenPromise(self->cti.getKnobs, ConfigTransactionGetKnobsRequest{ generation, configClass }));
state ConfigTransactionGetKnobsReply reply;
if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetKnobsRequest{ generation, configClass },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETKNOBS)));
} else {
wait(store(
reply,
retryBrokenPromise(self->cti.getKnobs, ConfigTransactionGetKnobsRequest{ generation, configClass })));
}
RangeResult result;
for (const auto& knobName : reply.knobNames) {
result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr));
@ -109,7 +140,11 @@ class SimpleConfigTransactionImpl {
}
wait(store(self->toCommit.generation, self->getGenerationFuture));
self->toCommit.annotation.timestamp = now();
wait(retryBrokenPromise(self->cti.commit, self->toCommit));
if (self->cti.hostname.present()) {
wait(retryGetReplyFromHostname(self->toCommit, self->cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT));
} else {
wait(retryBrokenPromise(self->cti.commit, self->toCommit));
}
self->committed = true;
return Void();
}
@ -126,9 +161,14 @@ class SimpleConfigTransactionImpl {
public:
SimpleConfigTransactionImpl(Database const& cx) : cx(cx) {
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators();
std::sort(coordinators.begin(), coordinators.end());
cti = ConfigTransactionInterface(coordinators[0]);
const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
if (cs.coordinators().size()) {
std::vector<NetworkAddress> coordinators = cs.coordinators();
std::sort(coordinators.begin(), coordinators.end());
cti = ConfigTransactionInterface(coordinators[0]);
} else {
cti = ConfigTransactionInterface(cs.hostnames[0]);
}
}
SimpleConfigTransactionImpl(ConfigTransactionInterface const& cti) : cti(cti) {}

View File

@ -1644,13 +1644,10 @@ void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key
CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
ACTOR Future<RangeResult> coordinatorsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) {
state ClusterConnectionString cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
state std::vector<NetworkAddress> coordinator_processes = wait(cs.tryResolveHostnames());
RangeResult result;
KeyRef prefix(getKeyRange().begin);
auto cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
auto coordinator_processes = cs.coordinators();
Key cluster_decription_key = prefix.withSuffix(LiteralStringRef("cluster_description"));
if (kr.contains(cluster_decription_key)) {
result.push_back_deep(result.arena(), KeyValueRef(cluster_decription_key, cs.clusterKeyName()));
@ -1673,10 +1670,16 @@ Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
return rywGetRange(ryw, kr, result);
}
// Delegates to coordinatorsGetRangeActor, which resolves coordinator hostnames
// asynchronously before assembling the result. limitsHint is not used by the
// underlying actor.
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
                                               KeyRangeRef kr,
                                               GetRangeLimits limitsHint) const {
	return coordinatorsGetRangeActor(ryw, KeyRef(getKeyRange().begin), kr);
}
ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
state Reference<IQuorumChange> change;
state ClusterConnectionString
conn; // We don't care about the Key here, it will be overrode in changeQuorumChecker().
state ClusterConnectionString conn; // We don't care about the Key here.
state std::vector<std::string> process_address_or_hostname_strs;
state Optional<std::string> msg;
state int index;
@ -1700,7 +1703,6 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
try {
if (Hostname::isHostname(process_address_or_hostname_strs[index])) {
conn.hostnames.push_back(Hostname::parse(process_address_or_hostname_strs[index]));
conn.status = ClusterConnectionString::ConnectionStringStatus::UNRESOLVED;
} else {
NetworkAddress a = NetworkAddress::parse(process_address_or_hostname_strs[index]);
if (!a.isValid()) {
@ -1717,18 +1719,19 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
if (parse_error) {
std::string error = "ERROR: \'" + process_address_or_hostname_strs[index] +
"\' is not a valid network endpoint address\n";
if (process_address_or_hostname_strs[index].find(":tls") != std::string::npos)
error += " Do not include the `:tls' suffix when naming a process\n";
return ManagementAPIError::toJsonString(false, "coordinators", error);
}
}
}
wait(conn.resolveHostnames());
if (conn.coordinators().size())
change = specifiedQuorumChange(conn.coordinators());
else
std::vector<NetworkAddress> addressesVec = wait(conn.tryResolveHostnames());
if (addressesVec.size() != conn.hostnames.size() + conn.coordinators().size()) {
return ManagementAPIError::toJsonString(false, "coordinators", "One or more hostnames are not resolvable.");
} else if (addressesVec.size()) {
change = specifiedQuorumChange(addressesVec);
} else {
change = noQuorumChange();
}
// check update for cluster_description
Key cluster_decription_key = LiteralStringRef("cluster_description").withPrefix(kr.begin);
@ -1740,19 +1743,18 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
change = nameQuorumChange(entry.second.get().toString(), change);
} else {
// throw the error
return Optional<std::string>(ManagementAPIError::toJsonString(
false, "coordinators", "Cluster description must match [A-Za-z0-9_]+"));
return ManagementAPIError::toJsonString(
false, "coordinators", "Cluster description must match [A-Za-z0-9_]+");
}
}
ASSERT(change.isValid());
TraceEvent(SevDebug, "SKSChangeCoordinatorsStart")
.detail("NewHostnames", conn.hostnames.size() ? describe(conn.hostnames) : "N/A")
.detail("NewAddresses", describe(conn.coordinators()))
.detail("NewAddresses", describe(addressesVec))
.detail("Description", entry.first ? entry.second.get().toString() : "");
Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, &conn));
Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, addressesVec));
TraceEvent(SevDebug, "SKSChangeCoordinatorsFinish")
.detail("Result", r.present() ? static_cast<int>(r.get()) : -1); // -1 means success
@ -1804,9 +1806,10 @@ ACTOR static Future<RangeResult> CoordinatorsAutoImplActor(ReadYourWritesTransac
state ClusterConnectionString old(currentKey.get().toString());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
std::vector<NetworkAddress> _desiredCoordinators = wait(autoQuorumChange()->getDesiredCoordinators(
&tr,
old.coordinators(),
oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));

View File

@ -307,23 +307,35 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
bool* quorum_reachable,
int* coordinatorsFaultTolerance) {
try {
wait(connRecord->resolveHostnames());
state ClientCoordinators coord(connRecord);
state StatusObject statusObj;
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply));
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
if (coord.clientLeaderServers[i].hostname.present()) {
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coord.clusterKey, UID()),
coord.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
}
state std::vector<Future<ProtocolInfoReply>> coordProtocols;
coordProtocols.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown(
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO) };
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
if (coord.clientLeaderServers[i].hostname.present()) {
coordProtocols.push_back(retryGetReplyFromHostname(
ProtocolInfoRequest{}, coord.clientLeaderServers[i].hostname.get(), WLTOKEN_PROTOCOL_INFO));
} else {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown(
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO) };
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
}
}
wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) &&
@ -337,8 +349,12 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
int coordinatorsUnavailable = 0;
for (int i = 0; i < leaderServers.size(); i++) {
StatusObject coordStatus;
coordStatus["address"] =
coord.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress().toString();
if (coord.clientLeaderServers[i].hostname.present()) {
coordStatus["address"] = coord.clientLeaderServers[i].hostname.get().toString();
} else {
coordStatus["address"] =
coord.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress().toString();
}
if (leaderServers[i].isReady()) {
coordStatus["reachable"] = true;

View File

@ -48,6 +48,8 @@ struct TenantMapEntry {
int64_t id;
Key prefix;
constexpr static int ROOT_PREFIX_SIZE = sizeof(id);
private:
void initPrefix(KeyRef subspace) {
ASSERT(id >= 0);

View File

@ -24,6 +24,9 @@
*/
#if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED)
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/md5.h>
#elif !defined(_MD5_H)
#define _MD5_H

View File

@ -72,6 +72,20 @@ Future<REPLY_TYPE(Req)> retryBrokenPromise(RequestStream<Req, P> to, Req request
}
}
// Resolves `hostname` via DNS and, on success, points `*stream` at the
// well-known endpoint `token` on the resolved address. On resolution failure
// this returns without touching the stream, so the caller's stream may remain
// uninitialized — callers must tolerate that.
ACTOR template <class Req>
Future<Void> tryInitializeRequestStream(RequestStream<Req>* stream, Hostname hostname, WellKnownEndpoints token) {
	Optional<NetworkAddress> address = wait(hostname.resolve());
	if (!address.present()) {
		// DNS failed: leave the caller's stream as-is.
		return Void();
	}
	if (stream == nullptr) {
		// NOTE(review): this assigns only the local copy of `stream` — the caller
		// cannot observe it and the allocation is leaked. This branch looks like it
		// should be an ASSERT(stream != nullptr); confirm no caller passes null.
		stream = new RequestStream<Req>(Endpoint::wellKnown({ address.get() }, token));
	} else {
		*stream = RequestStream<Req>(Endpoint::wellKnown({ address.get() }, token));
	}
	return Void();
}
ACTOR template <class Req>
Future<ErrorOr<REPLY_TYPE(Req)>> tryGetReplyFromHostname(Req request, Hostname hostname, WellKnownEndpoints token) {
// A wrapper of tryGetReply(request), except that the request is sent to an address resolved from a hostname.

View File

@ -1110,10 +1110,10 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
newPriorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController);
bool isCoordinator =
(std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), req.wi.address()) !=
(std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), w.address()) !=
coordinatorAddresses.end()) ||
(req.wi.secondaryAddress().present() &&
std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), req.wi.secondaryAddress().get()) !=
(w.secondaryAddress().present() &&
std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), w.secondaryAddress().get()) !=
coordinatorAddresses.end());
for (auto it : req.incompatiblePeers) {
@ -1933,8 +1933,24 @@ ACTOR Future<Void> handleForcedRecoveries(ClusterControllerData* self, ClusterCo
}
}
ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
// Rate-limits singleton recruitment (used by the monitorDataDistributor /
// monitorRatekeeper / monitorEncryptKeyProxy loops): consecutive recruitments
// are spaced at least CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL apart, to avoid
// rapid re-recruiting when two cluster controllers fight over a singleton.
struct SingletonRecruitThrottler {
	double lastRecruitStart; // time of the most recent recruitment; -1 before the first

	SingletonRecruitThrottler() : lastRecruitStart(-1) {}

	// Records a new recruitment and returns how long the caller should wait
	// before starting it (0 if the throttle interval has already elapsed).
	double newRecruitment() {
		double currentTime = now();
		double nextAllowed = lastRecruitStart + SERVER_KNOBS->CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL;
		lastRecruitStart = currentTime;
		return (nextAllowed > currentTime) ? nextAllowed - currentTime : 0.0;
	}
};
ACTOR Future<Void> startDataDistributor(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartDataDistributor", self->id).log();
loop {
@ -2003,6 +2019,7 @@ ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
}
ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange());
}
@ -2019,13 +2036,15 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
when(wait(self->recruitDistributor.onChange())) {}
}
} else {
wait(startDataDistributor(self));
wait(startDataDistributor(self, recruitThrottler.newRecruitment()));
}
}
}
ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
ACTOR Future<Void> startRatekeeper(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartRatekeeper", self->id).log();
loop {
@ -2091,6 +2110,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
}
ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange());
}
@ -2107,34 +2127,15 @@ ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
when(wait(self->recruitRatekeeper.onChange())) {}
}
} else {
wait(startRatekeeper(self));
wait(startRatekeeper(self, recruitThrottler.newRecruitment()));
}
}
}
// Acquires the BM lock by getting the next epoch no.
ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
loop {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
try {
Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
wait(tr->commit());
TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
return newEpoch;
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCEKP_Start", self->id).log();
loop {
@ -2208,6 +2209,7 @@ ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
}
ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
loop {
if (self->db.serverInfo->get().encryptKeyProxy.present() && !self->recruitEncryptKeyProxy.get()) {
choose {
@ -2219,13 +2221,36 @@ ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
when(wait(self->recruitEncryptKeyProxy.onChange())) {}
}
} else {
wait(startEncryptKeyProxy(self));
wait(startEncryptKeyProxy(self, recruitThrottler.newRecruitment()));
}
}
}
ACTOR Future<Void> startBlobManager(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
// Acquires the BM lock by getting the next epoch no.
ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
loop {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
try {
Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
wait(tr->commit());
TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
return newEpoch;
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
ACTOR Future<Void> startBlobManager(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartBlobManager", self->id).log();
loop {
@ -2322,6 +2347,7 @@ ACTOR Future<Void> watchBlobGranulesConfigKey(ClusterControllerData* self) {
}
ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange());
}
@ -2352,7 +2378,7 @@ ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
}
} else if (self->db.blobGranulesEnabled.get()) {
// if there is no blob manager present but blob granules are now enabled, recruit a BM
wait(startBlobManager(self));
wait(startBlobManager(self, recruitThrottler.newRecruitment()));
} else {
// if there is no blob manager present and blob granules are disabled, wait for a config change
wait(self->db.blobGranulesEnabled.onChange());
@ -2481,12 +2507,11 @@ ACTOR Future<Void> workerHealthMonitor(ClusterControllerData* self) {
}
}
ACTOR Future<Void> clusterControllerCore(Reference<IClusterConnectionRecord> connRecord,
ClusterControllerFullInterface interf,
ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
Future<Void> leaderFail,
ServerCoordinators coordinators,
LocalityData locality,
ConfigDBType configDBType) {
state ServerCoordinators coordinators(connRecord);
state ClusterControllerData self(interf, locality, coordinators);
state ConfigBroadcaster configBroadcaster(coordinators, configDBType);
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
@ -2621,7 +2646,7 @@ ACTOR Future<Void> replaceInterface(ClusterControllerFullInterface interf) {
}
}
ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRecord,
ACTOR Future<Void> clusterController(ServerCoordinators coordinators,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
@ -2632,10 +2657,9 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
state bool inRole = false;
cci.initEndpoints();
try {
wait(connRecord->resolveHostnames());
// Register as a possible leader; wait to be elected
state Future<Void> leaderFail =
tryBecomeLeader(connRecord, cci, currentCC, hasConnected, asyncPriorityInfo);
tryBecomeLeader(coordinators, cci, currentCC, hasConnected, asyncPriorityInfo);
state Future<Void> shouldReplace = replaceInterface(cci);
while (!currentCC->get().present() || currentCC->get().get() != cci) {
@ -2654,7 +2678,7 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
startRole(Role::CLUSTER_CONTROLLER, cci.id(), UID());
inRole = true;
wait(clusterControllerCore(connRecord, cci, leaderFail, locality, configDBType));
wait(clusterControllerCore(cci, leaderFail, coordinators, locality, configDBType));
}
} catch (Error& e) {
if (inRole)
@ -2683,7 +2707,8 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
state bool hasConnected = false;
loop {
try {
wait(clusterController(connRecord, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
ServerCoordinators coordinators(connRecord);
wait(clusterController(coordinators, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
hasConnected = true;
} catch (Error& e) {
if (e.code() != error_code_coordinators_changed)

View File

@ -537,8 +537,7 @@ ACTOR Future<Void> changeCoordinators(Reference<ClusterRecoveryData> self) {
}
try {
state ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString());
wait(conn.resolveHostnames());
ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString());
wait(self->cstate.move(conn));
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)

View File

@ -236,6 +236,105 @@ struct ResolutionRequestBuilder {
}
};
ErrorOr<Optional<TenantMapEntry>> getTenantEntry(ProxyCommitData* commitData,
Optional<TenantNameRef> tenant,
Optional<int64_t> tenantId,
bool logOnFailure) {
if (tenant.present()) {
auto itr = commitData->tenantMap.find(tenant.get());
if (itr == commitData->tenantMap.end()) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyUnknownTenant", commitData->dbgid).detail("Tenant", tenant.get());
}
return unknown_tenant();
} else if (tenantId.present() && tenantId.get() != itr->second.id) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyTenantIdMismatch", commitData->dbgid)
.detail("Tenant", tenant.get())
.detail("TenantId", tenantId)
.detail("ExistingId", itr->second.id);
}
return unknown_tenant();
}
return ErrorOr<Optional<TenantMapEntry>>(Optional<TenantMapEntry>(itr->second));
}
return Optional<TenantMapEntry>();
}
bool verifyTenantPrefix(ProxyCommitData* const commitData, const CommitTransactionRequest& req) {
ErrorOr<Optional<TenantMapEntry>> tenantEntry =
getTenantEntry(commitData, req.tenantInfo.name.castTo<TenantNameRef>(), req.tenantInfo.tenantId, true);
if (tenantEntry.isError()) {
return true;
}
if (tenantEntry.get().present()) {
Key tenantPrefix = tenantEntry.get().get().prefix;
for (auto& m : req.transaction.mutations) {
if (m.param1 != metadataVersionKey) {
if (!m.param1.startsWith(tenantPrefix)) {
TraceEvent(SevWarnAlways, "TenantPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param1.toHexString());
return false;
}
if (m.type == MutationRef::ClearRange && !m.param2.startsWith(tenantPrefix)) {
TraceEvent(SevWarnAlways, "TenantClearRangePrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param2.toHexString());
return false;
} else if (m.type == MutationRef::SetVersionstampedKey) {
ASSERT(m.param1.size() >= 4);
uint8_t* key = const_cast<uint8_t*>(m.param1.begin());
int* offset = reinterpret_cast<int*>(&key[m.param1.size() - 4]);
if (*offset < tenantPrefix.size()) {
TraceEvent(SevWarnAlways, "TenantVersionstampInvalidOffset")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param1.toHexString())
.detail("Offset", *offset);
return false;
}
}
}
}
for (auto& rc : req.transaction.read_conflict_ranges) {
if (rc.begin != metadataVersionKey &&
(!rc.begin.startsWith(tenantPrefix) || !rc.end.startsWith(tenantPrefix))) {
TraceEvent(SevWarnAlways, "TenantReadConflictPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("BeginKey", rc.begin.toHexString())
.detail("EndKey", rc.end.toHexString());
return false;
}
}
for (auto& wc : req.transaction.write_conflict_ranges) {
if (wc.begin != metadataVersionKey &&
(!wc.begin.startsWith(tenantPrefix) || !wc.end.startsWith(tenantPrefix))) {
TraceEvent(SevWarnAlways, "TenantWriteConflictPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("BeginKey", wc.begin.toHexString())
.detail("EndKey", wc.end.toHexString());
return false;
}
}
}
return true;
}
ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int>> out,
FutureStream<CommitTransactionRequest> in,
@ -282,6 +381,13 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
.detail("Size", bytes)
.detail("Client", req.reply.getEndpoint().getPrimaryAddress());
}
if (!verifyTenantPrefix(commitData, req)) {
++commitData->stats.txnCommitErrors;
req.reply.sendError(illegal_tenant_access());
continue;
}
++commitData->stats.txnCommitIn;
if (req.debugID.present()) {
@ -450,35 +556,6 @@ ACTOR static Future<ResolveTransactionBatchReply> trackResolutionMetrics(Referen
return reply;
}
ErrorOr<Optional<TenantMapEntry>> getTenantEntry(ProxyCommitData* commitData,
Optional<TenantNameRef> tenant,
Optional<int64_t> tenantId,
bool logOnFailure) {
if (tenant.present()) {
auto itr = commitData->tenantMap.find(tenant.get());
if (itr == commitData->tenantMap.end()) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyUnknownTenant", commitData->dbgid).detail("Tenant", tenant.get());
}
return unknown_tenant();
} else if (tenantId.present() && tenantId.get() != itr->second.id) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyTenantIdMismatch", commitData->dbgid)
.detail("Tenant", tenant.get())
.detail("TenantId", tenantId)
.detail("ExistingId", itr->second.id);
}
return unknown_tenant();
}
return ErrorOr<Optional<TenantMapEntry>>(Optional<TenantMapEntry>(itr->second));
}
return Optional<TenantMapEntry>();
}
namespace CommitBatch {
struct CommitBatchContext {
@ -685,6 +762,11 @@ bool canReject(const std::vector<CommitTransactionRequest>& trs) {
return true;
}
double computeReleaseDelay(CommitBatchContext* self, double latencyBucket) {
return std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE,
self->batchOperations * self->pProxyCommitData->commitComputePerOperation[latencyBucket]);
}
ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData;
@ -708,6 +790,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
// Pre-resolution the commits
TEST(pProxyCommitData->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); // Wait for local batch
wait(pProxyCommitData->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber - 1));
pProxyCommitData->stats.computeLatency.addMeasurement(now() - timeStart);
double queuingDelay = g_network->now() - timeStart;
pProxyCommitData->stats.commitBatchQueuingDist->sampleSeconds(queuingDelay);
if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND ||
@ -736,10 +819,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
return Void();
}
self->releaseDelay =
delay(std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE,
self->batchOperations * pProxyCommitData->commitComputePerOperation[latencyBucket]),
TaskPriority::ProxyMasterVersionReply);
self->releaseDelay = delay(computeReleaseDelay(self, latencyBucket), TaskPriority::ProxyMasterVersionReply);
if (debugID.present()) {
g_traceBatch.addEvent(
@ -1385,8 +1465,10 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
self->computeDuration += g_network->timer() - self->computeStart;
if (self->batchOperations > 0) {
double estimatedDelay = computeReleaseDelay(self, self->latencyBucket);
double computePerOperation =
std::min(SERVER_KNOBS->MAX_COMPUTE_PER_OPERATION, self->computeDuration / self->batchOperations);
if (computePerOperation <= pProxyCommitData->commitComputePerOperation[self->latencyBucket]) {
pProxyCommitData->commitComputePerOperation[self->latencyBucket] = computePerOperation;
} else {
@ -1401,6 +1483,20 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
pProxyCommitData->stats.minComputeNS =
std::min<int64_t>(pProxyCommitData->stats.minComputeNS,
1e9 * pProxyCommitData->commitComputePerOperation[self->latencyBucket]);
if (estimatedDelay >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF ||
self->computeDuration >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF) {
TraceEvent(SevInfo, "LongComputeDuration", pProxyCommitData->dbgid)
.suppressFor(10.0)
.detail("EstimatedComputeDuration", estimatedDelay)
.detail("ComputeDuration", self->computeDuration)
.detail("ComputePerOperation", computePerOperation)
.detail("LatencyBucket", self->latencyBucket)
.detail("UpdatedComputePerOperationEstimate",
pProxyCommitData->commitComputePerOperation[self->latencyBucket])
.detail("BatchBytes", self->batchBytes)
.detail("BatchOperations", self->batchOperations);
}
}
pProxyCommitData->stats.processingMutationDist->sampleSeconds(now() - postResolutionQueuing);

View File

@ -26,21 +26,29 @@
#include "fdbserver/LeaderElection.h"
#include "flow/actorcompiler.h" // has to be last include
ACTOR Future<GenerationRegReadReply> waitAndSendRead(RequestStream<GenerationRegReadRequest> to,
GenerationRegReadRequest req) {
ACTOR Future<GenerationRegReadReply> waitAndSendRead(GenerationRegInterface stateServer, GenerationRegReadRequest req) {
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
state GenerationRegReadReply reply = wait(retryBrokenPromise(to, req));
state GenerationRegReadReply reply;
if (stateServer.hostname.present()) {
wait(store(reply, retryGetReplyFromHostname(req, stateServer.hostname.get(), WLTOKEN_GENERATIONREG_READ)));
} else {
wait(store(reply, retryBrokenPromise(stateServer.read, req)));
}
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
return reply;
}
ACTOR Future<UniqueGeneration> waitAndSendWrite(RequestStream<GenerationRegWriteRequest> to,
GenerationRegWriteRequest req) {
ACTOR Future<UniqueGeneration> waitAndSendWrite(GenerationRegInterface stateServer, GenerationRegWriteRequest req) {
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
state UniqueGeneration reply = wait(retryBrokenPromise(to, req));
state UniqueGeneration reply;
if (stateServer.hostname.present()) {
wait(store(reply, retryGetReplyFromHostname(req, stateServer.hostname.get(), WLTOKEN_GENERATIONREG_WRITE)));
} else {
wait(store(reply, retryBrokenPromise(stateServer.write, req)));
}
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
return reply;
@ -152,7 +160,7 @@ struct CoordinatedStateImpl {
state std::vector<Future<GenerationRegReadReply>> rep_reply;
for (int i = 0; i < replicas.size(); i++) {
Future<GenerationRegReadReply> reply =
waitAndSendRead(replicas[i].read, GenerationRegReadRequest(req.key, req.gen));
waitAndSendRead(replicas[i], GenerationRegReadRequest(req.key, req.gen));
rep_empty_reply.push_back(nonemptyToNever(reply));
rep_reply.push_back(emptyToNever(reply));
self->ac.add(success(reply));
@ -192,8 +200,7 @@ struct CoordinatedStateImpl {
state std::vector<GenerationRegInterface>& replicas = self->coordinators.stateServers;
state std::vector<Future<UniqueGeneration>> wrep_reply;
for (int i = 0; i < replicas.size(); i++) {
Future<UniqueGeneration> reply =
waitAndSendWrite(replicas[i].write, GenerationRegWriteRequest(req.kv, req.gen));
Future<UniqueGeneration> reply = waitAndSendWrite(replicas[i], GenerationRegWriteRequest(req.kv, req.gen));
wrep_reply.push_back(reply);
self->ac.add(success(reply));
}

View File

@ -98,12 +98,16 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local) : Client
}
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s) {
leaderElectionServers.emplace_back(*s);
stateServers.emplace_back(*s);
configServers.emplace_back(*s);
for (auto h : cs.hostnames) {
leaderElectionServers.emplace_back(h);
stateServers.emplace_back(h);
configServers.emplace_back(h);
}
for (auto s : cs.coordinators()) {
leaderElectionServers.emplace_back(s);
stateServers.emplace_back(s);
configServers.emplace_back(s);
}
}
@ -208,10 +212,8 @@ ACTOR Future<Void> openDatabase(ClientData* db,
int* clientCount,
Reference<AsyncVar<bool>> hasConnectedClients,
OpenDatabaseCoordRequest req,
Future<Void> checkStuck,
Reference<AsyncVar<Void>> coordinatorsChanged) {
Future<Void> checkStuck) {
state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents;
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
state Future<Void> clientInfoOnChange = db->clientInfo->onChange();
++(*clientCount);
@ -233,11 +235,6 @@ ACTOR Future<Void> openDatabase(ClientData* db,
clientInfoOnChange = db->clientInfo->onChange();
replyContents = db->clientInfo->get();
}
when(wait(coordinatorsChangedOnChange)) {
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
replyContents = coordinators_changed();
break;
}
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) {
if (db->clientInfo->get().read().id.isValid()) {
replyContents = db->clientInfo->get();
@ -268,10 +265,7 @@ ACTOR Future<Void> openDatabase(ClientData* db,
ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
Reference<AsyncVar<bool>> hasConnectedClients,
Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader,
ElectionResultRequest req,
Reference<AsyncVar<Void>> coordinatorsChanged) {
state bool coordinatorsChangeDetected = false;
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
ElectionResultRequest req) {
state Future<Void> currentElectedLeaderOnChange = currentElectedLeader->onChange();
++(*clientCount);
hasConnectedClients->set(true);
@ -281,20 +275,11 @@ ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
when(wait(yieldedFuture(currentElectedLeaderOnChange))) {
currentElectedLeaderOnChange = currentElectedLeader->onChange();
}
when(wait(coordinatorsChangedOnChange)) {
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
coordinatorsChangeDetected = true;
break;
}
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; }
}
}
if (coordinatorsChangeDetected) {
req.reply.sendError(coordinators_changed());
} else {
req.reply.send(currentElectedLeader->get());
}
req.reply.send(currentElectedLeader->get());
if (--(*clientCount) == 0) {
hasConnectedClients->set(false);
@ -325,8 +310,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader =
makeReference<AsyncVar<Optional<LeaderInfo>>>();
state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT);
state Reference<AsyncVar<Void>> coordinatorsChanged = makeReference<AsyncVar<Void>>();
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
state Future<Void> hasConnectedClientsOnChange = hasConnectedClients->onChange();
loop choose {
@ -338,14 +321,10 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
} else {
if (!leaderMon.isValid()) {
leaderMon = monitorLeaderAndGetClientInfo(
req.clusterKey, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
req.clusterKey, req.hostnames, req.coordinators, &clientData, currentElectedLeader);
}
actors.add(openDatabase(&clientData,
&clientCount,
hasConnectedClients,
req,
canConnectToLeader.checkStuck(),
coordinatorsChanged));
actors.add(
openDatabase(&clientData, &clientCount, hasConnectedClients, req, canConnectToLeader.checkStuck()));
}
}
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
@ -355,10 +334,9 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
} else {
if (!leaderMon.isValid()) {
leaderMon = monitorLeaderAndGetClientInfo(
req.key, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
req.key, req.hostnames, req.coordinators, &clientData, currentElectedLeader);
}
actors.add(remoteMonitorLeader(
&clientCount, hasConnectedClients, currentElectedLeader, req, coordinatorsChanged));
actors.add(remoteMonitorLeader(&clientCount, hasConnectedClients, currentElectedLeader, req));
}
}
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
@ -499,10 +477,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
}
}
when(wait(actors.getResult())) {}
when(wait(coordinatorsChangedOnChange)) {
leaderMon = Future<Void>();
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
}
}
}

View File

@ -153,17 +153,21 @@ struct CandidacyRequest {
struct ElectionResultRequest {
constexpr static FileIdentifier file_identifier = 11815465;
Key key;
std::vector<Hostname> hostnames;
std::vector<NetworkAddress> coordinators;
UID knownLeader;
ReplyPromise<Optional<LeaderInfo>> reply;
ElectionResultRequest() = default;
ElectionResultRequest(Key key, std::vector<NetworkAddress> coordinators, UID knownLeader)
: key(key), coordinators(std::move(coordinators)), knownLeader(knownLeader) {}
ElectionResultRequest(Key key,
std::vector<Hostname> hostnames,
std::vector<NetworkAddress> coordinators,
UID knownLeader)
: key(key), hostnames(std::move(hostnames)), coordinators(std::move(coordinators)), knownLeader(knownLeader) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, key, coordinators, knownLeader, reply);
serializer(ar, key, hostnames, coordinators, knownLeader, reply);
}
};

View File

@ -718,18 +718,19 @@ public:
bool recheck = !healthy && (lastReady != self->initialFailureReactionDelay.isReady() ||
(lastZeroHealthy && !self->zeroHealthyTeams->get()) || containsFailed);
// TraceEvent("TeamHealthChangeDetected", self->distributorId)
// .detail("Team", team->getDesc())
// .detail("ServersLeft", serversLeft)
// .detail("LastServersLeft", lastServersLeft)
// .detail("AnyUndesired", anyUndesired)
// .detail("LastAnyUndesired", lastAnyUndesired)
// .detail("AnyWrongConfiguration", anyWrongConfiguration)
// .detail("LastWrongConfiguration", lastWrongConfiguration)
// .detail("Recheck", recheck)
// .detail("BadTeam", badTeam)
// .detail("LastZeroHealthy", lastZeroHealthy)
// .detail("ZeroHealthyTeam", self->zeroHealthyTeams->get());
//TraceEvent("TeamHealthChangeDetected", self->distributorId)
// .detail("Team", team->getDesc())
// .detail("ServersLeft", serversLeft)
// .detail("LastServersLeft", lastServersLeft)
// .detail("AnyUndesired", anyUndesired)
// .detail("LastAnyUndesired", lastAnyUndesired)
// .detail("AnyWrongConfiguration", anyWrongConfiguration)
// .detail("LastWrongConfiguration", lastWrongConfiguration)
// .detail("ContainsWigglingServer", anyWigglingServer)
// .detail("Recheck", recheck)
// .detail("BadTeam", badTeam)
// .detail("LastZeroHealthy", lastZeroHealthy)
// .detail("ZeroHealthyTeam", self->zeroHealthyTeams->get());
lastReady = self->initialFailureReactionDelay.isReady();
lastZeroHealthy = self->zeroHealthyTeams->get();
@ -1103,9 +1104,8 @@ public:
if (worstStatus == DDTeamCollection::Status::WIGGLING && invalidWiggleServer(worstAddr, self, server)) {
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
.detail("Address", worstAddr.toString())
.detail("ProcessId", server->getLastKnownInterface().locality.processId())
.detail("WigglingId", self->wigglingId.present());
self->excludedServers.set(worstAddr, DDTeamCollection::Status::NONE);
.detail("ServerId", server->getId())
.detail("WigglingId", self->wigglingId.present() ? self->wigglingId.get().toString() : "");
worstStatus = DDTeamCollection::Status::NONE;
}
otherChanges.push_back(self->excludedServers.onChange(worstAddr));
@ -1127,10 +1127,9 @@ public:
if (testStatus == DDTeamCollection::Status::WIGGLING &&
invalidWiggleServer(testAddr, self, server)) {
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
.detail("Address", testAddr.toString())
.detail("ProcessId", server->getLastKnownInterface().locality.processId())
.detail("ValidWigglingId", self->wigglingId.present());
self->excludedServers.set(testAddr, DDTeamCollection::Status::NONE);
.detail("Address", worstAddr.toString())
.detail("ServerId", server->getId())
.detail("WigglingId", self->wigglingId.present() ? self->wigglingId.get().toString() : "");
testStatus = DDTeamCollection::Status::NONE;
}
@ -2052,7 +2051,7 @@ public:
"PerpetualStorageWigglePause",
self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", id)
.detail("ServerId", id)
.detail("BestTeamKeepStuckCount", self->bestTeamKeepStuckCount)
.detail("ExtraHealthyTeamCount", extraTeamCount)
.detail("HealthyTeamCount", self->healthyTeamCount);
@ -2065,7 +2064,7 @@ public:
moveFinishFuture = fv;
TraceEvent("PerpetualStorageWiggleStart", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", id)
.detail("ServerId", id)
.detail("ExtraHealthyTeamCount", extraTeamCount)
.detail("HealthyTeamCount", self->healthyTeamCount);
}
@ -2091,7 +2090,7 @@ public:
self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWiggleFinish", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", self->wigglingId.get());
.detail("ServerId", self->wigglingId.get());
wait(self->eraseStorageWiggleMap(&metadataMap, self->wigglingId.get()) &&
self->storageWiggler->finishWiggle());
@ -2112,7 +2111,7 @@ public:
self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWiggleExitingPause", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", self->wigglingId.get());
.detail("ServerId", self->wigglingId.get());
self->wigglingId.reset();
}

View File

@ -27,44 +27,29 @@
// Keep trying to become a leader by submitting itself to all coordinators.
// Monitor the health of all coordinators at the same time.
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
// to throw `coordinators_changed()` error
ACTOR Future<Void> submitCandidacy(Key key,
LeaderElectionRegInterface coord,
LeaderInfo myInfo,
UID prevChangeID,
AsyncTrigger* nomineeChange,
Optional<LeaderInfo>* nominee,
Optional<Hostname> hostname = Optional<Hostname>()) {
Optional<LeaderInfo>* nominee) {
loop {
state Optional<LeaderInfo> li;
if (coord.candidacy.getEndpoint().getPrimaryAddress().fromHostname) {
state ErrorOr<Optional<LeaderInfo>> rep = wait(coord.candidacy.tryGetReply(
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
TaskPriority::CoordinationReply));
if (rep.isError()) {
// Connecting to nominee failed, most likely due to connection failed.
TraceEvent("SubmitCandadicyError")
.error(rep.getError())
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
.detail("OldAddr", coord.candidacy.getEndpoint().getPrimaryAddress().toString());
if (rep.getError().code() == error_code_request_maybe_delivered) {
// Delay to prevent tight resolving loop due to outdated DNS cache
wait(delay(FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL));
throw coordinators_changed();
} else {
throw rep.getError();
}
} else if (rep.present()) {
li = rep.get();
}
if (coord.hostname.present()) {
wait(store(
li,
retryGetReplyFromHostname(
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
coord.hostname.get(),
WLTOKEN_LEADERELECTIONREG_CANDIDACY,
TaskPriority::CoordinationReply)));
} else {
Optional<LeaderInfo> tmp = wait(retryBrokenPromise(
coord.candidacy,
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
TaskPriority::CoordinationReply));
li = tmp;
wait(store(
li,
retryBrokenPromise(
coord.candidacy,
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
TaskPriority::CoordinationReply)));
}
wait(Future<Void>(Void())); // Make sure we weren't cancelled
@ -104,20 +89,26 @@ Future<Void> buggifyDelayedAsyncVar(Reference<AsyncVar<T>>& var) {
ACTOR Future<Void> changeLeaderCoordinators(ServerCoordinators coordinators, Value forwardingInfo) {
std::vector<Future<Void>> forwardRequests;
forwardRequests.reserve(coordinators.leaderElectionServers.size());
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++)
forwardRequests.push_back(retryBrokenPromise(coordinators.leaderElectionServers[i].forward,
ForwardRequest(coordinators.clusterKey, forwardingInfo)));
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
if (coordinators.leaderElectionServers[i].hostname.present()) {
forwardRequests.push_back(retryGetReplyFromHostname(ForwardRequest(coordinators.clusterKey, forwardingInfo),
coordinators.leaderElectionServers[i].hostname.get(),
WLTOKEN_LEADERELECTIONREG_FORWARD));
} else {
forwardRequests.push_back(retryBrokenPromise(coordinators.leaderElectionServers[i].forward,
ForwardRequest(coordinators.clusterKey, forwardingInfo)));
}
}
int quorum_size = forwardRequests.size() / 2 + 1;
wait(quorum(forwardRequests, quorum_size));
return Void();
}
ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> connRecord,
ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators,
Value proposedSerializedInterface,
Reference<AsyncVar<Value>> outSerializedLeader,
bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo) {
state ServerCoordinators coordinators(connRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
state LeaderInfo myInfo;
@ -134,6 +125,8 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
wait(delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
}
nominees.resize(coordinators.leaderElectionServers.size());
myInfo.serializedInfo = proposedSerializedInterface;
outSerializedLeader->set(Value());
@ -141,9 +134,6 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
(SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) ? buggifyDelayedAsyncVar(outSerializedLeader) : Void();
while (!iAmLeader) {
wait(connRecord->resolveHostnames());
coordinators = ServerCoordinators(connRecord);
nominees.resize(coordinators.leaderElectionServers.size());
state Future<Void> badCandidateTimeout;
myInfo.changeID = deterministicRandom()->randomUniqueID();
@ -153,19 +143,12 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
std::vector<Future<Void>> cand;
cand.reserve(coordinators.leaderElectionServers.size());
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
Optional<Hostname> hostname;
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
coordinators.leaderElectionServers[i].candidacy.getEndpoint().getPrimaryAddress());
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
hostname = r->second;
}
cand.push_back(submitCandidacy(coordinators.clusterKey,
coordinators.leaderElectionServers[i],
myInfo,
prevChangeID,
&nomineeChange,
&nominees[i],
hostname));
&nominees[i]));
}
candidacies = waitForAll(cand);
@ -220,24 +203,15 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
} else
badCandidateTimeout = Future<Void>();
try {
choose {
when(wait(nomineeChange.onTrigger())) {}
when(wait(badCandidateTimeout.isValid() ? badCandidateTimeout : Never())) {
TEST(true); // Bad candidate timeout
TraceEvent("LeaderBadCandidateTimeout", myInfo.changeID).log();
break;
}
when(wait(candidacies)) { ASSERT(false); }
when(wait(asyncPriorityInfo->onChange())) { break; }
}
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
connRecord->getConnectionString().resetToUnresolved();
choose {
when(wait(nomineeChange.onTrigger())) {}
when(wait(badCandidateTimeout.isValid() ? badCandidateTimeout : Never())) {
TEST(true); // Bad candidate timeout
TraceEvent("LeaderBadCandidateTimeout", myInfo.changeID).log();
break;
} else {
throw e;
}
when(wait(candidacies)) { ASSERT(false); }
when(wait(asyncPriorityInfo->onChange())) { break; }
}
}
@ -258,10 +232,17 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
state std::vector<Future<Void>> true_heartbeats;
state std::vector<Future<Void>> false_heartbeats;
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
Future<LeaderHeartbeatReply> hb =
retryBrokenPromise(coordinators.leaderElectionServers[i].leaderHeartbeat,
LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
TaskPriority::CoordinationReply);
Future<LeaderHeartbeatReply> hb;
if (coordinators.leaderElectionServers[i].hostname.present()) {
hb = retryGetReplyFromHostname(LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
coordinators.leaderElectionServers[i].hostname.get(),
WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT,
TaskPriority::CoordinationReply);
} else {
hb = retryBrokenPromise(coordinators.leaderElectionServers[i].leaderHeartbeat,
LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
TaskPriority::CoordinationReply);
}
true_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ true }));
false_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ false }));
}

View File

@ -37,7 +37,7 @@ class ServerCoordinators;
// eventually be set. If the return value is cancelled, the candidacy or leadership of the proposedInterface
// will eventually end.
template <class LeaderInterface>
Future<Void> tryBecomeLeader(Reference<IClusterConnectionRecord> const& connRecord,
Future<Void> tryBecomeLeader(ServerCoordinators const& coordinators,
LeaderInterface const& proposedInterface,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
bool hasConnected,
@ -50,20 +50,20 @@ Future<Void> changeLeaderCoordinators(ServerCoordinators const& coordinators, Va
#pragma region Implementation
#endif // __INTEL_COMPILER
Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> const& connRecord,
Future<Void> tryBecomeLeaderInternal(ServerCoordinators const& coordinators,
Value const& proposedSerializedInterface,
Reference<AsyncVar<Value>> const& outSerializedLeader,
bool const& hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo);
template <class LeaderInterface>
Future<Void> tryBecomeLeader(Reference<IClusterConnectionRecord> const& connRecord,
Future<Void> tryBecomeLeader(ServerCoordinators const& coordinators,
LeaderInterface const& proposedInterface,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo) {
auto serializedInfo = makeReference<AsyncVar<Value>>();
Future<Void> m = tryBecomeLeaderInternal(connRecord,
Future<Void> m = tryBecomeLeaderInternal(coordinators,
ObjectWriter::toValue(proposedInterface, IncludeVersion()),
serializedInfo,
hasConnected,

View File

@ -99,8 +99,17 @@ class GetCommittedVersionQuorum {
// Now roll node forward to match the largest committed version of
// the replies.
state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(self->replies[target]));
try {
state std::vector<ConfigFollowerInterface> interfs = self->replies[target];
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& interf : interfs) {
if (interf.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&interf.getChanges, interf.hostname.get(), WLTOKEN_CONFIGFOLLOWER_GETCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(interfs));
state Version lastSeenVersion = std::max(
rollback.present() ? rollback.get() : nodeVersion.lastCommitted, self->largestCompactedResponse);
ConfigFollowerGetChangesReply reply =
@ -108,9 +117,21 @@ class GetCommittedVersionQuorum {
&ConfigFollowerInterface::getChanges,
ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
wait(timeoutError(cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
if (cfi.hostname.present()) {
wait(timeoutError(
retryGetReplyFromHostname(
ConfigFollowerRollforwardRequest{
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations },
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
} else {
wait(timeoutError(
cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
}
} catch (Error& e) {
if (e.code() == error_code_transaction_too_old) {
// Seeing this trace is not necessarily a problem. There
@ -129,9 +150,18 @@ class GetCommittedVersionQuorum {
ACTOR static Future<Void> getCommittedVersionActor(GetCommittedVersionQuorum* self, ConfigFollowerInterface cfi) {
try {
ConfigFollowerGetCommittedVersionReply reply =
wait(timeoutError(cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{}),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
state ConfigFollowerGetCommittedVersionReply reply;
if (cfi.hostname.present()) {
wait(timeoutError(store(reply,
retryGetReplyFromHostname(ConfigFollowerGetCommittedVersionRequest{},
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
} else {
wait(timeoutError(
store(reply, cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
}
++self->totalRepliesReceived;
self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted);
@ -279,7 +309,15 @@ class PaxosConfigConsumerImpl {
std::vector<Future<Void>> compactionRequests;
compactionRequests.reserve(compactionRequests.size());
for (const auto& cfi : self->cfis) {
compactionRequests.push_back(cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
if (cfi.hostname.present()) {
compactionRequests.push_back(
retryGetReplyFromHostname(ConfigFollowerCompactRequest{ compactionVersion },
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_COMPACT));
} else {
compactionRequests.push_back(
cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
}
}
try {
wait(timeoutError(waitForAll(compactionRequests), 1.0));
@ -294,8 +332,18 @@ class PaxosConfigConsumerImpl {
self->resetCommittedVersionQuorum(); // TODO: This seems to fix a segfault, investigate more
try {
state Version committedVersion = wait(getCommittedVersion(self));
state Reference<ConfigFollowerInfo> configNodes(
new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas()));
state std::vector<ConfigFollowerInterface> readReplicas =
self->getCommittedVersionQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(&readReplica.getSnapshotAndChanges,
readReplica.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETSNAPSHOTANDCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> configNodes(new ConfigFollowerInfo(readReplicas));
ConfigFollowerGetSnapshotAndChangesReply reply =
wait(timeoutError(basicLoadBalance(configNodes,
&ConfigFollowerInterface::getSnapshotAndChanges,
@ -349,8 +397,18 @@ class PaxosConfigConsumerImpl {
// returned would be 1.
if (committedVersion > self->lastSeenVersion) {
ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1);
state Reference<ConfigFollowerInfo> configNodes(
new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas()));
state std::vector<ConfigFollowerInterface> readReplicas =
self->getCommittedVersionQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(&readReplica.getChanges,
readReplica.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> configNodes(new ConfigFollowerInfo(readReplicas));
ConfigFollowerGetChangesReply reply = wait(timeoutError(
basicLoadBalance(configNodes,
&ConfigFollowerInterface::getChanges,

View File

@ -73,6 +73,8 @@ struct ProxyStats {
LatencySample commitBatchingWindowSize;
LatencySample computeLatency;
Future<Void> logger;
int64_t maxComputeNS;
@ -126,6 +128,10 @@ struct ProxyStats {
id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
computeLatency("ComputeLatency",
id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
maxComputeNS(0), minComputeNS(1e12),
commitBatchQueuingDist(Histogram::getHistogram(LiteralStringRef("CommitProxy"),
LiteralStringRef("CommitBatchQueuing"),

View File

@ -161,9 +161,8 @@ ACTOR Future<std::vector<WorkerInterface>> getCoordWorkers(Database cx,
if (!coordinators.present()) {
throw operation_failed();
}
state ClusterConnectionString ccs(coordinators.get().toString());
wait(ccs.resolveHostnames());
std::vector<NetworkAddress> coordinatorsAddr = ccs.coordinators();
ClusterConnectionString ccs(coordinators.get().toString());
std::vector<NetworkAddress> coordinatorsAddr = wait(ccs.tryResolveHostnames());
std::set<NetworkAddress> coordinatorsAddrSet;
for (const auto& addr : coordinatorsAddr) {
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);

View File

@ -44,15 +44,29 @@ class SimpleConfigConsumerImpl {
loop {
state Version compactionVersion = self->lastSeenVersion;
wait(delayJittered(self->compactionInterval.get()));
wait(self->cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
if (self->cfi.hostname.present()) {
wait(retryGetReplyFromHostname(ConfigFollowerCompactRequest{ compactionVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_COMPACT));
} else {
wait(self->cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
}
++self->compactRequest;
broadcaster->compact(compactionVersion);
}
}
ACTOR static Future<Version> getCommittedVersion(SimpleConfigConsumerImpl* self) {
ConfigFollowerGetCommittedVersionReply committedVersionReply =
wait(self->cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{}));
state ConfigFollowerGetCommittedVersionReply committedVersionReply;
if (self->cfi.hostname.present()) {
wait(store(committedVersionReply,
retryGetReplyFromHostname(ConfigFollowerGetCommittedVersionRequest{},
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)));
} else {
wait(store(committedVersionReply,
self->cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})));
}
return committedVersionReply.lastCommitted;
}
@ -63,8 +77,18 @@ class SimpleConfigConsumerImpl {
state Version committedVersion = wait(getCommittedVersion(self));
ASSERT_GE(committedVersion, self->lastSeenVersion);
if (committedVersion > self->lastSeenVersion) {
ConfigFollowerGetChangesReply reply = wait(self->cfi.getChanges.getReply(
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion }));
state ConfigFollowerGetChangesReply reply;
if (self->cfi.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCHANGES)));
} else {
wait(store(reply,
self->cfi.getChanges.getReply(
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion })));
}
++self->successfulChangeRequest;
for (const auto& versionedMutation : reply.changes) {
TraceEvent te(SevDebug, "ConsumerFetchedMutation", self->id);
@ -96,8 +120,17 @@ class SimpleConfigConsumerImpl {
ACTOR static Future<Void> getSnapshotAndChanges(SimpleConfigConsumerImpl* self, ConfigBroadcaster* broadcaster) {
state Version committedVersion = wait(getCommittedVersion(self));
ConfigFollowerGetSnapshotAndChangesReply reply = wait(
self->cfi.getSnapshotAndChanges.getReply(ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion }));
state ConfigFollowerGetSnapshotAndChangesReply reply;
if (self->cfi.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETSNAPSHOTANDCHANGES)));
} else {
wait(store(reply,
self->cfi.getSnapshotAndChanges.getReply(
ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion })));
}
++self->snapshotRequest;
TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id)
.detail("SnapshotVersion", reply.snapshotVersion)

View File

@ -1980,8 +1980,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
TEST(useIPv6); // Use IPv6
TEST(!useIPv6); // Use IPv4
// TODO(renxuan): Use hostname 25% of the time, unless it is disabled
bool useHostname = false; // !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
// Use hostname 25% of the time, unless it is disabled
bool useHostname = !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
TEST(useHostname); // Use hostname
TEST(!useHostname); // Use IP address
NetworkAddressFromHostname fromHostname =

View File

@ -831,7 +831,8 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
}
}
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) {
std::vector<NetworkAddress> addressVec = wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
for (const auto& coordinator : addressVec) {
roles.addCoordinatorRole(coordinator);
}
@ -1689,8 +1690,7 @@ static JsonBuilderObject configurationFetcher(Optional<DatabaseConfiguration> co
}
statusObj["excluded_servers"] = excludedServersArr;
}
std::vector<ClientLeaderRegInterface> coordinatorLeaderServers = coordinators.clientLeaderServers;
int count = coordinatorLeaderServers.size();
int count = coordinators.clientLeaderServers.size();
statusObj["coordinators_count"] = count;
} catch (Error&) {
incomplete_reasons->insert("Could not retrieve all configuration status information.");
@ -2505,7 +2505,8 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance,
static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration,
ServerCoordinators coordinators,
std::vector<WorkerDetails>& workers,
const std::vector<NetworkAddress>& coordinatorAddresses,
const std::vector<WorkerDetails>& workers,
int extraTlogEligibleZones,
int minStorageReplicasRemaining,
int oldLogFaultTolerance,
@ -2521,11 +2522,11 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
int maxCoordinatorFailures = (coordinators.clientLeaderServers.size() - 1) / 2;
std::map<NetworkAddress, StringRef> workerZones;
for (auto& worker : workers) {
for (const auto& worker : workers) {
workerZones[worker.interf.address()] = worker.interf.locality.zoneId().orDefault(LiteralStringRef(""));
}
std::map<StringRef, int> coordinatorZoneCounts;
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) {
for (const auto& coordinator : coordinatorAddresses) {
auto zone = workerZones[coordinator];
coordinatorZoneCounts[zone] += 1;
}
@ -3061,6 +3062,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
wait(success(primaryDCFO));
std::vector<NetworkAddress> coordinatorAddresses =
wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
int logFaultTolerance = 100;
if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
statusObj["logs"] = tlogFetcher(&logFaultTolerance, db, address_workers);
@ -3070,6 +3074,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
statusObj["fault_tolerance"] =
faultToleranceStatusFetcher(configuration.get(),
coordinators,
coordinatorAddresses,
workers,
extraTlogEligibleZones,
minStorageReplicasRemaining,

View File

@ -1908,7 +1908,7 @@ Optional<std::tuple<Version, Version, std::vector<TLogLockResult>>> TagPartition
int absent = logSet->logServers.size() - results.size();
int safe_range_begin = logSet->tLogWriteAntiQuorum;
int new_safe_range_begin = std::min(logSet->tLogWriteAntiQuorum, (int)(results.size() - 1));
int safe_range_end = logSet->tLogReplicationFactor - absent;
int safe_range_end = std::max(logSet->tLogReplicationFactor - absent, 1);
if (!lastEnd.present() || ((safe_range_end > 0) && (safe_range_end - 1 < results.size()) &&
results[safe_range_end - 1].end < lastEnd.get())) {

View File

@ -859,9 +859,9 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
NetworkAddressList publicNetworkAddresses;
NetworkAddressList listenNetworkAddresses;
connectionRecord.resolveHostnamesBlocking();
auto& coordinators = connectionRecord.getConnectionString().coordinators();
ASSERT(coordinators.size() > 0);
std::vector<Hostname>& hostnames = connectionRecord.getConnectionString().hostnames;
const std::vector<NetworkAddress>& coords = connectionRecord.getConnectionString().coordinators();
ASSERT(hostnames.size() + coords.size() > 0);
for (int ii = 0; ii < publicAddressStrs.size(); ++ii) {
const std::string& publicAddressStr = publicAddressStrs[ii];
@ -930,13 +930,26 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
listenNetworkAddresses.secondaryAddress = currentListenAddress;
}
bool hasSameCoord = std::all_of(coordinators.begin(), coordinators.end(), [&](const NetworkAddress& address) {
bool matchCoordinatorsTls = std::all_of(coords.begin(), coords.end(), [&](const NetworkAddress& address) {
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
return address.isTLS() == currentPublicAddress.isTLS();
}
return true;
});
if (!hasSameCoord) {
// If true, further check hostnames.
if (matchCoordinatorsTls) {
matchCoordinatorsTls = std::all_of(hostnames.begin(), hostnames.end(), [&](Hostname& hostname) {
Optional<NetworkAddress> resolvedAddress = hostname.resolveBlocking();
if (resolvedAddress.present()) {
NetworkAddress address = resolvedAddress.get();
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
return address.isTLS() == currentPublicAddress.isTLS();
}
}
return true;
});
}
if (!matchCoordinatorsTls) {
fprintf(stderr,
"ERROR: TLS state of public address %s does not match in coordinator list.\n",
publicAddressStr.c_str());

View File

@ -3455,7 +3455,8 @@ ACTOR Future<GetRangeReqAndResultRef> quickGetKeyValues(
tr.setVersion(version);
// TODO: is DefaultPromiseEndpoint the best priority for this?
tr.trState->taskID = TaskPriority::DefaultPromiseEndpoint;
Future<RangeResult> rangeResultFuture = tr.getRange(prefixRange(prefix), Snapshot::True);
Future<RangeResult> rangeResultFuture =
tr.getRange(prefixRange(prefix), GetRangeLimits::ROW_LIMIT_UNLIMITED, Snapshot::True);
// TODO: async in case it needs to read from other servers.
RangeResult rangeResult = wait(rangeResultFuture);
a->dependsOn(rangeResult.arena());
@ -5182,7 +5183,11 @@ ACTOR Future<Version> fetchChangeFeed(StorageServer* data,
++data->counters.kvSystemClearRanges;
changeFeedInfo->destroy(cleanupVersion);
data->changeFeedCleanupDurable[changeFeedInfo->id] = cleanupVersion;
if (data->uidChangeFeed.count(changeFeedInfo->id)) {
// only register range for cleanup if it has not been already cleaned up
data->changeFeedCleanupDurable[changeFeedInfo->id] = cleanupVersion;
}
for (auto& it : data->changeFeedRemovals) {
it.second.send(changeFeedInfo->id);

View File

@ -2977,21 +2977,40 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> result,
MonitorLeaderInfo info) {
state ClusterConnectionString ccf = info.intermediateConnRecord->getConnectionString();
state std::vector<NetworkAddress> addrs = ccf.coordinators();
ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
state int coordinatorsSize = cs.hostnames.size() + cs.coordinators().size();
state ElectionResultRequest request;
state int index = 0;
state int successIndex = 0;
request.key = ccf.clusterKey();
request.coordinators = ccf.coordinators();
state std::vector<LeaderElectionRegInterface> leaderElectionServers;
deterministicRandom()->randomShuffle(addrs);
leaderElectionServers.reserve(coordinatorsSize);
for (const auto& h : cs.hostnames) {
leaderElectionServers.push_back(LeaderElectionRegInterface(h));
}
for (const auto& c : cs.coordinators()) {
leaderElectionServers.push_back(LeaderElectionRegInterface(c));
}
deterministicRandom()->randomShuffle(leaderElectionServers);
request.key = cs.clusterKey();
request.hostnames = cs.hostnames;
request.coordinators = cs.coordinators();
loop {
LeaderElectionRegInterface interf(addrs[index]);
LeaderElectionRegInterface interf = leaderElectionServers[index];
bool usingHostname = interf.hostname.present();
request.reply = ReplyPromise<Optional<LeaderInfo>>();
ErrorOr<Optional<LeaderInfo>> leader = wait(interf.electionResult.tryGetReply(request));
state ErrorOr<Optional<LeaderInfo>> leader;
if (usingHostname) {
wait(store(
leader,
tryGetReplyFromHostname(request, interf.hostname.get(), WLTOKEN_LEADERELECTIONREG_ELECTIONRESULT)));
} else {
wait(store(leader, interf.electionResult.tryGetReply(request)));
}
if (leader.present()) {
if (leader.get().present()) {
if (leader.get().get().forward) {
@ -3027,14 +3046,9 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
}
successIndex = index;
} else {
if (leader.isError() && leader.getError().code() == error_code_coordinators_changed) {
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
throw coordinators_changed();
}
index = (index + 1) % addrs.size();
index = (index + 1) % coordinatorsSize;
if (index == successIndex) {
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
throw coordinators_changed();
}
}
}
@ -3042,22 +3056,11 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
wait(connRecord->resolveHostnames());
state MonitorLeaderInfo info(connRecord);
loop {
try {
wait(info.intermediateConnRecord->resolveHostnames());
MonitorLeaderInfo _info =
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
info = _info;
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorLeaderWithDelayedCandidacyCoordinatorsChanged").suppressFor(1.0);
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
} else {
throw e;
}
}
MonitorLeaderInfo _info =
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
info = _info;
}
}
@ -3191,6 +3194,7 @@ ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
actors.push_back(serveProcess());
try {
ServerCoordinators coordinators(connRecord);
if (g_network->isSimulated()) {
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
}

View File

@ -2096,7 +2096,8 @@ struct ConsistencyCheckWorkload : TestWorkload {
return false;
}
state ClusterConnectionString old(currentKey.get().toString());
ClusterConnectionString old(currentKey.get().toString());
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
std::vector<ProcessData> workers = wait(::getWorkers(&tr));
@ -2106,7 +2107,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
}
std::set<Optional<Standalone<StringRef>>> checkDuplicates;
for (const auto& addr : old.coordinators()) {
for (const auto& addr : oldCoordinators) {
auto findResult = addr_locality.find(addr);
if (findResult != addr_locality.end()) {
if (checkDuplicates.count(findResult->second.zoneId())) {

View File

@ -329,9 +329,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
for (int j = i; j < end; j++) {
if (deterministicRandom()->random01() < self->initialKeyDensity) {
Key key = self->getKeyForIndex(tenantNum, j);
if (key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
if (key.size() <= getMaxWriteKeySize(key, false)) {
Value value = self->getRandomValue();
value = value.substr(
0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
@ -1091,24 +1089,22 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
pos = littleEndian32(*(int32_t*)&value.end()[-4]);
}
contract = {
std::make_pair(error_code_key_too_large,
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
std::make_pair(error_code_value_too_large,
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
std::make_pair(
error_code_invalid_mutation_type,
ExceptionContract::requiredIf(!isValidMutationType(op) || !isAtomicOp((MutationRef::Type)op))),
std::make_pair(error_code_key_outside_legal_range,
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),
std::make_pair(
error_code_client_invalid_operation,
ExceptionContract::requiredIf(
(op == MutationRef::SetVersionstampedKey && (pos < 0 || pos + 10 > key.size() - 4)) ||
(op == MutationRef::SetVersionstampedValue && (pos < 0 || pos + 10 > value.size() - 4))))
};
contract = { std::make_pair(error_code_key_too_large,
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
: ExceptionContract::Never),
std::make_pair(error_code_value_too_large,
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
std::make_pair(error_code_invalid_mutation_type,
ExceptionContract::requiredIf(!isValidMutationType(op) ||
!isAtomicOp((MutationRef::Type)op))),
std::make_pair(error_code_key_outside_legal_range,
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),
std::make_pair(error_code_client_invalid_operation,
ExceptionContract::requiredIf((op == MutationRef::SetVersionstampedKey &&
(pos < 0 || pos + 10 > key.size() - 4)) ||
(op == MutationRef::SetVersionstampedValue &&
(pos < 0 || pos + 10 > value.size() - 4)))) };
}
void callback(Reference<ITransaction> tr) override { tr->atomicOp(key, value, (FDBMutationTypes::Option)op); }
@ -1131,11 +1127,10 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
key = makeKey();
}
value = makeValue();
contract = { std::make_pair(
error_code_key_too_large,
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
contract = { std::make_pair(error_code_key_too_large,
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
: ExceptionContract::Never),
std::make_pair(error_code_value_too_large,
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
std::make_pair(error_code_key_outside_legal_range,
@ -1268,11 +1263,11 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
TestWatch(unsigned int id, FuzzApiCorrectnessWorkload* workload, Reference<ITransaction> tr)
: BaseTest(id, workload, "TestWatch") {
key = makeKey();
contract = { std::make_pair(
error_code_key_too_large,
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
printf("Watching: %d %s\n", key.size(), printable(key.substr(0, std::min(key.size(), 20))).c_str());
contract = { std::make_pair(error_code_key_too_large,
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
: ExceptionContract::Never),
std::make_pair(error_code_watches_disabled, ExceptionContract::Possible),
std::make_pair(error_code_key_outside_legal_range,
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),

View File

@ -541,7 +541,12 @@ struct RemoveServersSafelyWorkload : TestWorkload {
state AddressExclusion coordExcl;
// Exclude a coordinator under buggify, but only if fault tolerance is > 0 and kill set is non-empty already
if (BUGGIFY && toKill.size()) {
std::vector<NetworkAddress> coordinators = wait(getCoordinators(cx));
Optional<ClusterConnectionString> csOptional = wait(getConnectionString(cx));
state std::vector<NetworkAddress> coordinators;
if (csOptional.present()) {
ClusterConnectionString cs = csOptional.get();
wait(store(coordinators, cs.tryResolveHostnames()));
}
if (coordinators.size() > 2) {
auto randomCoordinator = deterministicRandom()->randomChoice(coordinators);
coordExcl = AddressExclusion(randomCoordinator.ip, randomCoordinator.port);

View File

@ -957,9 +957,9 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
boost::split(
process_addresses, coordinator_processes_key.get().toString(), [](char c) { return c == ','; });
ASSERT(process_addresses.size() == cs.coordinators().size() + cs.hostnames.size());
wait(cs.resolveHostnames());
// compare the coordinator process network addresses one by one
for (const auto& network_address : cs.coordinators()) {
std::vector<NetworkAddress> coordinators = wait(cs.tryResolveHostnames());
for (const auto& network_address : coordinators) {
ASSERT(std::find(process_addresses.begin(), process_addresses.end(), network_address.toString()) !=
process_addresses.end());
}
@ -1077,19 +1077,20 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
Optional<Value> res = wait(tx->get(coordinatorsKey));
ASSERT(res.present()); // Otherwise, database is in a bad state
state ClusterConnectionString csNew(res.get().toString());
wait(csNew.resolveHostnames());
ASSERT(csNew.coordinators().size() == old_coordinators_processes.size() + 1);
ClusterConnectionString csNew(res.get().toString());
// verify the cluster decription
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
ASSERT(csNew.hostnames.size() + csNew.coordinators().size() ==
old_coordinators_processes.size() + 1);
std::vector<NetworkAddress> newCoordinators = wait(csNew.tryResolveHostnames());
// verify the coordinators' addresses
for (const auto& network_address : csNew.coordinators()) {
for (const auto& network_address : newCoordinators) {
std::string address_str = network_address.toString();
ASSERT(std::find(old_coordinators_processes.begin(),
old_coordinators_processes.end(),
address_str) != old_coordinators_processes.end() ||
new_coordinator_process == address_str);
}
// verify the cluster decription
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
tx->reset();
} catch (Error& e) {
wait(tx->onError(e));

View File

@ -653,9 +653,7 @@ struct WriteDuringReadWorkload : TestWorkload {
for (int j = i; j < end; j++) {
if (deterministicRandom()->random01() < self->initialKeyDensity) {
Key key = self->getKeyForIndex(j);
if (key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
if (key.size() <= getMaxWriteKeySize(key, false)) {
Value value = self->getRandomValue();
value =
value.substr(0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
@ -898,18 +896,10 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.clear(range);
if (!noConflict) {
KeyRangeRef conflict(
range.begin.substr(0,
std::min<int>(range.begin.size(),
(range.begin.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
range.begin.substr(
0, std::min<int>(range.begin.size(), getMaxClearKeySize(range.begin) + 1)),
range.end.substr(
0, std::min<int>(range.end.size(), getMaxClearKeySize(range.end) + 1)));
self->addedConflicts.insert(conflict, true);
}
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
@ -922,9 +912,7 @@ struct WriteDuringReadWorkload : TestWorkload {
if (noConflict)
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.clear(key);
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
if (!noConflict && key.size() <= getMaxClearKeySize(key)) {
self->addedConflicts.insert(key, true);
}
self->memoryDatabase.erase(key);
@ -936,18 +924,9 @@ struct WriteDuringReadWorkload : TestWorkload {
//TraceEvent("WDRAddWriteConflict").detail("Range", range);
tr.addWriteConflictRange(range);
KeyRangeRef conflict(
range.begin.substr(0,
std::min<int>(range.begin.size(),
(range.begin.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
range.begin.substr(
0, std::min<int>(range.begin.size(), getMaxKeySize(range.begin) + 1)),
range.end.substr(0, std::min<int>(range.end.size(), getMaxKeySize(range.end) + 1)));
self->addedConflicts.insert(conflict, true);
} else if (operationType == 8 && !disableDelay) {
double maxTime = 6.0;
@ -991,18 +970,10 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.atomicOp(versionStampKey, value, MutationRef::SetVersionstampedKey);
tr.clear(range);
KeyRangeRef conflict(
range.begin.substr(0,
std::min<int>(range.begin.size(),
(range.begin.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
range.begin.substr(
0, std::min<int>(range.begin.size(), getMaxClearKeySize(range.begin) + 1)),
range.end.substr(
0, std::min<int>(range.end.size(), getMaxClearKeySize(range.end) + 1)));
self->addedConflicts.insert(conflict, true);
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
self->memoryDatabase.lower_bound(range.end));
@ -1043,10 +1014,9 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.atomicOp(key, value, opType);
//TraceEvent("WDRAtomicOpSuccess").detail("Key", key).detail("Value", value.size());
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (!noConflict && key.size() <= getMaxWriteKeySize(key, false)) {
self->addedConflicts.insert(key, true);
}
Optional<Value> existing = self->memoryGet(&self->memoryDatabase, key);
self->memoryDatabase[key] =
self->applyAtomicOp(existing.present() ? Optional<StringRef>(existing.get())
@ -1063,10 +1033,9 @@ struct WriteDuringReadWorkload : TestWorkload {
if (noConflict)
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.set(key, value);
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (!noConflict && key.size() <= getMaxWriteKeySize(key, false)) {
self->addedConflicts.insert(key, true);
}
//TraceEvent("WDRSetSuccess").detail("Key", key).detail("Value", value.size());
self->memoryDatabase[key] = value;
}

View File

@ -39,6 +39,9 @@
#include "flow/flow.h"
#include "flow/genericactors.actor.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/aes.h>
#include <openssl/engine.h>
#include <openssl/evp.h>

View File

@ -84,6 +84,10 @@ set(FLOW_SRCS
actorcompiler.h
crc32c.h
crc32c.cpp
ppc-asm.h
crc32.S
crc32_wrapper.h
crc32_wrapper.c
error_definitions.h
${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h
flat_buffers.cpp
@ -172,6 +176,10 @@ if(NOT WITH_TLS)
else()
target_link_libraries(flow PUBLIC OpenSSL::SSL)
target_link_libraries(flow_sampling PUBLIC OpenSSL::SSL)
if(USE_WOLFSSL)
target_include_directories(flow SYSTEM BEFORE PUBLIC ${WOLFSSL_INCLUDE_DIR}/wolfssl)
target_include_directories(flow_sampling SYSTEM BEFORE PUBLIC ${WOLFSSL_INCLUDE_DIR}/wolfssl)
endif()
endif()
target_link_libraries(flow PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
target_link_libraries(flow_sampling PUBLIC Threads::Threads ${CMAKE_DL_LIBS})

View File

@ -29,6 +29,12 @@
#define BOOST_DATE_TIME_NO_LIB
#define BOOST_REGEX_NO_LIB
#include <boost/asio.hpp>
#ifndef TLS_DISABLED
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include "boost/asio/ssl.hpp"
#endif
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <boost/range.hpp>
#include <boost/algorithm/string/join.hpp>

View File

@ -32,6 +32,9 @@
#include "flow/FastRef.h"
#include "flow/flow.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/aes.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>

View File

@ -38,6 +38,9 @@ void LoadedTLSConfig::print(FILE* fp) {
#include <exception>
#include <map>
#include <set>
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/objects.h>
#include <openssl/bio.h>
#include <openssl/err.h>

View File

@ -39,6 +39,9 @@
#ifndef TLS_DISABLED
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/x509.h>
typedef int NID;
@ -280,4 +283,4 @@ public:
" and format of CONSTRAINTS are plugin-specific.\n"
#include "flow/unactorcompiler.h"
#endif
#endif

791
flow/crc32.S Normal file
View File

@ -0,0 +1,791 @@
#if defined(__powerpc64__)
/*
* Calculate the checksum of data that is 16 byte aligned and a multiple of
* 16 bytes.
*
* The first step is to reduce it to 1024 bits. We do this in 8 parallel
* chunks in order to mask the latency of the vpmsum instructions. If we
* have more than 32 kB of data to checksum we repeat this step multiple
* times, passing in the previous 1024 bits.
*
* The next step is to reduce the 1024 bits to 64 bits. This step adds
* 32 bits of 0s to the end - this matches what a CRC does. We just
* calculate constants that land the data in this 32 bits.
*
* We then use fixed point Barrett reduction to compute a mod n over GF(2)
* for n = CRC using POWER8 instructions. We use x = 32.
*
* http://en.wikipedia.org/wiki/Barrett_reduction
*
* Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of either:
*
* a) the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version, or
* b) the Apache License, Version 2.0
*/
#if defined (__clang__)
#ifndef __ALTIVEC__
#define __ALTIVEC__
#endif
#include "ppc-asm.h"
#else
#include <ppc-asm.h>
#endif
#include "ppc-opcode.h"
#undef toc
#ifndef r1
#define r1 1
#endif
#ifndef r2
#define r2 2
#endif
.section .rodata
.balign 16
.byteswap_constant:
/* byte reverse permute constant */
.octa 0x0F0E0D0C0B0A09080706050403020100
#ifdef CRC32_CONSTANTS_HEADER
#include CRC32_CONSTANTS_HEADER
#else
#include "crc32_constants.h"
#endif
.text
#if defined(__BIG_ENDIAN__) && defined(REFLECT)
#define BYTESWAP_DATA
#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
#define BYTESWAP_DATA
#else
#undef BYTESWAP_DATA
#endif
#define off16 r25
#define off32 r26
#define off48 r27
#define off64 r28
#define off80 r29
#define off96 r30
#define off112 r31
#define const1 v24
#define const2 v25
#define byteswap v26
#define mask_32bit v27
#define mask_64bit v28
#define zeroes v29
#ifdef BYTESWAP_DATA
#define VPERM(A, B, C, D) vperm A, B, C, D
#else
#define VPERM(A, B, C, D)
#endif
#ifndef CRC32_FUNCTION_ASM
#define CRC32_FUNCTION_ASM __crc32_vpmsum
#endif
//clang-format off
/* unsigned int __crc32_vpmsum(unsigned int crc, void *p, unsigned long len) */
FUNC_START(CRC32_FUNCTION_ASM)
std r31,-8(r1)
std r30,-16(r1)
std r29,-24(r1)
std r28,-32(r1)
std r27,-40(r1)
std r26,-48(r1)
std r25,-56(r1)
li off16,16
li off32,32
li off48,48
li off64,64
li off80,80
li off96,96
li off112,112
li r0,0
/* Enough room for saving 10 non volatile VMX registers */
subi r6,r1,56+10*16
subi r7,r1,56+2*16
stvx v20,0,r6
stvx v21,off16,r6
stvx v22,off32,r6
stvx v23,off48,r6
stvx v24,off64,r6
stvx v25,off80,r6
stvx v26,off96,r6
stvx v27,off112,r6
stvx v28,0,r7
stvx v29,off16,r7
mr r10,r3
vxor zeroes,zeroes,zeroes
vspltisw v0,-1
vsldoi mask_32bit,zeroes,v0,4
vsldoi mask_64bit,zeroes,v0,8
/* Get the initial value into v8 */
vxor v8,v8,v8
MTVRD(v8, r3)
#ifdef REFLECT
vsldoi v8,zeroes,v8,8 /* shift into bottom 32 bits */
#else
vsldoi v8,v8,zeroes,4 /* shift into top 32 bits */
#endif
#ifdef BYTESWAP_DATA
addis r3,r2,.byteswap_constant@toc@ha
addi r3,r3,.byteswap_constant@toc@l
lvx byteswap,0,r3
addi r3,r3,16
#endif
cmpdi r5,256
blt .Lshort
rldicr r6,r5,0,56
/* Checksum in blocks of MAX_SIZE */
1: lis r7,MAX_SIZE@h
ori r7,r7,MAX_SIZE@l
mr r9,r7
cmpd r6,r7
bgt 2f
mr r7,r6
2: subf r6,r7,r6
/* our main loop does 128 bytes at a time */
srdi r7,r7,7
/*
* Work out the offset into the constants table to start at. Each
* constant is 16 bytes, and it is used against 128 bytes of input
* data - 128 / 16 = 8
*/
sldi r8,r7,4
srdi r9,r9,3
subf r8,r8,r9
/* We reduce our final 128 bytes in a separate step */
addi r7,r7,-1
mtctr r7
addis r3,r2,.constants@toc@ha
addi r3,r3,.constants@toc@l
/* Find the start of our constants */
add r3,r3,r8
/* zero v0-v7 which will contain our checksums */
vxor v0,v0,v0
vxor v1,v1,v1
vxor v2,v2,v2
vxor v3,v3,v3
vxor v4,v4,v4
vxor v5,v5,v5
vxor v6,v6,v6
vxor v7,v7,v7
lvx const1,0,r3
/*
* If we are looping back to consume more data we use the values
* already in v16-v23.
*/
cmpdi r0,1
beq 2f
/* First warm up pass */
lvx v16,0,r4
lvx v17,off16,r4
VPERM(v16,v16,v16,byteswap)
VPERM(v17,v17,v17,byteswap)
lvx v18,off32,r4
lvx v19,off48,r4
VPERM(v18,v18,v18,byteswap)
VPERM(v19,v19,v19,byteswap)
lvx v20,off64,r4
lvx v21,off80,r4
VPERM(v20,v20,v20,byteswap)
VPERM(v21,v21,v21,byteswap)
lvx v22,off96,r4
lvx v23,off112,r4
VPERM(v22,v22,v22,byteswap)
VPERM(v23,v23,v23,byteswap)
addi r4,r4,8*16
/* xor in initial value */
vxor v16,v16,v8
2: bdz .Lfirst_warm_up_done
addi r3,r3,16
lvx const2,0,r3
/* Second warm up pass */
VPMSUMD(v8,v16,const1)
lvx v16,0,r4
VPERM(v16,v16,v16,byteswap)
ori r2,r2,0
VPMSUMD(v9,v17,const1)
lvx v17,off16,r4
VPERM(v17,v17,v17,byteswap)
ori r2,r2,0
VPMSUMD(v10,v18,const1)
lvx v18,off32,r4
VPERM(v18,v18,v18,byteswap)
ori r2,r2,0
VPMSUMD(v11,v19,const1)
lvx v19,off48,r4
VPERM(v19,v19,v19,byteswap)
ori r2,r2,0
VPMSUMD(v12,v20,const1)
lvx v20,off64,r4
VPERM(v20,v20,v20,byteswap)
ori r2,r2,0
VPMSUMD(v13,v21,const1)
lvx v21,off80,r4
VPERM(v21,v21,v21,byteswap)
ori r2,r2,0
VPMSUMD(v14,v22,const1)
lvx v22,off96,r4
VPERM(v22,v22,v22,byteswap)
ori r2,r2,0
VPMSUMD(v15,v23,const1)
lvx v23,off112,r4
VPERM(v23,v23,v23,byteswap)
addi r4,r4,8*16
bdz .Lfirst_cool_down
/*
* main loop. We modulo schedule it such that it takes three iterations
* to complete - first iteration load, second iteration vpmsum, third
* iteration xor.
*/
.balign 16
4: lvx const1,0,r3
addi r3,r3,16
ori r2,r2,0
vxor v0,v0,v8
VPMSUMD(v8,v16,const2)
lvx v16,0,r4
VPERM(v16,v16,v16,byteswap)
ori r2,r2,0
vxor v1,v1,v9
VPMSUMD(v9,v17,const2)
lvx v17,off16,r4
VPERM(v17,v17,v17,byteswap)
ori r2,r2,0
vxor v2,v2,v10
VPMSUMD(v10,v18,const2)
lvx v18,off32,r4
VPERM(v18,v18,v18,byteswap)
ori r2,r2,0
vxor v3,v3,v11
VPMSUMD(v11,v19,const2)
lvx v19,off48,r4
VPERM(v19,v19,v19,byteswap)
lvx const2,0,r3
ori r2,r2,0
vxor v4,v4,v12
VPMSUMD(v12,v20,const1)
lvx v20,off64,r4
VPERM(v20,v20,v20,byteswap)
ori r2,r2,0
vxor v5,v5,v13
VPMSUMD(v13,v21,const1)
lvx v21,off80,r4
VPERM(v21,v21,v21,byteswap)
ori r2,r2,0
vxor v6,v6,v14
VPMSUMD(v14,v22,const1)
lvx v22,off96,r4
VPERM(v22,v22,v22,byteswap)
ori r2,r2,0
vxor v7,v7,v15
VPMSUMD(v15,v23,const1)
lvx v23,off112,r4
VPERM(v23,v23,v23,byteswap)
addi r4,r4,8*16
bdnz 4b
.Lfirst_cool_down:
/* First cool down pass */
lvx const1,0,r3
addi r3,r3,16
vxor v0,v0,v8
VPMSUMD(v8,v16,const1)
ori r2,r2,0
vxor v1,v1,v9
VPMSUMD(v9,v17,const1)
ori r2,r2,0
vxor v2,v2,v10
VPMSUMD(v10,v18,const1)
ori r2,r2,0
vxor v3,v3,v11
VPMSUMD(v11,v19,const1)
ori r2,r2,0
vxor v4,v4,v12
VPMSUMD(v12,v20,const1)
ori r2,r2,0
vxor v5,v5,v13
VPMSUMD(v13,v21,const1)
ori r2,r2,0
vxor v6,v6,v14
VPMSUMD(v14,v22,const1)
ori r2,r2,0
vxor v7,v7,v15
VPMSUMD(v15,v23,const1)
ori r2,r2,0
.Lsecond_cool_down:
/* Second cool down pass */
vxor v0,v0,v8
vxor v1,v1,v9
vxor v2,v2,v10
vxor v3,v3,v11
vxor v4,v4,v12
vxor v5,v5,v13
vxor v6,v6,v14
vxor v7,v7,v15
#ifdef REFLECT
/*
* vpmsumd produces a 96 bit result in the least significant bits
* of the register. Since we are bit reflected we have to shift it
* left 32 bits so it occupies the least significant bits in the
* bit reflected domain.
*/
vsldoi v0,v0,zeroes,4
vsldoi v1,v1,zeroes,4
vsldoi v2,v2,zeroes,4
vsldoi v3,v3,zeroes,4
vsldoi v4,v4,zeroes,4
vsldoi v5,v5,zeroes,4
vsldoi v6,v6,zeroes,4
vsldoi v7,v7,zeroes,4
#endif
/* xor with last 1024 bits */
lvx v8,0,r4
lvx v9,off16,r4
VPERM(v8,v8,v8,byteswap)
VPERM(v9,v9,v9,byteswap)
lvx v10,off32,r4
lvx v11,off48,r4
VPERM(v10,v10,v10,byteswap)
VPERM(v11,v11,v11,byteswap)
lvx v12,off64,r4
lvx v13,off80,r4
VPERM(v12,v12,v12,byteswap)
VPERM(v13,v13,v13,byteswap)
lvx v14,off96,r4
lvx v15,off112,r4
VPERM(v14,v14,v14,byteswap)
VPERM(v15,v15,v15,byteswap)
addi r4,r4,8*16
vxor v16,v0,v8
vxor v17,v1,v9
vxor v18,v2,v10
vxor v19,v3,v11
vxor v20,v4,v12
vxor v21,v5,v13
vxor v22,v6,v14
vxor v23,v7,v15
li r0,1
cmpdi r6,0
addi r6,r6,128
bne 1b
/* Work out how many bytes we have left */
andi. r5,r5,127
/* Calculate where in the constant table we need to start */
subfic r6,r5,128
add r3,r3,r6
/* How many 16 byte chunks are in the tail */
srdi r7,r5,4
mtctr r7
/*
* Reduce the previously calculated 1024 bits to 64 bits, shifting
* 32 bits to include the trailing 32 bits of zeros
*/
lvx v0,0,r3
lvx v1,off16,r3
lvx v2,off32,r3
lvx v3,off48,r3
lvx v4,off64,r3
lvx v5,off80,r3
lvx v6,off96,r3
lvx v7,off112,r3
addi r3,r3,8*16
VPMSUMW(v0,v16,v0)
VPMSUMW(v1,v17,v1)
VPMSUMW(v2,v18,v2)
VPMSUMW(v3,v19,v3)
VPMSUMW(v4,v20,v4)
VPMSUMW(v5,v21,v5)
VPMSUMW(v6,v22,v6)
VPMSUMW(v7,v23,v7)
/* Now reduce the tail (0 - 112 bytes) */
cmpdi r7,0
beq 1f
lvx v16,0,r4
lvx v17,0,r3
VPERM(v16,v16,v16,byteswap)
VPMSUMW(v16,v16,v17)
vxor v0,v0,v16
bdz 1f
lvx v16,off16,r4
lvx v17,off16,r3
VPERM(v16,v16,v16,byteswap)
VPMSUMW(v16,v16,v17)
vxor v0,v0,v16
bdz 1f
lvx v16,off32,r4
lvx v17,off32,r3
VPERM(v16,v16,v16,byteswap)
VPMSUMW(v16,v16,v17)
vxor v0,v0,v16
bdz 1f
lvx v16,off48,r4
lvx v17,off48,r3
VPERM(v16,v16,v16,byteswap)
VPMSUMW(v16,v16,v17)
vxor v0,v0,v16
bdz 1f
lvx v16,off64,r4
lvx v17,off64,r3
VPERM(v16,v16,v16,byteswap)
VPMSUMW(v16,v16,v17)
vxor v0,v0,v16
bdz 1f
lvx v16,off80,r4
lvx v17,off80,r3
VPERM(v16,v16,v16,byteswap)
VPMSUMW(v16,v16,v17)
vxor v0,v0,v16
bdz 1f
lvx v16,off96,r4
lvx v17,off96,r3
VPERM(v16,v16,v16,byteswap)
VPMSUMW(v16,v16,v17)
vxor v0,v0,v16
/* Now xor all the parallel chunks together */
1: vxor v0,v0,v1
vxor v2,v2,v3
vxor v4,v4,v5
vxor v6,v6,v7
vxor v0,v0,v2
vxor v4,v4,v6
vxor v0,v0,v4
.Lbarrett_reduction:
/* Barrett constants */
addis r3,r2,.barrett_constants@toc@ha
addi r3,r3,.barrett_constants@toc@l
lvx const1,0,r3
lvx const2,off16,r3
vsldoi v1,v0,v0,8
vxor v0,v0,v1 /* xor two 64 bit results together */
#ifdef REFLECT
/* shift left one bit */
vspltisb v1,1
vsl v0,v0,v1
#endif
vand v0,v0,mask_64bit
#ifndef REFLECT
/*
* Now for the Barrett reduction algorithm. The idea is to calculate q,
* the multiple of our polynomial that we need to subtract. By
* doing the computation 2x bits higher (ie 64 bits) and shifting the
* result back down 2x bits, we round down to the nearest multiple.
*/
VPMSUMD(v1,v0,const1) /* ma */
vsldoi v1,zeroes,v1,8 /* q = floor(ma/(2^64)) */
VPMSUMD(v1,v1,const2) /* qn */
vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
/*
* Get the result into r3. We need to shift it left 8 bytes:
* V0 [ 0 1 2 X ]
* V0 [ 0 X 2 3 ]
*/
vsldoi v0,v0,zeroes,8 /* shift result into top 64 bits */
#else
/*
* The reflected version of Barrett reduction. Instead of bit
* reflecting our data (which is expensive to do), we bit reflect our
* constants and our algorithm, which means the intermediate data in
* our vector registers goes from 0-63 instead of 63-0. We can reflect
* the algorithm because we don't carry in mod 2 arithmetic.
*/
vand v1,v0,mask_32bit /* bottom 32 bits of a */
VPMSUMD(v1,v1,const1) /* ma */
vand v1,v1,mask_32bit /* bottom 32bits of ma */
VPMSUMD(v1,v1,const2) /* qn */
vxor v0,v0,v1 /* a - qn, subtraction is xor in GF(2) */
/*
* Since we are bit reflected, the result (ie the low 32 bits) is in
* the high 32 bits. We just need to shift it left 4 bytes
* V0 [ 0 1 X 3 ]
* V0 [ 0 X 2 3 ]
*/
vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */
#endif
/* Get it into r3 */
MFVRD(r3, v0)
.Lout:
subi r6,r1,56+10*16
subi r7,r1,56+2*16
lvx v20,0,r6
lvx v21,off16,r6
lvx v22,off32,r6
lvx v23,off48,r6
lvx v24,off64,r6
lvx v25,off80,r6
lvx v26,off96,r6
lvx v27,off112,r6
lvx v28,0,r7
lvx v29,off16,r7
ld r31,-8(r1)
ld r30,-16(r1)
ld r29,-24(r1)
ld r28,-32(r1)
ld r27,-40(r1)
ld r26,-48(r1)
ld r25,-56(r1)
blr
.Lfirst_warm_up_done:
lvx const1,0,r3
addi r3,r3,16
VPMSUMD(v8,v16,const1)
VPMSUMD(v9,v17,const1)
VPMSUMD(v10,v18,const1)
VPMSUMD(v11,v19,const1)
VPMSUMD(v12,v20,const1)
VPMSUMD(v13,v21,const1)
VPMSUMD(v14,v22,const1)
VPMSUMD(v15,v23,const1)
b .Lsecond_cool_down
.Lshort:
cmpdi r5,0
beq .Lzero
addis r3,r2,.short_constants@toc@ha
addi r3,r3,.short_constants@toc@l
/* Calculate where in the constant table we need to start */
subfic r6,r5,256
add r3,r3,r6
/* How many 16 byte chunks? */
srdi r7,r5,4
mtctr r7
vxor v19,v19,v19
vxor v20,v20,v20
lvx v0,0,r4
lvx v16,0,r3
VPERM(v0,v0,v16,byteswap)
vxor v0,v0,v8 /* xor in initial value */
VPMSUMW(v0,v0,v16)
bdz .Lv0
lvx v1,off16,r4
lvx v17,off16,r3
VPERM(v1,v1,v17,byteswap)
VPMSUMW(v1,v1,v17)
bdz .Lv1
lvx v2,off32,r4
lvx v16,off32,r3
VPERM(v2,v2,v16,byteswap)
VPMSUMW(v2,v2,v16)
bdz .Lv2
lvx v3,off48,r4
lvx v17,off48,r3
VPERM(v3,v3,v17,byteswap)
VPMSUMW(v3,v3,v17)
bdz .Lv3
lvx v4,off64,r4
lvx v16,off64,r3
VPERM(v4,v4,v16,byteswap)
VPMSUMW(v4,v4,v16)
bdz .Lv4
lvx v5,off80,r4
lvx v17,off80,r3
VPERM(v5,v5,v17,byteswap)
VPMSUMW(v5,v5,v17)
bdz .Lv5
lvx v6,off96,r4
lvx v16,off96,r3
VPERM(v6,v6,v16,byteswap)
VPMSUMW(v6,v6,v16)
bdz .Lv6
lvx v7,off112,r4
lvx v17,off112,r3
VPERM(v7,v7,v17,byteswap)
VPMSUMW(v7,v7,v17)
bdz .Lv7
addi r3,r3,128
addi r4,r4,128
lvx v8,0,r4
lvx v16,0,r3
VPERM(v8,v8,v16,byteswap)
VPMSUMW(v8,v8,v16)
bdz .Lv8
lvx v9,off16,r4
lvx v17,off16,r3
VPERM(v9,v9,v17,byteswap)
VPMSUMW(v9,v9,v17)
bdz .Lv9
lvx v10,off32,r4
lvx v16,off32,r3
VPERM(v10,v10,v16,byteswap)
VPMSUMW(v10,v10,v16)
bdz .Lv10
lvx v11,off48,r4
lvx v17,off48,r3
VPERM(v11,v11,v17,byteswap)
VPMSUMW(v11,v11,v17)
bdz .Lv11
lvx v12,off64,r4
lvx v16,off64,r3
VPERM(v12,v12,v16,byteswap)
VPMSUMW(v12,v12,v16)
bdz .Lv12
lvx v13,off80,r4
lvx v17,off80,r3
VPERM(v13,v13,v17,byteswap)
VPMSUMW(v13,v13,v17)
bdz .Lv13
lvx v14,off96,r4
lvx v16,off96,r3
VPERM(v14,v14,v16,byteswap)
VPMSUMW(v14,v14,v16)
bdz .Lv14
lvx v15,off112,r4
lvx v17,off112,r3
VPERM(v15,v15,v17,byteswap)
VPMSUMW(v15,v15,v17)
.Lv15: vxor v19,v19,v15
.Lv14: vxor v20,v20,v14
.Lv13: vxor v19,v19,v13
.Lv12: vxor v20,v20,v12
.Lv11: vxor v19,v19,v11
.Lv10: vxor v20,v20,v10
.Lv9: vxor v19,v19,v9
.Lv8: vxor v20,v20,v8
.Lv7: vxor v19,v19,v7
.Lv6: vxor v20,v20,v6
.Lv5: vxor v19,v19,v5
.Lv4: vxor v20,v20,v4
.Lv3: vxor v19,v19,v3
.Lv2: vxor v20,v20,v2
.Lv1: vxor v19,v19,v1
.Lv0: vxor v20,v20,v0
vxor v0,v19,v20
b .Lbarrett_reduction
.Lzero:
mr r3,r10
b .Lout
FUNC_END(CRC32_FUNCTION_ASM)
#endif

1994
flow/crc32_constants.h Normal file

File diff suppressed because it is too large Load Diff

81
flow/crc32_wrapper.c Normal file
View File

@ -0,0 +1,81 @@
/*
* Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of either:
*
* a) the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version, or
* b) the Apache License, Version 2.0
*/
#define CRC_TABLE
#ifdef CRC32_CONSTANTS_HEADER
#include CRC32_CONSTANTS_HEADER
#else
#include "crc32_constants.h"
#endif
/* The POWER8 vector (vpmsum) kernel requires 16-byte-aligned input. */
#define VMX_ALIGN 16
#define VMX_ALIGN_MASK (VMX_ALIGN - 1)
#ifdef REFLECT
/* Scalar byte-at-a-time fallback for the bit-reflected CRC variant.
 * Used for unaligned head/tail bytes and for very short buffers.
 * crc_table is supplied by the constants header included above
 * (enabled via the CRC_TABLE define). */
static unsigned int crc32_align(unsigned int crc, unsigned char* p, unsigned long len) {
while (len--)
crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8);
return crc;
}
#else
/* Scalar fallback for the non-reflected (MSB-first) CRC variant. */
static unsigned int crc32_align(unsigned int crc, unsigned char* p, unsigned long len) {
while (len--)
crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8);
return crc;
}
#endif
#ifndef CRC32_FUNCTION
#define CRC32_FUNCTION crc32_vpmsum
#endif
#ifndef CRC32_FUNCTION_ASM
#define CRC32_FUNCTION_ASM __crc32_vpmsum
#endif
/* Assembly kernel (see the accompanying crc32.S); it handles only input that
 * is 16-byte aligned and a multiple of 16 bytes long. */
unsigned int CRC32_FUNCTION_ASM(unsigned int crc, unsigned char* p, unsigned long len);
/* Public entry point: CRC of p[0..len) seeded with crc.
 * Splits the buffer into an unaligned head, a 16-byte-aligned middle that the
 * vector kernel consumes, and a 0-15 byte tail, each folded into crc in order.
 * CRC_XOR, if defined (presumably by the constants header -- verify), selects
 * the conventional pre/post inversion of the CRC value. */
unsigned int CRC32_FUNCTION(unsigned int crc, unsigned char* p, unsigned long len) {
unsigned int prealign;
unsigned int tail;
#ifdef CRC_XOR
crc ^= 0xffffffff;
#endif
/* Too short to satisfy the vector kernel's constraints: do it all scalar. */
if (len < VMX_ALIGN + VMX_ALIGN_MASK) {
crc = crc32_align(crc, p, len);
goto out;
}
/* Consume bytes one at a time until p reaches 16-byte alignment. */
if ((unsigned long)p & VMX_ALIGN_MASK) {
prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
crc = crc32_align(crc, p, prealign);
len -= prealign;
p += prealign;
}
/* Bulk of the buffer: aligned, rounded down to a multiple of 16 bytes. */
crc = CRC32_FUNCTION_ASM(crc, p, len & ~VMX_ALIGN_MASK);
/* Remaining 0-15 tail bytes go through the scalar loop. */
tail = len & VMX_ALIGN_MASK;
if (tail) {
p += len & ~VMX_ALIGN_MASK;
crc = crc32_align(crc, p, tail);
}
out:
#ifdef CRC_XOR
crc ^= 0xffffffff;
#endif
return crc;
}

33
flow/crc32_wrapper.h Normal file
View File

@ -0,0 +1,33 @@
/*
 * crc32_wrapper.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef FLOW_CRC32_WRAPPER_H
#define FLOW_CRC32_WRAPPER_H
#pragma once

/* Entry point of the POWER8 vpmsum CRC32 implementation (crc32_wrapper.c /
 * crc32.S); only available on powerpc64 builds.
 * The function is defined in a C translation unit, so it must have C linkage.
 * Fix: the extern "C" block is now guarded by __cplusplus so this header can
 * also be included from C translation units (e.g. crc32_wrapper.c itself)
 * without a syntax error. */
#ifdef __powerpc64__
#ifdef __cplusplus
extern "C" {
#endif
unsigned int crc32_vpmsum(unsigned int crc, unsigned char* p, unsigned long len);
#ifdef __cplusplus
}
#endif
#endif // powerpc64
#endif // FLOW_CRC32_WRAPPER_H

View File

@ -266,13 +266,28 @@ append_hw(uint32_t crc, const uint8_t* buf, size_t len) {
}
#endif
#ifdef __powerpc64__
#include "crc32_wrapper.h"
#ifndef CRC32_FUNCTION
#define CRC32_FUNCTION crc32_vpmsum
#endif
uint32_t ppc_hw(uint32_t crc, const uint8_t* input, size_t length) {
return CRC32_FUNCTION(0, (unsigned char*)input, (unsigned long)length);
}
#endif
static bool hw_available = platform::isHwCrcSupported();
extern "C" uint32_t crc32c_append(uint32_t crc, const uint8_t* input, size_t length) {
#ifndef __powerpc64__
if (hw_available)
return append_hw(crc, input, length);
else
if (hw_available) {
#ifdef __powerpc64__
return ppc_hw(crc, input, length);
#endif
#ifndef __powerpc64__
return append_hw(crc, input, length);
#endif
} else
return append_table(crc, input, length);
}

View File

@ -230,6 +230,7 @@ ERROR( invalid_tenant_name, 2134, "Tenant name cannot begin with \\xff");
ERROR( tenant_prefix_allocator_conflict, 2135, "The database already has keys stored at the prefix allocated for the tenant");
ERROR( tenants_disabled, 2136, "Tenants have been disabled in the cluster");
ERROR( unknown_tenant, 2137, "Tenant is not available from this server")
ERROR( illegal_tenant_access, 2138, "Illegal tenant access")
// 2200 - errors from bindings and official APIs
ERROR( api_version_unset, 2200, "API version is not set" )

View File

@ -30,9 +30,6 @@
#include <variant>
#include <atomic>
#include "boost/asio.hpp"
#ifndef TLS_DISABLED
#include "boost/asio/ssl.hpp"
#endif
#include "flow/Arena.h"
#include "flow/BooleanParam.h"
#include "flow/IRandom.h"

366
flow/ppc-asm.h Normal file
View File

@ -0,0 +1,366 @@
/* PowerPC asm definitions for GNU C.
Copyright (C) 2002-2017 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Under winnt, 1) gas supports the following as names and 2) in particular
defining "toc" breaks the FUNC_START macro as ".toc" becomes ".2" */
#define r0 0
#define sp 1
#define toc 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31
#define cr0 0
#define cr1 1
#define cr2 2
#define cr3 3
#define cr4 4
#define cr5 5
#define cr6 6
#define cr7 7
#define f0 0
#define f1 1
#define f2 2
#define f3 3
#define f4 4
#define f5 5
#define f6 6
#define f7 7
#define f8 8
#define f9 9
#define f10 10
#define f11 11
#define f12 12
#define f13 13
#define f14 14
#define f15 15
#define f16 16
#define f17 17
#define f18 18
#define f19 19
#define f20 20
#define f21 21
#define f22 22
#define f23 23
#define f24 24
#define f25 25
#define f26 26
#define f27 27
#define f28 28
#define f29 29
#define f30 30
#define f31 31
#ifdef __VSX__
/* VSX floating-point register aliases f32-f63.
 * Fix: f50 was mistyped as 30, which would silently make any asm using f50
 * operate on f30 instead. */
#define f32 32
#define f33 33
#define f34 34
#define f35 35
#define f36 36
#define f37 37
#define f38 38
#define f39 39
#define f40 40
#define f41 41
#define f42 42
#define f43 43
#define f44 44
#define f45 45
#define f46 46
#define f47 47
#define f48 48
#define f49 49
#define f50 50
#define f51 51
#define f52 52
#define f53 53
#define f54 54
#define f55 55
#define f56 56
#define f57 57
#define f58 58
#define f59 59
#define f60 60
#define f61 61
#define f62 62
#define f63 63
#endif
#ifdef __ALTIVEC__
#define v0 0
#define v1 1
#define v2 2
#define v3 3
#define v4 4
#define v5 5
#define v6 6
#define v7 7
#define v8 8
#define v9 9
#define v10 10
#define v11 11
#define v12 12
#define v13 13
#define v14 14
#define v15 15
#define v16 16
#define v17 17
#define v18 18
#define v19 19
#define v20 20
#define v21 21
#define v22 22
#define v23 23
#define v24 24
#define v25 25
#define v26 26
#define v27 27
#define v28 28
#define v29 29
#define v30 30
#define v31 31
#endif
#ifdef __VSX__
/* VSX unified register aliases vs0-vs63.
 * Fix: vs50 was mistyped as 30, which would silently make any asm using vs50
 * operate on vs30 instead. */
#define vs0 0
#define vs1 1
#define vs2 2
#define vs3 3
#define vs4 4
#define vs5 5
#define vs6 6
#define vs7 7
#define vs8 8
#define vs9 9
#define vs10 10
#define vs11 11
#define vs12 12
#define vs13 13
#define vs14 14
#define vs15 15
#define vs16 16
#define vs17 17
#define vs18 18
#define vs19 19
#define vs20 20
#define vs21 21
#define vs22 22
#define vs23 23
#define vs24 24
#define vs25 25
#define vs26 26
#define vs27 27
#define vs28 28
#define vs29 29
#define vs30 30
#define vs31 31
#define vs32 32
#define vs33 33
#define vs34 34
#define vs35 35
#define vs36 36
#define vs37 37
#define vs38 38
#define vs39 39
#define vs40 40
#define vs41 41
#define vs42 42
#define vs43 43
#define vs44 44
#define vs45 45
#define vs46 46
#define vs47 47
#define vs48 48
#define vs49 49
#define vs50 50
#define vs51 51
#define vs52 52
#define vs53 53
#define vs54 54
#define vs55 55
#define vs56 56
#define vs57 57
#define vs58 58
#define vs59 59
#define vs60 60
#define vs61 61
#define vs62 62
#define vs63 63
#endif
/*
* Macros to glue together two tokens.
*/
#ifdef __STDC__
#define XGLUE(a, b) a##b
#else
#define XGLUE(a, b) a /**/ b
#endif
#define GLUE(a, b) XGLUE(a, b)
/*
* Macros to begin and end a function written in assembler. If -mcall-aixdesc
* or -mcall-nt, create a function descriptor with the given name, and create
* the real function with one or two leading periods respectively.
*/
/* _CALL_ELF == 2 (ELFv2, e.g. ppc64le): no function descriptors; the global
   entry materializes the TOC pointer in r2 and .localentry marks the local
   entry point. */
#if defined(__powerpc64__) && _CALL_ELF == 2
/* Defining "toc" above breaks @toc in assembler code. */
#undef toc
#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
#define JUMP_TARGET(name) FUNC_NAME(name)
/* NOTE(review): spacing below ("FUNC_NAME(name) : 0 : addis",
   "(.TOC.- 0b) @ha") looks clang-format-mangled; upstream asm reads
   "FUNC_NAME(name):" / "0:" / "(.TOC.-0b)@ha". Verify against the original
   ppc-asm.h before assembling. */
#define FUNC_START(name) \
.type FUNC_NAME(name), @function; \
.globl FUNC_NAME(name); \
FUNC_NAME(name) : 0 : addis 2, 12, (.TOC.- 0b) @ha; \
addi 2, 2, (.TOC.- 0b) @l; \
.localentry FUNC_NAME(name), .- FUNC_NAME(name)
#define HIDDEN_FUNC(name) FUNC_START(name).hidden FUNC_NAME(name);
#define FUNC_END(name) .size FUNC_NAME(name), .- FUNC_NAME(name)
/* ELFv1 ppc64: emit an .opd function descriptor (entry address, TOC base,
   environment); the code itself lives at the dot-prefixed symbol. */
#elif defined(__powerpc64__)
#define FUNC_NAME(name) GLUE(., name)
#define JUMP_TARGET(name) FUNC_NAME(name)
/* NOTE(review): ".quad.TOC.@tocbase;" appears formatter-mangled; upstream is
   ".quad .TOC.@tocbase". Confirm before building. */
#define FUNC_START(name) \
.section ".opd", "aw"; \
name: \
.quad GLUE(., name); \
.quad.TOC.@tocbase; \
.quad 0; \
.previous; \
.type GLUE(., name), @function; \
.globl name; \
.globl GLUE(., name); \
GLUE(., name) :
#define HIDDEN_FUNC(name) \
FUNC_START(name).hidden name; \
.hidden GLUE(., name);
#define FUNC_END(name) GLUE(.L, name) :.size GLUE(., name), GLUE(.L, name) - GLUE(., name)
/* AIX-style descriptors: descriptor section depends on -mrelocatable. */
#elif defined(_CALL_AIXDESC)
#ifdef _RELOCATABLE
#define DESC_SECTION ".got2"
#else
#define DESC_SECTION ".got1"
#endif
#define FUNC_NAME(name) GLUE(., name)
#define JUMP_TARGET(name) FUNC_NAME(name)
#define FUNC_START(name) \
.section DESC_SECTION, "aw"; \
name: \
.long GLUE(., name); \
.long _GLOBAL_OFFSET_TABLE_; \
.long 0; \
.previous; \
.type GLUE(., name), @function; \
.globl name; \
.globl GLUE(., name); \
GLUE(., name) :
#define HIDDEN_FUNC(name) \
FUNC_START(name).hidden name; \
.hidden GLUE(., name);
#define FUNC_END(name) GLUE(.L, name) :.size GLUE(., name), GLUE(.L, name) - GLUE(., name)
/* Default (32-bit / SysV): plain symbols; PIC calls go through the PLT. */
#else
#define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
#if defined __PIC__ || defined __pic__
#define JUMP_TARGET(name) FUNC_NAME(name @plt)
#else
#define JUMP_TARGET(name) FUNC_NAME(name)
#endif
#define FUNC_START(name) \
.type FUNC_NAME(name), @function; \
.globl FUNC_NAME(name); \
FUNC_NAME(name) :
#define HIDDEN_FUNC(name) FUNC_START(name).hidden FUNC_NAME(name);
#define FUNC_END(name) GLUE(.L, name) :.size FUNC_NAME(name), GLUE(.L, name) - FUNC_NAME(name)
#endif
/* Emit DWARF CFI directives only when the assembler supports them;
   otherwise the CFI_* macros expand to nothing. */
#ifdef IN_GCC
/* For HAVE_GAS_CFI_DIRECTIVE. */
#include "auto-host.h"
#ifdef HAVE_GAS_CFI_DIRECTIVE
#define CFI_STARTPROC .cfi_startproc
#define CFI_ENDPROC .cfi_endproc
#define CFI_OFFSET(reg, off) .cfi_offset reg, off
#define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
#define CFI_RESTORE(reg) .cfi_restore reg
#else
#define CFI_STARTPROC
#define CFI_ENDPROC
#define CFI_OFFSET(reg, off)
#define CFI_DEF_CFA_REGISTER(reg)
#define CFI_RESTORE(reg)
#endif
#endif
/* Non-executable-stack note for Linux.
   NOTE(review): ".section.note.GNU - stack.previous" appears
   formatter-mangled; upstream is `.section .note.GNU-stack` followed by
   `.previous`. Confirm before building. */
#if defined __linux__
.section.note.GNU - stack.previous
#endif

35
flow/ppc-opcode.h Normal file
View File

@ -0,0 +1,35 @@
/*
* Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of either:
*
* a) the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version, or
* b) the Apache License, Version 2.0
*/
#ifndef __OPCODES_H
#define __OPCODES_H
/* Place a 5-bit GPR number into the RA (<<16) / RB (<<11) fields. */
#define __PPC_RA(a) (((a)&0x1f) << 16)
#define __PPC_RB(b) (((b)&0x1f) << 11)
/* VSX registers are 6 bits wide: the low 5 bits go in the regular field
   and bit 5 (0x20) is stored separately at a different bit position. */
#define __PPC_XA(a) ((((a)&0x1f) << 16) | (((a)&0x20) >> 3))
#define __PPC_XB(b) ((((b)&0x1f) << 11) | (((b)&0x20) >> 4))
#define __PPC_XS(s) ((((s)&0x1f) << 21) | (((s)&0x20) >> 5))
#define __PPC_XT(s) __PPC_XS(s)
/* Combine target/source/operand register fields for XX3- and XX1-form
   instructions. */
#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
/* Base opcode words for instructions hand-encoded below. */
#define PPC_INST_VPMSUMW 0x10000488
#define PPC_INST_VPMSUMD 0x100004c8
#define PPC_INST_MFVSRD 0x7c000066
#define PPC_INST_MTVSRD 0x7c000166
/* Emit the raw instruction word via .long so assemblers that do not know
   these mnemonics can still build the file. MFVRD/MTVRD add 32 to the
   vector register number to select the upper VSX register file. */
#define VPMSUMW(t, a, b) .long PPC_INST_VPMSUMW | VSX_XX3((t), a, b)
#define VPMSUMD(t, a, b) .long PPC_INST_VPMSUMD | VSX_XX3((t), a, b)
#define MFVRD(a, t) .long PPC_INST_MFVSRD | VSX_XX1((t) + 32, a, 0)
#define MTVRD(t, a) .long PPC_INST_MTVSRD | VSX_XX1((t) + 32, a, 0)
#endif

View File

@ -254,7 +254,8 @@ push_docker_images="false"
if [ -n "${OKTETO_NAMESPACE+x}" ]; then
logg "RUNNING IN OKTETO/AWS"
# these are defaults for the Apple development environment
aws_region=$(curl -s "http://169.254.169.254/latest/meta-data/placement/region")
imdsv2_token=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
aws_region=$(curl -H "X-aws-ec2-metadata-token: ${imdsv2_token}" "http://169.254.169.254/latest/meta-data/placement/region")
aws_account_id=$(aws --output text sts get-caller-identity --query 'Account')
build_output_directory="${HOME}/build_output"
fdb_library_versions=( "${fdb_version}" )

View File

@ -221,8 +221,8 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES rare/StatusBuilderPerf.toml)
add_fdb_test(TEST_FILES rare/TLogVersionMessagesOverheadFactor.toml)
add_fdb_test(
TEST_FILES restarting/from_7.0.0/SnapIncrementalRestore-1.txt
restarting/from_7.0.0/SnapIncrementalRestore-2.txt)
TEST_FILES restarting/from_7.1.0/SnapIncrementalRestore-1.txt
restarting/from_7.1.0/SnapIncrementalRestore-2.txt)
add_fdb_test(
TEST_FILES restarting/from_7.1.0/ConfigureTestRestart-1.toml
restarting/from_7.1.0/ConfigureTestRestart-2.toml)
@ -257,17 +257,17 @@ if(WITH_PYTHON)
TEST_FILES restarting/to_7.1.0/CycleTestRestart-1.txt
restarting/to_7.1.0/CycleTestRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_7.0.0/SnapTestAttrition-1.txt
restarting/from_7.0.0/SnapTestAttrition-2.txt)
TEST_FILES restarting/from_7.1.0/SnapTestAttrition-1.txt
restarting/from_7.1.0/SnapTestAttrition-2.txt)
add_fdb_test(
TEST_FILES restarting/from_7.0.0/SnapTestSimpleRestart-1.txt
restarting/from_7.0.0/SnapTestSimpleRestart-2.txt)
TEST_FILES restarting/from_7.1.0/SnapTestSimpleRestart-1.txt
restarting/from_7.1.0/SnapTestSimpleRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_7.0.0/SnapTestRestart-1.txt
restarting/from_7.0.0/SnapTestRestart-2.txt)
TEST_FILES restarting/from_7.1.0/SnapTestRestart-1.txt
restarting/from_7.1.0/SnapTestRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/from_7.0.0/SnapCycleRestart-1.txt
restarting/from_7.0.0/SnapCycleRestart-2.txt)
TEST_FILES restarting/from_7.1.0/SnapCycleRestart-1.txt
restarting/from_7.1.0/SnapCycleRestart-2.txt)
add_fdb_test(
TEST_FILES restarting/to_7.1.0/ConfigureStorageMigrationTestRestart-1.toml
restarting/to_7.1.0/ConfigureStorageMigrationTestRestart-2.toml)

View File

@ -9,29 +9,28 @@ class TestDirectory:
self.builddir = builddir
def get_test_root(self):
root = os.path.join(self.builddir, 'test_runs')
root = os.path.join(self.builddir, "test_runs")
if not os.path.exists(root):
os.mkdir(root)
return root
def create_new_test_dir(self):
t = self.get_test_root()
ts = datetime.now().strftime('%Y_%m_%d__%H_%M_%S')
ts = datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
r = os.path.join(t, ts)
os.mkdir(r)
return r
def get_current_test_dir(self):
r = self.get_test_root()
dirs = list(filter(lambda x: os.path.isdir(os.path.join(r, x)),
os.listdir(r)))
dirs = list(filter(lambda x: os.path.isdir(os.path.join(r, x)), os.listdir(r)))
dirs.sort()
return os.path.join(r, dirs[-1])
if __name__ == '__main__':
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument('builddir')
parser.add_argument("builddir")
args = parser.parse_args()
td = TestDirectory(args.builddir)
td.create_new_test_dir()

View File

@ -11,24 +11,25 @@ import json
import xml.sax
import xml.sax.handler
import functools
import multiprocessing
import re
import shutil
import io
import random
_logger = None
def init_logging(loglevel, logdir):
global _logger
_logger = logging.getLogger('TestRunner')
_logger = logging.getLogger("TestRunner")
_logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(process)d - %(levelname)s - %(message)s')
formatter = logging.Formatter(
"%(asctime)s - %(process)d - %(levelname)s - %(message)s"
)
try:
os.makedirs(logdir)
except:
except OSError:
pass
fh = logging.FileHandler(os.path.join(logdir, 'run_test.log'))
fh = logging.FileHandler(os.path.join(logdir, "run_test.log"))
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
sh = logging.StreamHandler()
@ -39,69 +40,72 @@ def init_logging(loglevel, logdir):
class LogParser:
def __init__(self, basedir, name, infile, out, aggregationPolicy, symbolicateBacktraces):
def __init__(
self, basedir, name, infile, out, aggregation_policy, symbolicate_backtraces
):
self.basedir = basedir
self.name = name
self.infile = infile
self.backtraces = []
self.result = True
self.address_re = re.compile(r'(0x[0-9a-f]+\s+)+')
self.aggregationPolicy = aggregationPolicy
self.symbolicateBacktraces = symbolicateBacktraces
self.address_re = re.compile(r"(0x[0-9a-f]+\s+)+")
self.aggregationPolicy = aggregation_policy
self.symbolicateBacktraces = symbolicate_backtraces
self.outStream = None
if self.aggregationPolicy == 'NONE':
if self.aggregationPolicy == "NONE":
self.out = None
elif self.aggregationPolicy != 'ALL':
elif self.aggregationPolicy != "ALL":
self.out = io.StringIO()
self.outStream = out
else:
self.out = out
def write(self, txt):
if self.aggregationPolicy == 'NONE':
if self.aggregationPolicy == "NONE":
pass
elif not self.result or self.aggregationPolicy == 'ALL':
elif not self.result or self.aggregationPolicy == "ALL":
self.out.write(txt)
else:
self.outStream.wite(txt)
def fail(self):
self.result = False
if self.aggregationPolicy == 'FAILED':
if self.aggregationPolicy == "FAILED":
self.out.write(self.outStream.getvalue())
self.outStream = None
def writeHeader(self):
def write_header(self):
pass
def writeFooter(self):
def write_footer(self):
pass
def applyAddr2line(self, obj):
addresses = self.sanitizeBacktrace(obj)
def apply_address_to_line(self, obj):
addresses = self.sanitize_backtrace(obj)
assert addresses is not None
config = ''
binaryExt = ''
if sys.platform == 'win32':
#config = options.config
binaryExt = '.exe'
fdbbin = os.path.realpath(os.path.join(basedir, 'bin', 'Release', 'fdbserver' + binaryExt))
binary_ext = ""
if sys.platform == "win32":
# config = options.config
binary_ext = ".exe"
fdbbin = os.path.realpath(
os.path.join(basedir, "bin", "Release", "fdbserver" + binary_ext)
)
try:
resolved = subprocess.check_output(
('addr2line -e %s -C -f -i' % fdbbin).split() + addresses.split()).splitlines()
("addr2line -e %s -C -f -i" % fdbbin).split() + addresses.split()
).splitlines()
tmp = dict(**obj)
for i, line in enumerate(resolved):
tmp['line%04d' % i] = line.decode('utf-8')
tmp["line%04d" % i] = line.decode("utf-8")
return tmp
except (subprocess.CalledProcessError, UnicodeDecodeError):
obj['FailedAddr2LineResolution'] = 'true'
obj["FailedAddr2LineResolution"] = "true"
return obj
def sanitizeBacktrace(self, obj):
def sanitize_backtrace(self, obj):
if sys.platform != "linux" and sys.platform != "linux2":
return None
raw_backtrace = obj.get('Backtrace', None)
raw_backtrace = obj.get("Backtrace", None)
if raw_backtrace is None:
return None
match = self.address_re.search(raw_backtrace)
@ -109,24 +113,30 @@ class LogParser:
return None
return match.group(0)
def processTraces(self):
def process_traces(self):
linenr = 0
with open(self.infile) as f:
line = f.readline()
while line != '':
while line != "":
obj = self.processLine(line, linenr)
line = f.readline()
linenr += 1
if obj is None:
continue
if 'Type' not in obj:
if "Type" not in obj:
continue
if obj['Severity'] == '40' and obj.get('ErrorIsInjectedFault', None) != '1':
if (
obj["Severity"] == "40"
and obj.get("ErrorIsInjectedFault", None) != "1"
):
self.fail()
if self.name is not None:
obj['testname'] = self.name
if self.symbolicateBacktraces and self.sanitizeBacktrace(obj) is not None:
obj = self.applyAddr2line(obj)
obj["testname"] = self.name
if (
self.symbolicateBacktraces
and self.sanitize_backtrace(obj) is not None
):
obj = self.apply_address_to_line(obj)
self.writeObject(obj)
def log_trace_parse_error(self, linenr, e):
@ -134,53 +144,56 @@ class LogParser:
_logger.error("Process trace line file {} Failed".format(self.infile))
_logger.error("Exception {} args: {}".format(type(e), e.args))
_logger.error("Line: '{}'".format(linenr))
obj['Severity'] = "warning"
obj['Type'] = "TestInfastructureLogLineGarbled"
obj['isLastLine'] = "TestFailure"
obj['TraceLine'] = linenr
obj['File'] = self.infile
obj["Severity"] = "warning"
obj["Type"] = "TestInfastructureLogLineGarbled"
obj["isLastLine"] = "TestFailure"
obj["TraceLine"] = linenr
obj["File"] = self.infile
return obj
def processReturnCodes(self, return_codes):
def process_return_codes(self, return_codes):
for (command, return_code) in return_codes.items():
return_code_trace = {}
if return_code != 0:
return_code_trace['Severity'] = '40'
return_code_trace['Type'] = 'TestFailure'
return_code_trace["Severity"] = "40"
return_code_trace["Type"] = "TestFailure"
self.fail()
else:
return_code_trace['Severity'] = '10'
return_code_trace['Type'] = 'ReturnCode'
return_code_trace['Command'] = command
return_code_trace['ReturnCode'] = return_code
return_code_trace['testname'] = self.name
return_code_trace["Severity"] = "10"
return_code_trace["Type"] = "ReturnCode"
return_code_trace["Command"] = command
return_code_trace["ReturnCode"] = return_code
return_code_trace["testname"] = self.name
self.writeObject(return_code_trace)
class JSONParser(LogParser):
def __init__(self, basedir, name, infile, out, aggregationPolicy, symbolicateBacktraces):
super().__init__(basedir, name, infile, out, aggregationPolicy, symbolicateBacktraces)
def __init__(
self, basedir, name, infile, out, aggregation_policy, symbolicate_backtraces
):
super().__init__(
basedir, name, infile, out, aggregation_policy, symbolicate_backtraces
)
def processLine(self, line, linenr):
def process_line(self, line, linenr):
try:
return json.loads(line)
except Exception as e:
self.log_trace_parse_error(linenr, e)
def writeObject(self, obj):
def write_object(self, obj):
self.write(json.dumps(obj))
self.write('\n')
self.write("\n")
class XMLParser(LogParser):
class XMLHandler(xml.sax.handler.ContentHandler):
def __init__(self):
super().__init__()
self.result = {}
def startElement(self, name, attrs):
if name != 'Event':
if name != "Event":
return
for (key, value) in attrs.items():
self.result[key] = value
@ -200,74 +213,112 @@ class XMLParser(LogParser):
def warning(self, exception):
self.warnings.append(exception)
def __init__(self, basedir, name, infile, out, aggregationPolicy, symbolicateBacktraces):
super().__init__(basedir, name, infile, out, aggregationPolicy, symbolicateBacktraces)
def __init__(
self, basedir, name, infile, out, aggregation_policy, symbolicate_backtraces
):
super().__init__(
basedir, name, infile, out, aggregation_policy, symbolicate_backtraces
)
def writeHeader(self):
def write_header(self):
self.write('<?xml version="1.0"?>\n<Trace>\n')
def writeFooter(self):
def write_footer(self):
self.write("</Trace>")
def writeObject(self, obj):
self.write('<Event')
def write_object(self, obj):
self.write("<Event")
for (key, value) in obj.items():
self.write(' {}="{}"'.format(key, value))
self.write('/>\n')
self.write("/>\n")
def processLine(self, line, linenr):
def process_line(self, line, linenr):
if linenr < 3:
# the first two lines don't need to be parsed
return None
if line.startswith('</'):
if line.startswith("</"):
# don't parse the closing element
return None
handler = XMLParser.XMLHandler()
errorHandler = XMLParser.XMLErrorHandler()
xml.sax.parseString(line.encode('utf-8'), handler, errorHandler=errorHandler)
if len(errorHandler.fatalErrors) > 0:
return self.log_trace_parse_error(linenr, errorHandler.fatalErrors[0])
error_handler = XMLParser.XMLErrorHandler()
xml.sax.parseString(line.encode("utf-8"), handler, errorHandler=error_handler)
if len(error_handler.fatalErrors) > 0:
return self.log_trace_parse_error(linenr, error_handler.fatalErrors[0])
return handler.result
def get_traces(d, log_format):
p = re.compile('^trace\\..*\\.{}$'.format(log_format))
traces = list(map(
functools.partial(os.path.join, d),
filter(
lambda f: p.match(f) is not None,
os.listdir(d))))
if os.path.isdir(os.path.join(d, 'testrunner')):
traces += list(map(
functools.partial(os.path.join, d, 'testrunner'),
filter(
lambda f: p.match(f) is not None,
os.listdir(os.path.join(d, 'testrunner')))))
p = re.compile("^trace\\..*\\.{}$".format(log_format))
traces = list(
map(
functools.partial(os.path.join, d),
filter(lambda f: p.match(f) is not None, os.listdir(d)),
)
)
if os.path.isdir(os.path.join(d, "testrunner")):
traces += list(
map(
functools.partial(os.path.join, d, "testrunner"),
filter(
lambda f: p.match(f) is not None,
os.listdir(os.path.join(d, "testrunner")),
),
)
)
return traces
def process_traces(basedir, testname, path, out, aggregationPolicy, symbolicateBacktraces, log_format, return_codes, cmake_seed):
def process_traces(
basedir,
testname,
path,
out,
aggregation_policy,
symbolicate_backtraces,
log_format,
return_codes,
cmake_seed,
):
res = True
backtraces = []
parser = None
if log_format == 'json':
parser = JSONParser(basedir, testname, None, out, aggregationPolicy, symbolicateBacktraces)
if log_format == "json":
parser = JSONParser(
basedir, testname, None, out, aggregation_policy, symbolicate_backtraces
)
else:
parser = XMLParser(basedir, testname, None, out, aggregationPolicy, symbolicateBacktraces)
parser.processReturnCodes(return_codes)
parser = XMLParser(
basedir, testname, None, out, aggregation_policy, symbolicate_backtraces
)
parser.process_return_codes(return_codes)
res = parser.result
for trace in get_traces(path, log_format):
if log_format == 'json':
parser = JSONParser(basedir, testname, trace, out, aggregationPolicy, symbolicateBacktraces)
if log_format == "json":
parser = JSONParser(
basedir,
testname,
trace,
out,
aggregation_policy,
symbolicate_backtraces,
)
else:
parser = XMLParser(basedir, testname, trace, out, aggregationPolicy, symbolicateBacktraces)
parser = XMLParser(
basedir,
testname,
trace,
out,
aggregation_policy,
symbolicate_backtraces,
)
if not res:
parser.fail()
parser.processTraces()
parser.process_traces()
res = res and parser.result
parser.writeObject({'CMakeSEED': str(cmake_seed)})
parser.write_object({"CMakeSEED": str(cmake_seed)})
return res
class RestartTestPolicy:
def __init__(self, name, old_binary, new_binary):
# Default is to use the same binary for the restart test, unless constraints are satisfied.
@ -275,26 +326,46 @@ class RestartTestPolicy:
self._second_binary = new_binary
if old_binary is None:
_logger.info("No old binary provided")
old_binary_version_raw = subprocess.check_output([old_binary, '--version']).decode('utf-8')
match = re.match('FoundationDB.*\(v([0-9]+\.[0-9]+\.[0-9]+)\)', old_binary_version_raw)
old_binary_version_raw = subprocess.check_output(
[old_binary, "--version"]
).decode("utf-8")
match = re.match(
"FoundationDB.*\(v([0-9]+\.[0-9]+\.[0-9]+)\)", old_binary_version_raw
)
assert match, old_binary_version_raw
old_binary_version = tuple(map(int, match.group(1).split('.')))
match = re.match('.*/restarting/from_([0-9]+\.[0-9]+\.[0-9]+)/', name)
if match: # upgrading _from_
lower_bound = tuple(map(int, match.group(1).split('.')))
old_binary_version = tuple(map(int, match.group(1).split(".")))
match = re.match(".*/restarting/from_([0-9]+\.[0-9]+\.[0-9]+)/", name)
if match: # upgrading _from_
lower_bound = tuple(map(int, match.group(1).split(".")))
if old_binary_version >= lower_bound:
self._first_binary = old_binary
_logger.info("Using old binary as first binary: {} >= {}".format(old_binary_version, lower_bound))
_logger.info(
"Using old binary as first binary: {} >= {}".format(
old_binary_version, lower_bound
)
)
else:
_logger.info("Using new binary as first binary: {} < {}".format(old_binary_version, lower_bound))
match = re.match('.*/restarting/to_([0-9]+\.[0-9]+\.[0-9]+)/', name)
if match: # downgrading _to_
lower_bound = tuple(map(int, match.group(1).split('.')))
_logger.info(
"Using new binary as first binary: {} < {}".format(
old_binary_version, lower_bound
)
)
match = re.match(".*/restarting/to_([0-9]+\.[0-9]+\.[0-9]+)/", name)
if match: # downgrading _to_
lower_bound = tuple(map(int, match.group(1).split(".")))
if old_binary_version >= lower_bound:
self._second_binary = old_binary
_logger.info("Using old binary as second binary: {} >= {}".format(old_binary_version, lower_bound))
_logger.info(
"Using old binary as second binary: {} >= {}".format(
old_binary_version, lower_bound
)
)
else:
_logger.info("Using new binary as second binary: {} < {}".format(old_binary_version, lower_bound))
_logger.info(
"Using new binary as second binary: {} < {}".format(
old_binary_version, lower_bound
)
)
def first_binary(self):
return self._first_binary
@ -302,50 +373,53 @@ class RestartTestPolicy:
def second_binary(self):
return self._second_binary
def run_simulation_test(basedir, options):
config = ''
binaryExt = ''
if sys.platform == 'win32':
config = ""
binary_ext = ""
if sys.platform == "win32":
config = options.config
binaryExt = '.exe'
fdbserver = os.path.realpath(os.path.join(basedir, 'bin', config, 'fdbserver' + binaryExt))
pargs = [fdbserver,
'-r', options.testtype]
binary_ext = ".exe"
fdbserver = os.path.realpath(
os.path.join(basedir, "bin", config, "fdbserver" + binary_ext)
)
pargs = [fdbserver, "-r", options.testtype]
seed = 0
if options.seed is not None:
pargs.append('-s')
pargs.append("-s")
seed = int(options.seed, 0)
if options.test_number:
idx = int(options.test_number)
seed = ((seed + idx) % (2**32-2)) + 1
seed = ((seed + idx) % (2 ** 32 - 2)) + 1
pargs.append("{}".format(seed))
if options.testtype == 'test':
pargs.append('-C')
pargs.append(os.path.join(args.builddir, 'fdb.cluster'))
if options.testtype == "test":
pargs.append("-C")
pargs.append(os.path.join(args.builddir, "fdb.cluster"))
td = TestDirectory(basedir)
if options.buggify:
pargs.append('-b')
pargs.append('on')
pargs.append("-b")
pargs.append("on")
if options.crash:
pargs.append('--crash')
pargs.append("--crash")
# Use old style argument with underscores because old binaries don't support hyphens
pargs.append('--trace_format')
pargs.append("--trace_format")
pargs.append(options.log_format)
test_dir = td.get_current_test_dir()
if options.seed is not None:
seed = int(options.seed, 0)
if options.test_number:
idx = int(options.test_number)
seed = ((seed + idx) % (2**32-2)) + 1
wd = os.path.join(test_dir,
'test_{}'.format(options.name.replace('/', '_')))
seed = ((seed + idx) % (2 ** 32 - 2)) + 1
wd = os.path.join(test_dir, "test_{}".format(options.name.replace("/", "_")))
os.mkdir(wd)
return_codes = {} # {command: return_code}
return_codes = {} # {command: return_code}
first = True
restart_test_policy = None
if len(options.testfile) > 1:
restart_test_policy = RestartTestPolicy(options.testfile[0], options.old_binary, fdbserver)
restart_test_policy = RestartTestPolicy(
options.testfile[0], options.old_binary, fdbserver
)
for testfile in options.testfile:
tmp = list(pargs)
valgrind_args = []
@ -356,54 +430,67 @@ def run_simulation_test(basedir, options):
tmp[0] = restart_test_policy.second_binary()
# old_binary is not under test, so don't run under valgrind
if options.use_valgrind and tmp[0] == fdbserver:
valgrind_args = ['valgrind', '--error-exitcode=99', '--']
valgrind_args = ["valgrind", "--error-exitcode=99", "--"]
if not first:
tmp.append('-R')
tmp.append("-R")
if seed is not None:
seed = ((seed + 1) % (2**32-2))
seed = (seed + 1) % (2 ** 32 - 2)
first = False
if seed is not None:
tmp.append('-s')
tmp.append("-s")
tmp.append("{}".format(seed))
tmp.append('-f')
tmp.append("-f")
tmp.append(testfile)
tmp = valgrind_args + tmp
command = ' '.join(tmp)
command = " ".join(tmp)
_logger.info("COMMAND: {}".format(command))
proc = subprocess.Popen(tmp,
stdout=sys.stdout,
stderr=sys.stderr,
cwd=wd)
proc = subprocess.Popen(tmp, stdout=sys.stdout, stderr=sys.stderr, cwd=wd)
proc.wait()
return_codes[command] = proc.returncode
outfile = os.path.join(test_dir, 'traces.{}'.format(options.log_format))
outfile = os.path.join(test_dir, "traces.{}".format(options.log_format))
res = True
if options.aggregate_traces == 'NONE':
res = process_traces(basedir, options.name,
wd, None, 'NONE', options.symbolicate,
options.log_format, return_codes, options.seed)
if options.aggregate_traces == "NONE":
res = process_traces(
basedir,
options.name,
wd,
None,
"NONE",
options.symbolicate,
options.log_format,
return_codes,
options.seed,
)
else:
with open(outfile, 'a') as f:
with open(outfile, "a") as f:
os.lockf(f.fileno(), os.F_LOCK, 0)
pos = f.tell()
res = process_traces(basedir, options.name,
wd, f, options.aggregate_traces, options.symbolicate,
options.log_format, return_codes, options.seed)
res = process_traces(
basedir,
options.name,
wd,
f,
options.aggregate_traces,
options.symbolicate,
options.log_format,
return_codes,
options.seed,
)
f.seek(pos)
os.lockf(f.fileno(), os.F_ULOCK, 0)
if proc.returncode != 0 or res == False:
if proc.returncode != 0 or not res:
break
if options.keep_logs == 'NONE' or options.keep_logs == 'FAILED' and res:
if options.keep_logs == "NONE" or options.keep_logs == "FAILED" and res:
print("Deleting old logs in {}".format(wd))
traces = get_traces(wd, options.log_format)
for trace in traces:
os.remove(trace)
if options.keep_simdirs == 'NONE' or options.keep_simdirs == 'FAILED' and res:
print("Delete {}".format(os.path.join(wd, 'simfdb')))
if options.keep_simdirs == "NONE" or options.keep_simdirs == "FAILED" and res:
print("Delete {}".format(os.path.join(wd, "simfdb")))
# Don't fail if the directory doesn't exist.
try:
shutil.rmtree(os.path.join(wd, 'simfdb'))
shutil.rmtree(os.path.join(wd, "simfdb"))
except FileNotFoundError:
pass
if len(os.listdir(wd)) == 0:
@ -412,47 +499,83 @@ def run_simulation_test(basedir, options):
return res and proc.returncode == 0
if __name__ == '__main__':
testtypes = ['simulation', 'test']
parser = ArgumentParser(description='Run a test preprocess trace')
parser.add_argument('-b', '--builddir', help='Path to build directory')
parser.add_argument('-s', '--sourcedir', help='Path to source directory')
parser.add_argument('-n', '--name', help='Name of the test')
parser.add_argument('-t', '--testtype', choices=testtypes,
default='simulation',
help='The type of test to run, choices are [{}]'.format(
', '.join(testtypes))),
parser.add_argument('-B', '--buggify', action='store_true',
help='Enable buggify')
parser.add_argument('--logdir', default='logs',
help='Directory for logs')
parser.add_argument('-l', '--loglevel',
choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO',
'DEBUG'],
default='INFO')
parser.add_argument('-x', '--seed', required=False, default=None,
help='The seed to use for this test')
parser.add_argument('-N', '--test-number', required=False, default=None,
help='A unique number for this test (for seed generation)')
parser.add_argument('-F', '--log-format', required=False, default='xml',
choices=['xml', 'json'], help='Log format (json or xml)')
parser.add_argument('-O', '--old-binary', required=False, default=None,
help='Path to the old binary to use for upgrade tests')
parser.add_argument('-S', '--symbolicate', action='store_true', default=False,
help='Symbolicate backtraces in trace events')
parser.add_argument('--config', default=None,
help='Configuration type to test')
parser.add_argument('--crash', action='store_true', default=False,
help='Test ASSERT failures should crash the test')
parser.add_argument('--aggregate-traces', default='NONE',
choices=['NONE', 'FAILED', 'ALL'])
parser.add_argument('--keep-logs', default='FAILED',
choices=['NONE', 'FAILED', 'ALL'])
parser.add_argument('--keep-simdirs', default='NONE',
choices=['NONE', 'FAILED', 'ALL'])
parser.add_argument('testfile', nargs="+", help='The tests to run')
parser.add_argument('--use-valgrind', action='store_true', default=False,
help='Run under valgrind')
if __name__ == "__main__":
testtypes = ["simulation", "test"]
parser = ArgumentParser(description="Run a test preprocess trace")
parser.add_argument("-b", "--builddir", help="Path to build directory")
parser.add_argument("-s", "--sourcedir", help="Path to source directory")
parser.add_argument("-n", "--name", help="Name of the test")
parser.add_argument(
"-t",
"--testtype",
choices=testtypes,
default="simulation",
help="The type of test to run, choices are [{}]".format(", ".join(testtypes)),
),
parser.add_argument("-B", "--buggify", action="store_true", help="Enable buggify")
parser.add_argument("--logdir", default="logs", help="Directory for logs")
parser.add_argument(
"-l",
"--loglevel",
choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"],
default="INFO",
)
parser.add_argument(
"-x",
"--seed",
required=False,
default=None,
help="The seed to use for this test",
)
parser.add_argument(
"-N",
"--test-number",
required=False,
default=None,
help="A unique number for this test (for seed generation)",
)
parser.add_argument(
"-F",
"--log-format",
required=False,
default="xml",
choices=["xml", "json"],
help="Log format (json or xml)",
)
parser.add_argument(
"-O",
"--old-binary",
required=False,
default=None,
help="Path to the old binary to use for upgrade tests",
)
parser.add_argument(
"-S",
"--symbolicate",
action="store_true",
default=False,
help="Symbolicate backtraces in trace events",
)
parser.add_argument("--config", default=None, help="Configuration type to test")
parser.add_argument(
"--crash",
action="store_true",
default=False,
help="Test ASSERT failures should crash the test",
)
parser.add_argument(
"--aggregate-traces", default="NONE", choices=["NONE", "FAILED", "ALL"]
)
parser.add_argument(
"--keep-logs", default="FAILED", choices=["NONE", "FAILED", "ALL"]
)
parser.add_argument(
"--keep-simdirs", default="NONE", choices=["NONE", "FAILED", "ALL"]
)
parser.add_argument("testfile", nargs="+", help="The tests to run")
parser.add_argument(
"--use-valgrind", action="store_true", default=False, help="Run under valgrind"
)
args = parser.parse_args()
init_logging(args.loglevel, args.logdir)
basedir = os.getcwd()

View File

@ -14,14 +14,13 @@ class ClusterFileGenerator:
self.output_dir = Path(output_dir).resolve()
assert self.output_dir.exists(), "{} does not exist".format(output_dir)
assert self.output_dir.is_dir(), "{} is not a directory".format(output_dir)
self.tmp_dir = self.output_dir.joinpath(
'tmp', random_secret_string(16))
self.tmp_dir = self.output_dir.joinpath("tmp", random_secret_string(16))
self.tmp_dir.mkdir(parents=True)
self.cluster_file_path = self.tmp_dir.joinpath('fdb.cluster')
self.cluster_file_path = self.tmp_dir.joinpath("fdb.cluster")
def __enter__(self):
with open(self.cluster_file_path, 'x') as f:
f.write('foo:bar@1.1.1.1:5678\n')
with open(self.cluster_file_path, "x") as f:
f.write("foo:bar@1.1.1.1:5678\n")
return self
@ -32,9 +31,10 @@ class ClusterFileGenerator:
shutil.rmtree(self.tmp_dir)
if __name__ == '__main__':
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description="""
if __name__ == "__main__":
parser = ArgumentParser(
formatter_class=RawDescriptionHelpFormatter,
description="""
This script generates a cluster file that can be used to run a test and that will
be cleaned up when the test is over. The cluster file will not correspond to a real
cluster.
@ -44,11 +44,16 @@ if __name__ == '__main__':
The environment variable FDB_CLUSTER_FILE is set to the generated cluster file for the command if
it is not set already.
""")
parser.add_argument('--output-dir', '-o', metavar='OUTPUT_DIRECTORY',
help='Directory where output files are written', required=True)
parser.add_argument('cmd', metavar="COMMAND",
nargs="+", help="The command to run")
""",
)
parser.add_argument(
"--output-dir",
"-o",
metavar="OUTPUT_DIRECTORY",
help="Directory where output files are written",
required=True,
)
parser.add_argument("cmd", metavar="COMMAND", nargs="+", help="The command to run")
args = parser.parse_args()
errcode = 1
@ -56,15 +61,17 @@ if __name__ == '__main__':
print("cluster-file: {}".format(generator.cluster_file_path))
cmd_args = []
for cmd in args.cmd:
if cmd == '@CLUSTER_FILE@':
if cmd == "@CLUSTER_FILE@":
cmd_args.append(str(generator.cluster_file_path))
else:
cmd_args.append(cmd)
env = dict(**os.environ)
env['FDB_CLUSTER_FILE'] = env.get(
'FDB_CLUSTER_FILE', generator.cluster_file_path)
env["FDB_CLUSTER_FILE"] = env.get(
"FDB_CLUSTER_FILE", generator.cluster_file_path
)
errcode = subprocess.run(
cmd_args, stdout=sys.stdout, stderr=sys.stderr, env=env).returncode
cmd_args, stdout=sys.stdout, stderr=sys.stderr, env=env
).returncode
sys.exit(errcode)

View File

@ -10,7 +10,7 @@ import time
def _get_free_port_internal():
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(('0.0.0.0', 0))
s.bind(("0.0.0.0", 0))
return s.getsockname()[1]
@ -28,15 +28,16 @@ def get_free_port():
def is_port_in_use(port):
import socket
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
return s.connect_ex(('localhost', port)) == 0
return s.connect_ex(("localhost", port)) == 0
valid_letters_for_secret = string.ascii_letters + string.digits
def random_secret_string(len):
return ''.join(random.choice(valid_letters_for_secret) for i in range(len))
def random_secret_string(length):
return "".join(random.choice(valid_letters_for_secret) for _ in range(length))
class LocalCluster:
@ -82,14 +83,24 @@ logdir = {logdir}
## Parameters set here override defaults from the [fdbserver] section
"""
def __init__(self, basedir: str, fdbserver_binary: str, fdbmonitor_binary: str, fdbcli_binary: str,
process_number: int, create_config=True, port=None, ip_address=None, blob_granules_enabled: bool=False):
def __init__(
self,
basedir: str,
fdbserver_binary: str,
fdbmonitor_binary: str,
fdbcli_binary: str,
process_number: int,
create_config=True,
port=None,
ip_address=None,
blob_granules_enabled: bool = False,
):
self.basedir = Path(basedir)
self.etc = self.basedir.joinpath('etc')
self.log = self.basedir.joinpath('log')
self.data = self.basedir.joinpath('data')
self.conf_file = self.etc.joinpath('foundationdb.conf')
self.cluster_file = self.etc.joinpath('fdb.cluster')
self.etc = self.basedir.joinpath("etc")
self.log = self.basedir.joinpath("log")
self.data = self.basedir.joinpath("data")
self.conf_file = self.etc.joinpath("foundationdb.conf")
self.cluster_file = self.etc.joinpath("fdb.cluster")
self.fdbserver_binary = Path(fdbserver_binary)
self.fdbmonitor_binary = Path(fdbmonitor_binary)
self.fdbcli_binary = Path(fdbcli_binary)
@ -99,17 +110,16 @@ logdir = {logdir}
self.log.mkdir(exist_ok=True)
self.data.mkdir(exist_ok=True)
self.process_number = process_number
self.ip_address = '127.0.0.1' if ip_address is None else ip_address
self.ip_address = "127.0.0.1" if ip_address is None else ip_address
self.first_port = port
self.blob_granules_enabled = blob_granules_enabled
if (blob_granules_enabled):
if blob_granules_enabled:
# add extra process for blob_worker
self.process_number += 1
if (self.first_port is not None):
self.last_used_port = int(self.first_port)-1
self.server_ports = [self.__next_port()
for _ in range(self.process_number)]
if self.first_port is not None:
self.last_used_port = int(self.first_port) - 1
self.server_ports = [self.__next_port() for _ in range(self.process_number)]
self.cluster_desc = random_secret_string(8)
self.cluster_secret = random_secret_string(8)
self.env_vars = {}
@ -117,71 +127,79 @@ logdir = {logdir}
self.process = None
self.fdbmonitor_logfile = None
self.use_legacy_conf_syntax = False
if create_config:
self.create_cluster_file()
self.save_config()
def __next_port(self):
if (self.first_port is None):
if self.first_port is None:
return get_free_port()
else:
self.last_used_port += 1
return self.last_used_port
def save_config(self):
new_conf_file = self.conf_file.parent / (self.conf_file.name + '.new')
with open(new_conf_file, 'x') as f:
new_conf_file = self.conf_file.parent / (self.conf_file.name + ".new")
with open(new_conf_file, "x") as f:
conf_template = LocalCluster.configuration_template
bg_knob_line = ""
if (self.use_legacy_conf_syntax):
if self.use_legacy_conf_syntax:
conf_template = conf_template.replace("-", "_")
if (self.blob_granules_enabled):
if self.blob_granules_enabled:
bg_knob_line = "knob_bg_url=file://" + str(self.data) + "/fdbblob/"
f.write(conf_template.format(
etcdir=self.etc,
fdbserver_bin=self.fdbserver_binary,
datadir=self.data,
logdir=self.log,
ip_address=self.ip_address,
bg_knob_line=bg_knob_line
))
f.write(
conf_template.format(
etcdir=self.etc,
fdbserver_bin=self.fdbserver_binary,
datadir=self.data,
logdir=self.log,
ip_address=self.ip_address,
bg_knob_line=bg_knob_line,
)
)
# By default, the cluster only has one process
# If a port number is given and process_number > 1, we will use subsequent numbers
# E.g., port = 4000, process_number = 5
# Then 4000,4001,4002,4003,4004 will be used as ports
# If port number is not given, we will randomly pick free ports
for port in self.server_ports:
f.write('[fdbserver.{server_port}]\n'.format(
server_port=port))
if (self.blob_granules_enabled):
f.write("[fdbserver.{server_port}]\n".format(server_port=port))
if self.blob_granules_enabled:
# make last process a blob_worker class
f.write('class = blob_worker')
f.write("class = blob_worker")
f.flush()
os.fsync(f.fileno())
os.replace(new_conf_file, self.conf_file)
def create_cluster_file(self):
with open(self.cluster_file, 'x') as f:
f.write('{desc}:{secret}@{ip_addr}:{server_port}'.format(
desc=self.cluster_desc,
secret=self.cluster_secret,
ip_addr=self.ip_address,
server_port=self.server_ports[0]
))
with open(self.cluster_file, "x") as f:
f.write(
"{desc}:{secret}@{ip_addr}:{server_port}".format(
desc=self.cluster_desc,
secret=self.cluster_secret,
ip_addr=self.ip_address,
server_port=self.server_ports[0],
)
)
def start_cluster(self):
assert not self.running, "Can't start a server that is already running"
args = [str(self.fdbmonitor_binary),
'--conffile',
str(self.etc.joinpath('foundationdb.conf')),
'--lockfile',
str(self.etc.joinpath('fdbmonitor.lock'))]
self.fdbmonitor_logfile = open(
self.log.joinpath('fdbmonitor.log'), 'w')
args = [
str(self.fdbmonitor_binary),
"--conffile",
str(self.etc.joinpath("foundationdb.conf")),
"--lockfile",
str(self.etc.joinpath("fdbmonitor.lock")),
]
self.fdbmonitor_logfile = open(self.log.joinpath("fdbmonitor.log"), "w")
self.process = subprocess.Popen(
args, stdout=self.fdbmonitor_logfile, stderr=self.fdbmonitor_logfile, env=self.process_env())
args,
stdout=self.fdbmonitor_logfile,
stderr=self.fdbmonitor_logfile,
env=self.process_env(),
)
self.running = True
def stop_cluster(self):
@ -192,7 +210,7 @@ logdir = {logdir}
def ensure_ports_released(self, timeout_sec=5):
sec = 0
while (sec < timeout_sec):
while sec < timeout_sec:
in_use = False
for port in self.server_ports:
if is_port_in_use(port):
@ -212,32 +230,36 @@ logdir = {logdir}
def __exit__(self, xc_type, exc_value, traceback):
self.stop_cluster()
def create_database(self, storage='ssd', enable_tenants=True):
db_config = 'configure new single {}'.format(storage)
if (enable_tenants):
def create_database(self, storage="ssd", enable_tenants=True):
db_config = "configure new single {}".format(storage)
if enable_tenants:
db_config += " tenant_mode=optional_experimental"
if (self.blob_granules_enabled):
if self.blob_granules_enabled:
db_config += " blob_granules_enabled:=1"
args = [self.fdbcli_binary, '-C',
self.cluster_file, '--exec', db_config]
args = [self.fdbcli_binary, "-C", self.cluster_file, "--exec", db_config]
res = subprocess.run(args, env=self.process_env())
assert res.returncode == 0, "Create database failed with {}".format(
res.returncode)
res.returncode
)
if (self.blob_granules_enabled):
bg_args = [self.fdbcli_binary, '-C',
self.cluster_file, '--exec', 'blobrange start \\x00 \\xff']
if self.blob_granules_enabled:
bg_args = [
self.fdbcli_binary,
"-C",
self.cluster_file,
"--exec",
"blobrange start \\x00 \\xff",
]
bg_res = subprocess.run(bg_args, env=self.process_env())
assert bg_res.returncode == 0, "Start blob granules failed with {}".format(bg_res.returncode)
assert bg_res.returncode == 0, "Start blob granules failed with {}".format(
bg_res.returncode
)
def get_status(self):
args = [self.fdbcli_binary, '-C', self.cluster_file, '--exec',
'status json']
res = subprocess.run(args, env=self.process_env(),
stdout=subprocess.PIPE)
assert res.returncode == 0, "Get status failed with {}".format(
res.returncode)
args = [self.fdbcli_binary, "-C", self.cluster_file, "--exec", "status json"]
res = subprocess.run(args, env=self.process_env(), stdout=subprocess.PIPE)
assert res.returncode == 0, "Get status failed with {}".format(res.returncode)
return json.loads(res.stdout)
def process_env(self):

View File

@ -11,14 +11,17 @@ from pathlib import Path
class TempCluster:
def __init__(self, build_dir: str, process_number: int = 1, port: str = None, blob_granules_enabled: bool = False):
def __init__(
self,
build_dir: str,
process_number: int = 1,
port: str = None,
blob_granules_enabled: bool = False,
):
self.build_dir = Path(build_dir).resolve()
assert self.build_dir.exists(), "{} does not exist".format(build_dir)
assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
tmp_dir = self.build_dir.joinpath(
"tmp",
random_secret_string(16)
)
tmp_dir = self.build_dir.joinpath("tmp", random_secret_string(16))
tmp_dir.mkdir(parents=True)
self.cluster = LocalCluster(
tmp_dir,
@ -27,7 +30,7 @@ class TempCluster:
self.build_dir.joinpath("bin", "fdbcli"),
process_number,
port=port,
blob_granules_enabled=blob_granules_enabled
blob_granules_enabled=blob_granules_enabled,
)
self.log = self.cluster.log
self.etc = self.cluster.etc
@ -75,8 +78,7 @@ if __name__ == "__main__":
help="FDB build directory",
required=True,
)
parser.add_argument("cmd", metavar="COMMAND",
nargs="+", help="The command to run")
parser.add_argument("cmd", metavar="COMMAND", nargs="+", help="The command to run")
parser.add_argument(
"--process-number",
"-p",
@ -85,18 +87,20 @@ if __name__ == "__main__":
default=1,
)
parser.add_argument(
'--disable-log-dump',
help='Do not dump cluster log on error',
action="store_true"
"--disable-log-dump",
help="Do not dump cluster log on error",
action="store_true",
)
parser.add_argument(
'--blob-granules-enabled',
help='Enable blob granules',
action="store_true"
"--blob-granules-enabled", help="Enable blob granules", action="store_true"
)
args = parser.parse_args()
errcode = 1
with TempCluster(args.build_dir, args.process_number, blob_granules_enabled=args.blob_granules_enabled) as cluster:
with TempCluster(
args.build_dir,
args.process_number,
blob_granules_enabled=args.blob_granules_enabled,
) as cluster:
print("log-dir: {}".format(cluster.log))
print("etc-dir: {}".format(cluster.etc))
print("data-dir: {}".format(cluster.data))
@ -114,7 +118,7 @@ if __name__ == "__main__":
elif cmd == "@TMP_DIR@":
cmd_args.append(str(cluster.tmp_dir))
elif cmd.startswith("@DATA_DIR@"):
cmd_args.append(str(cluster.data) + cmd[len("@DATA_DIR@"):])
cmd_args.append(str(cluster.data) + cmd[len("@DATA_DIR@") :])
else:
cmd_args.append(cmd)
env = dict(**os.environ)
@ -134,9 +138,11 @@ if __name__ == "__main__":
)
for line in sev40s:
# When running ASAN we expect to see this message. Boost coroutine should be using the correct asan annotations so that it shouldn't produce any false positives.
# When running ASAN we expect to see this message. Boost coroutine should be using the correct asan
# annotations so that it shouldn't produce any false positives.
if line.endswith(
"WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false positives in some cases!"
"WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false "
"positives in some cases! "
):
continue
print(">>>>>>>>>>>>>>>>>>>> Found severity 40 events - the test fails")

View File

@ -23,53 +23,70 @@
import os
import subprocess
import sys
import shutil
from pathlib import Path
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from tmp_cluster import TempCluster
if __name__ == '__main__':
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,description="""
if __name__ == "__main__":
parser = ArgumentParser(
formatter_class=RawDescriptionHelpFormatter,
description="""
This script automatically configures N temporary local clusters on the machine and then
calls a command while these clusters are running. As soon as the command returns, all
configured clusters are killed and all generated data is deleted.
The purpose of this is to support testing a set of integration tests using multiple clusters
(i.e. using the Multi-threaded client).
""")
parser.add_argument('--build-dir','-b',metavar='BUILD_DIRECTORY',help='FDB build director',required=True)
parser.add_argument('--clusters','-c',metavar='NUM_CLUSTERS',type=int,help='The number of clusters to run',required=True)
parser.add_argument('cmd', metavar='COMMAND',nargs='+',help='The command to run')
args = parser.parse_args()
errcode = 1
""",
)
parser.add_argument(
"--build-dir",
"-b",
metavar="BUILD_DIRECTORY",
help="FDB build director",
required=True,
)
parser.add_argument(
"--clusters",
"-c",
metavar="NUM_CLUSTERS",
type=int,
help="The number of clusters to run",
required=True,
)
parser.add_argument("cmd", metavar="COMMAND", nargs="+", help="The command to run")
args = parser.parse_args()
errcode = 1
#spawn all the clusters
base_dir = args.build_dir
num_clusters = args.clusters
# spawn all the clusters
base_dir = args.build_dir
num_clusters = args.clusters
build_dir=Path(base_dir)
bin_dir=build_dir.joinpath('bin')
build_dir = Path(base_dir)
bin_dir = build_dir.joinpath("bin")
clusters = []
for c in range(1,num_clusters+1):
# now start the cluster up
local_c = TempCluster(args.build_dir, port="{}501".format(c))
clusters = []
for c in range(1, num_clusters + 1):
# now start the cluster up
local_c = TempCluster(args.build_dir, port="{}501".format(c))
local_c.__enter__()
clusters.append(local_c)
# all clusters should be running now, so run the subcommand
# TODO (bfines): pass through the proper ENV commands so that the client can find everything
cluster_paths = ';'.join([str(cluster.etc.joinpath('fdb.cluster')) for cluster in clusters])
print(cluster_paths)
env = dict(**os.environ)
env['FDB_CLUSTERS'] = env.get('FDB_CLUSTERS',cluster_paths)
errcode = subprocess.run(args.cmd,stdout=sys.stdout,stderr=sys.stderr,env=env).returncode
local_c.__enter__()
clusters.append(local_c)
# shutdown all the running clusters
for tc in clusters:
tc.close()
# all clusters should be running now, so run the subcommand
# TODO (bfines): pass through the proper ENV commands so that the client can find everything
cluster_paths = ";".join(
[str(cluster.etc.joinpath("fdb.cluster")) for cluster in clusters]
)
print(cluster_paths)
env = dict(**os.environ)
env["FDB_CLUSTERS"] = env.get("FDB_CLUSTERS", cluster_paths)
errcode = subprocess.run(
args.cmd, stdout=sys.stdout, stderr=sys.stderr, env=env
).returncode
sys.exit(errcode)
# shutdown all the running clusters
for tc in clusters:
tc.close()
sys.exit(errcode)

View File

@ -18,14 +18,53 @@ import hashlib
from local_cluster import LocalCluster, random_secret_string
SUPPORTED_PLATFORMS = ["x86_64"]
SUPPORTED_VERSIONS = ["7.2.0", "7.1.1", "7.1.0", "7.0.0", "6.3.24", "6.3.23",
"6.3.22", "6.3.18", "6.3.17", "6.3.16", "6.3.15", "6.3.13", "6.3.12", "6.3.9", "6.2.30",
"6.2.29", "6.2.28", "6.2.27", "6.2.26", "6.2.25", "6.2.24", "6.2.23", "6.2.22", "6.2.21",
"6.2.20", "6.2.19", "6.2.18", "6.2.17", "6.2.16", "6.2.15", "6.2.10", "6.1.13", "6.1.12",
"6.1.11", "6.1.10", "6.0.18", "6.0.17", "6.0.16", "6.0.15", "6.0.14", "5.2.8", "5.2.7",
"5.1.7", "5.1.6"]
SUPPORTED_VERSIONS = [
"7.2.0",
"7.1.1",
"7.1.0",
"7.0.0",
"6.3.24",
"6.3.23",
"6.3.22",
"6.3.18",
"6.3.17",
"6.3.16",
"6.3.15",
"6.3.13",
"6.3.12",
"6.3.9",
"6.2.30",
"6.2.29",
"6.2.28",
"6.2.27",
"6.2.26",
"6.2.25",
"6.2.24",
"6.2.23",
"6.2.22",
"6.2.21",
"6.2.20",
"6.2.19",
"6.2.18",
"6.2.17",
"6.2.16",
"6.2.15",
"6.2.10",
"6.1.13",
"6.1.12",
"6.1.11",
"6.1.10",
"6.0.18",
"6.0.17",
"6.0.16",
"6.0.15",
"6.0.14",
"5.2.8",
"5.2.7",
"5.1.7",
"5.1.6",
]
FDB_DOWNLOAD_ROOT = "https://github.com/apple/foundationdb/releases/download/"
CURRENT_VERSION = "7.2.0"
HEALTH_CHECK_TIMEOUT_SEC = 5
@ -35,7 +74,7 @@ MAX_DOWNLOAD_ATTEMPTS = 5
RUN_WITH_GDB = False
def make_executable(path):
def make_executable_path(path):
st = os.stat(path)
os.chmod(path, st.st_mode | stat.S_IEXEC)
@ -55,57 +94,59 @@ def version_from_str(ver_str):
def api_version_from_str(ver_str):
ver_tuple = version_from_str(ver_str)
return ver_tuple[0]*100+ver_tuple[1]*10
return ver_tuple[0] * 100 + ver_tuple[1] * 10
def version_before(ver_str1, ver_str2):
return version_from_str(ver_str1) < version_from_str(ver_str2)
def random_sleep(minSec, maxSec):
timeSec = random.uniform(minSec, maxSec)
print("Sleeping for {0:.3f}s".format(timeSec))
time.sleep(timeSec)
def random_sleep(min_sec, max_sec):
time_sec = random.uniform(min_sec, max_sec)
print("Sleeping for {0:.3f}s".format(time_sec))
time.sleep(time_sec)
def compute_sha256(filename):
hash = hashlib.sha256()
with open(filename, 'rb') as f:
hash_function = hashlib.sha256()
with open(filename, "rb") as f:
while True:
data = f.read(128*1024)
data = f.read(128 * 1024)
if not data:
break
hash.update(data)
hash_function.update(data)
return hash.hexdigest()
return hash_function.hexdigest()
def read_to_str(filename):
with open(filename, 'r') as f:
with open(filename, "r") as f:
return f.read()
class UpgradeTest:
def __init__(self, build_dir: str, upgrade_path: list, process_number: int = 1, port: str = None):
def __init__(
self,
build_dir: str,
upgrade_path: list,
process_number: int = 1,
port: str = None,
):
self.build_dir = Path(build_dir).resolve()
assert self.build_dir.exists(), "{} does not exist".format(build_dir)
assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
self.upgrade_path = upgrade_path
for version in upgrade_path:
assert version in SUPPORTED_VERSIONS, "Unsupported version {}".format(
version)
version
)
self.platform = platform.machine()
assert self.platform in SUPPORTED_PLATFORMS, "Unsupported platform {}".format(
self.platform)
self.tmp_dir = self.build_dir.joinpath(
"tmp",
random_secret_string(16)
self.platform
)
self.tmp_dir = self.build_dir.joinpath("tmp", random_secret_string(16))
self.tmp_dir.mkdir(parents=True)
self.download_dir = self.build_dir.joinpath(
"tmp",
"old_binaries"
)
self.download_dir = self.build_dir.joinpath("tmp", "old_binaries")
self.download_old_binaries()
self.create_external_lib_dir()
init_version = upgrade_path[0]
@ -116,7 +157,7 @@ class UpgradeTest:
self.binary_path(init_version, "fdbcli"),
process_number,
port=port,
create_config=False
create_config=False,
)
self.cluster.create_cluster_file()
self.configure_version(init_version)
@ -124,12 +165,19 @@ class UpgradeTest:
self.etc = self.cluster.etc
self.data = self.cluster.data
self.input_pipe_path = self.tmp_dir.joinpath(
"input.{}".format(random_secret_string(8)))
"input.{}".format(random_secret_string(8))
)
self.output_pipe_path = self.tmp_dir.joinpath(
"output.{}".format(random_secret_string(8)))
"output.{}".format(random_secret_string(8))
)
os.mkfifo(self.input_pipe_path)
os.mkfifo(self.output_pipe_path)
self.progress_event = Event()
self.api_version = None
self.tester_retcode = None
self.tester_proc = None
self.output_pipe = None
self.tester_bin = None
def binary_path(self, version, bin_name):
if version == CURRENT_VERSION:
@ -144,63 +192,68 @@ class UpgradeTest:
return self.download_dir.joinpath(version)
# Download an old binary of a given version from a remote repository
def download_old_binary(self, version, target_bin_name, remote_bin_name, makeExecutable):
def download_old_binary(
self, version, target_bin_name, remote_bin_name, make_executable
):
local_file = self.binary_path(version, target_bin_name)
if (local_file.exists()):
if local_file.exists():
return
# Download to a temporary file and then replace the target file atomically
# to avoid consistency errors in case of multiple tests are downloading the
# same file in parallel
local_file_tmp = Path("{}.{}".format(
str(local_file), random_secret_string(8)))
self.download_dir.joinpath(version).mkdir(
parents=True, exist_ok=True)
remote_file = "{}{}/{}".format(FDB_DOWNLOAD_ROOT,
version, remote_bin_name)
local_file_tmp = Path("{}.{}".format(str(local_file), random_secret_string(8)))
self.download_dir.joinpath(version).mkdir(parents=True, exist_ok=True)
remote_file = "{}{}/{}".format(FDB_DOWNLOAD_ROOT, version, remote_bin_name)
remote_sha256 = "{}.sha256".format(remote_file)
local_sha256 = Path("{}.sha256".format(local_file_tmp))
for attempt_cnt in range(MAX_DOWNLOAD_ATTEMPTS):
print("Downloading '{}' to '{}'...".format(
remote_file, local_file_tmp))
print("Downloading '{}' to '{}'...".format(remote_file, local_file_tmp))
request.urlretrieve(remote_file, local_file_tmp)
print("Downloading '{}' to '{}'...".format(
remote_sha256, local_sha256))
print("Downloading '{}' to '{}'...".format(remote_sha256, local_sha256))
request.urlretrieve(remote_sha256, local_sha256)
print("Download complete")
assert local_file_tmp.exists(), "{} does not exist".format(local_file_tmp)
assert local_sha256.exists(), "{} does not exist".format(local_sha256)
expected_checksum = read_to_str(local_sha256)
actual_checkum = compute_sha256(local_file_tmp)
if (expected_checksum == actual_checkum):
if expected_checksum == actual_checkum:
print("Checksum OK")
break
print("Checksum mismatch. Expected: {} Actual: {}".format(
expected_checksum, actual_checkum))
if attempt_cnt == MAX_DOWNLOAD_ATTEMPTS-1:
print(
"Checksum mismatch. Expected: {} Actual: {}".format(
expected_checksum, actual_checkum
)
)
if attempt_cnt == MAX_DOWNLOAD_ATTEMPTS - 1:
assert False, "Failed to download {} after {} attempts".format(
local_file_tmp, MAX_DOWNLOAD_ATTEMPTS)
local_file_tmp, MAX_DOWNLOAD_ATTEMPTS
)
os.rename(local_file_tmp, local_file)
os.remove(local_sha256)
if makeExecutable:
make_executable(local_file)
if make_executable:
make_executable_path(local_file)
# Download all old binaries required for testing the specified upgrade path
def download_old_binaries(self):
for version in self.upgrade_path:
if version == CURRENT_VERSION:
continue
self.download_old_binary(version,
"fdbserver", "fdbserver.{}".format(self.platform), True)
self.download_old_binary(version,
"fdbmonitor", "fdbmonitor.{}".format(self.platform), True)
self.download_old_binary(version,
"fdbcli", "fdbcli.{}".format(self.platform), True)
self.download_old_binary(version,
"libfdb_c.so", "libfdb_c.{}.so".format(self.platform), False)
self.download_old_binary(
version, "fdbserver", "fdbserver.{}".format(self.platform), True
)
self.download_old_binary(
version, "fdbmonitor", "fdbmonitor.{}".format(self.platform), True
)
self.download_old_binary(
version, "fdbcli", "fdbcli.{}".format(self.platform), True
)
self.download_old_binary(
version, "libfdb_c.so", "libfdb_c.{}.so".format(self.platform), False
)
# Create a directory for external client libraries for MVC and fill it
# with the libraries necessary for the specified upgrade path
@ -211,7 +264,8 @@ class UpgradeTest:
src_file_path = self.lib_dir(version).joinpath("libfdb_c.so")
assert src_file_path.exists(), "{} does not exist".format(src_file_path)
target_file_path = self.external_lib_dir.joinpath(
"libfdb_c.{}.so".format(version))
"libfdb_c.{}.so".format(version)
)
shutil.copyfile(src_file_path, target_file_path)
# Perform a health check of the cluster: Use fdbcli status command to check if the number of
@ -221,34 +275,42 @@ class UpgradeTest:
while retries < timeout_sec:
retries += 1
status = self.cluster.get_status()
if not "processes" in status["cluster"]:
if "processes" not in status["cluster"]:
print("Health check: no processes found. Retrying")
time.sleep(1)
continue
num_proc = len(status["cluster"]["processes"])
if (num_proc < self.cluster.process_number):
print("Health check: {} of {} processes found. Retrying".format(
num_proc, self.cluster.process_number))
if num_proc < self.cluster.process_number:
print(
"Health check: {} of {} processes found. Retrying".format(
num_proc, self.cluster.process_number
)
)
time.sleep(1)
continue
assert num_proc == self.cluster.process_number, "Number of processes: expected: {}, actual: {}".format(
self.cluster.process_number, num_proc)
assert (
num_proc == self.cluster.process_number
), "Number of processes: expected: {}, actual: {}".format(
self.cluster.process_number, num_proc
)
for (_, proc_stat) in status["cluster"]["processes"].items():
proc_ver = proc_stat["version"]
assert proc_ver == self.cluster_version, "Process version: expected: {}, actual: {}".format(
self.cluster_version, proc_ver)
assert (
proc_ver == self.cluster_version
), "Process version: expected: {}, actual: {}".format(
self.cluster_version, proc_ver
)
print("Health check: OK")
return
assert False, "Health check: Failed"
# Create and save a cluster configuration for the given version
def configure_version(self, version):
self.cluster.fdbmonitor_binary = self.binary_path(
version, "fdbmonitor")
self.cluster.fdbmonitor_binary = self.binary_path(version, "fdbmonitor")
self.cluster.fdbserver_binary = self.binary_path(version, "fdbserver")
self.cluster.fdbcli_binary = self.binary_path(version, "fdbcli")
self.cluster.set_env_var = "LD_LIBRARY_PATH", self.lib_dir(version)
if (version_before(version, "7.1.0")):
if version_before(version, "7.1.0"):
self.cluster.use_legacy_conf_syntax = True
self.cluster.save_config()
self.cluster_version = version
@ -272,43 +334,56 @@ class UpgradeTest:
self.cluster.stop_cluster()
shutil.rmtree(self.tmp_dir)
# Determine FDB API version matching the upgrade path
# Determine FDB API version matching the upgrade path
def determine_api_version(self):
self.api_version = api_version_from_str(CURRENT_VERSION)
for version in self.upgrade_path:
self.api_version = min(
api_version_from_str(version), self.api_version)
self.api_version = min(api_version_from_str(version), self.api_version)
# Start the tester to generate the workload specified by the test file
def exec_workload(self, test_file):
self.tester_retcode = 1
try:
self.determine_api_version()
cmd_args = [self.tester_bin,
'--cluster-file', self.cluster.cluster_file,
'--test-file', test_file,
'--external-client-dir', self.external_lib_dir,
'--disable-local-client',
'--input-pipe', self.input_pipe_path,
'--output-pipe', self.output_pipe_path,
'--api-version', str(self.api_version),
'--log',
'--log-dir', self.log,
'--tmp-dir', self.tmp_dir,
'--transaction-retry-limit', str(TRANSACTION_RETRY_LIMIT)]
if (RUN_WITH_GDB):
cmd_args = ['gdb', '-ex', 'run', '--args'] + cmd_args
print("Executing test command: {}".format(
" ".join([str(c) for c in cmd_args])))
cmd_args = [
self.tester_bin,
"--cluster-file",
self.cluster.cluster_file,
"--test-file",
test_file,
"--external-client-dir",
self.external_lib_dir,
"--disable-local-client",
"--input-pipe",
self.input_pipe_path,
"--output-pipe",
self.output_pipe_path,
"--api-version",
str(self.api_version),
"--log",
"--log-dir",
self.log,
"--tmp-dir",
self.tmp_dir,
"--transaction-retry-limit",
str(TRANSACTION_RETRY_LIMIT),
]
if RUN_WITH_GDB:
cmd_args = ["gdb", "-ex", "run", "--args"] + cmd_args
print(
"Executing test command: {}".format(
" ".join([str(c) for c in cmd_args])
)
)
self.tester_proc = subprocess.Popen(
cmd_args, stdout=sys.stdout, stderr=sys.stderr)
cmd_args, stdout=sys.stdout, stderr=sys.stderr
)
self.tester_retcode = self.tester_proc.wait()
self.tester_proc = None
if (self.tester_retcode != 0):
print("Tester failed with return code {}".format(
self.tester_retcode))
if self.tester_retcode != 0:
print("Tester failed with return code {}".format(self.tester_retcode))
except Exception:
print("Execution of test workload failed")
print(traceback.format_exc())
@ -318,24 +393,24 @@ class UpgradeTest:
def progress_check(self, ctrl_pipe):
self.progress_event.clear()
os.write(ctrl_pipe, b"CHECK\n")
self.progress_event.wait(
None if RUN_WITH_GDB else PROGRESS_CHECK_TIMEOUT_SEC)
if (self.progress_event.is_set()):
self.progress_event.wait(None if RUN_WITH_GDB else PROGRESS_CHECK_TIMEOUT_SEC)
if self.progress_event.is_set():
print("Progress check: OK")
else:
assert False, "Progress check failed after upgrade to version {}".format(
self.cluster_version)
self.cluster_version
)
# The main function of a thread for reading and processing
# the notifications received from the tester
def output_pipe_reader(self):
try:
print("Opening pipe {} for reading".format(self.output_pipe_path))
self.output_pipe = open(self.output_pipe_path, 'r')
self.output_pipe = open(self.output_pipe_path, "r")
for line in self.output_pipe:
msg = line.strip()
print("Received {}".format(msg))
if (msg == "CHECK_OK"):
if msg == "CHECK_OK":
self.progress_event.set()
self.output_pipe.close()
except Exception as e:
@ -368,7 +443,7 @@ class UpgradeTest:
print("Killing the tester process")
self.tester_proc.kill()
workload_thread.join(5)
except:
except Exception:
print("Failed to kill the tester process")
# The main method implementing the test:
@ -381,8 +456,7 @@ class UpgradeTest:
self.tester_proc = None
test_retcode = 1
try:
workload_thread = Thread(
target=self.exec_workload, args=(args.test_file,))
workload_thread = Thread(target=self.exec_workload, args=(args.test_file,))
workload_thread.start()
reader_thread = Thread(target=self.output_pipe_reader)
@ -406,8 +480,8 @@ class UpgradeTest:
return (
subprocess.getoutput(
"grep -r 'Severity=\"{}\"' {}".format(
severity,
self.cluster.log.as_posix())
severity, self.cluster.log.as_posix()
)
)
.rstrip()
.splitlines()
@ -417,8 +491,7 @@ class UpgradeTest:
def check_cluster_logs(self, error_limit=100):
sev40s = (
subprocess.getoutput(
"grep -r 'Severity=\"40\"' {}".format(
self.cluster.log.as_posix())
"grep -r 'Severity=\"40\"' {}".format(self.cluster.log.as_posix())
)
.rstrip()
.splitlines()
@ -429,16 +502,19 @@ class UpgradeTest:
# When running ASAN we expect to see this message. Boost coroutine should be using the
# correct asan annotations so that it shouldn't produce any false positives.
if line.endswith(
"WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false positives in some cases!"
"WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false "
"positives in some cases! "
):
continue
if (err_cnt < error_limit):
if err_cnt < error_limit:
print(line)
err_cnt += 1
if err_cnt > 0:
print(
">>>>>>>>>>>>>>>>>>>> Found {} severity 40 events - the test fails", err_cnt)
">>>>>>>>>>>>>>>>>>>> Found {} severity 40 events - the test fails",
err_cnt,
)
else:
print("No errors found in logs")
return err_cnt == 0
@ -447,18 +523,20 @@ class UpgradeTest:
def dump_warnings_in_logs(self, limit=100):
sev30s = (
subprocess.getoutput(
"grep -r 'Severity=\"30\"' {}".format(
self.cluster.log.as_posix())
"grep -r 'Severity=\"30\"' {}".format(self.cluster.log.as_posix())
)
.rstrip()
.splitlines()
)
if (len(sev30s) == 0):
if len(sev30s) == 0:
print("No warnings found in logs")
else:
print(">>>>>>>>>>>>>>>>>>>> Found {} severity 30 events (warnings):".format(
len(sev30s)))
print(
">>>>>>>>>>>>>>>>>>>> Found {} severity 30 events (warnings):".format(
len(sev30s)
)
)
for line in sev30s[:limit]:
print(line)
@ -492,14 +570,14 @@ if __name__ == "__main__":
required=True,
)
parser.add_argument(
'--upgrade-path',
nargs='+',
help='Cluster upgrade path: a space separated list of versions',
default=[CURRENT_VERSION]
"--upgrade-path",
nargs="+",
help="Cluster upgrade path: a space separated list of versions",
default=[CURRENT_VERSION],
)
parser.add_argument(
'--test-file',
help='A .toml file describing a test workload to be generated with fdb_c_api_tester',
"--test-file",
help="A .toml file describing a test workload to be generated with fdb_c_api_tester",
required=True,
)
parser.add_argument(
@ -510,21 +588,19 @@ if __name__ == "__main__":
default=0,
)
parser.add_argument(
'--disable-log-dump',
help='Do not dump cluster log on error',
action="store_true"
"--disable-log-dump",
help="Do not dump cluster log on error",
action="store_true",
)
parser.add_argument(
'--run-with-gdb',
help='Execute the tester binary from gdb',
action="store_true"
"--run-with-gdb", help="Execute the tester binary from gdb", action="store_true"
)
args = parser.parse_args()
if (args.process_number == 0):
if args.process_number == 0:
args.process_number = random.randint(1, 5)
print("Testing with {} processes".format(args.process_number))
if (args.run_with_gdb):
if args.run_with_gdb:
RUN_WITH_GDB = True
errcode = 1

View File

@ -24,72 +24,89 @@ import subprocess
last_command_output = None
def check(condition):
global last_command_output
assert condition, 'Command output:\n' + last_command_output
assert condition, "Command output:\n" + last_command_output
def run_command(command, args):
global last_command_output
last_command_output = subprocess.run(command + args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.decode('utf-8').strip()
last_command_output = (
subprocess.run(command + args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
.stdout.decode("utf-8")
.strip()
)
return last_command_output
def is_unknown_option(output):
return output.startswith('ERROR: unknown option')
return output.startswith("ERROR: unknown option")
def is_unknown_knob(output):
return output.startswith('ERROR: Failed to set knob option')
return output.startswith("ERROR: Failed to set knob option")
def is_cli_usage(output):
return output.startswith('FoundationDB CLI')
return output.startswith("FoundationDB CLI")
def test_fdbserver(build_dir):
command = [args.build_dir + '/bin/fdbserver', '-r', 'unittests']
command = [args.build_dir + "/bin/fdbserver", "-r", "unittests"]
check(is_unknown_option(run_command(command, ['--unknown-option'])))
check(is_unknown_option(run_command(command, ["--unknown-option"])))
check(not is_unknown_option(run_command(command, ['--cluster-file', 'foo'])))
check( not is_unknown_option(run_command(command, ['--cluster_file', 'foo'])))
check(not is_unknown_option(run_command(command, ["--cluster-file", "foo"])))
check(not is_unknown_option(run_command(command, ["--cluster_file", "foo"])))
check(is_unknown_knob(run_command(command, ['--knob-fake-knob', 'foo'])))
check(is_unknown_knob(run_command(command, ["--knob-fake-knob", "foo"])))
check(not is_unknown_knob(run_command(command, ["--knob-min-trace-severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob-min_trace_severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob_min_trace_severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob_min-trace-severity", "5"])))
check(not is_unknown_knob(run_command(command, ['--knob-min-trace-severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob-min_trace_severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob_min_trace_severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob_min-trace-severity', '5'])))
def test_fdbcli(build_dir):
command = [args.build_dir + '/bin/fdbcli', '--exec', 'begin']
command = [args.build_dir + "/bin/fdbcli", "--exec", "begin"]
check(is_cli_usage(run_command(command, ['--unknown-option'])))
check(is_cli_usage(run_command(command, ["--unknown-option"])))
check(not is_cli_usage(run_command(command, ['--api-version', '700'])))
check(not is_cli_usage(run_command(command, ['--api_version', '700'])))
check(not is_cli_usage(run_command(command, ["--api-version", "700"])))
check(not is_cli_usage(run_command(command, ["--api_version", "700"])))
check(is_unknown_knob(run_command(command, ['--knob-fake-knob', 'foo'])))
check(is_unknown_knob(run_command(command, ["--knob-fake-knob", "foo"])))
check(not is_unknown_knob(run_command(command, ["--knob-min-trace-severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob-min_trace_severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob_min_trace_severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob_min-trace-severity", "5"])))
check(not is_unknown_knob(run_command(command, ['--knob-min-trace-severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob-min_trace_severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob_min_trace_severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob_min-trace-severity', '5'])))
def test_fdbbackup(build_dir):
command = [args.build_dir + '/bin/fdbbackup', 'list']
command = [args.build_dir + "/bin/fdbbackup", "list"]
check(is_unknown_option(run_command(command, ['--unknown-option'])))
check(is_unknown_option(run_command(command, ["--unknown-option"])))
check(not is_unknown_option(run_command(command, ['--trace-format', 'foo'])))
check(not is_unknown_option(run_command(command, ['--trace_format', 'foo'])))
check(not is_unknown_option(run_command(command, ["--trace-format", "foo"])))
check(not is_unknown_option(run_command(command, ["--trace_format", "foo"])))
check(is_unknown_knob(run_command(command, ['--knob-fake-knob', 'foo'])))
check(is_unknown_knob(run_command(command, ["--knob-fake-knob", "foo"])))
check(not is_unknown_knob(run_command(command, ['--knob-min-trace-severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob-min_trace_severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob_min_trace_severity', '5'])))
check(not is_unknown_knob(run_command(command, ['--knob_min-trace-severity', '5'])))
check(not is_unknown_knob(run_command(command, ["--knob-min-trace-severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob-min_trace_severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob_min_trace_severity", "5"])))
check(not is_unknown_knob(run_command(command, ["--knob_min-trace-severity", "5"])))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="This test checks for proper command line argument parsing.")
parser.add_argument('build_dir', metavar='BUILD_DIRECTORY', help='FDB build directory')
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="This test checks for proper command line argument parsing."
)
parser.add_argument(
"build_dir", metavar="BUILD_DIRECTORY", help="FDB build directory"
)
args = parser.parse_args()
test_fdbserver(args.build_dir)

View File

@ -27,74 +27,100 @@ import argparse
class Result(object):
def __init__(self):
self.id = random.randint(0, 2**63)
self.id = random.randint(0, 2 ** 63)
self.kpis = {}
self.errors = []
def add_kpi(self, name, value, units):
self.kpis[name] = {'value': value, 'units': units}
self.kpis[name] = {"value": value, "units": units}
def add_error(self, error):
self.errors.append(error)
def save(self, dir):
file = 'pyresult-%d.json' % self.id
file = "pyresult-%d.json" % self.id
if dir:
file = os.path.join(dir, file)
with open(file, 'w') as f:
json.dump({'kpis': self.kpis, 'errors': self.errors}, f)
with open(file, "w") as f:
json.dump({"kpis": self.kpis, "errors": self.errors}, f)
class PythonTest(object):
def __init__(self):
self.result = Result()
self.args = None
def run_test(self):
pass
def multi_version_description(self):
if self.args.disable_multiversion_api:
return 'multi-version API disabled'
return "multi-version API disabled"
elif self.args.use_external_client:
if self.args.enable_callbacks_on_external_threads:
return 'external client on external thread'
return "external client on external thread"
else:
return 'external client on main thread'
return "external client on main thread"
else:
return 'local client'
return "local client"
def run(self, parser=None):
import fdb
# API version should already be set by the caller
if parser is None:
parser = argparse.ArgumentParser()
parser.add_argument('--output-directory', default='', type=str,
help='The directory to store the output JSON in. If not set, the current directory is used')
parser.add_argument('--disable-multiversion-api', action='store_true', help='Disables the multi-version client API')
parser.add_argument('--enable-callbacks-on-external-threads', action='store_true',
help='Allows callbacks to be called on threads created by the client library')
parser.add_argument('--use-external-client', action='store_true', help='Connect to the server using an external client')
parser.add_argument(
"--output-directory",
default="",
type=str,
help="The directory to store the output JSON in. If not set, the current directory is used",
)
parser.add_argument(
"--disable-multiversion-api",
action="store_true",
help="Disables the multi-version client API",
)
parser.add_argument(
"--enable-callbacks-on-external-threads",
action="store_true",
help="Allows callbacks to be called on threads created by the client library",
)
parser.add_argument(
"--use-external-client",
action="store_true",
help="Connect to the server using an external client",
)
self.args = parser.parse_args()
if self.args.disable_multiversion_api:
if self.args.enable_callbacks_on_external_threads or self.args.use_external_client:
raise Exception('Invalid multi-version API argument combination')
if (
self.args.enable_callbacks_on_external_threads
or self.args.use_external_client
):
raise Exception("Invalid multi-version API argument combination")
fdb.options.set_disable_multi_version_client_api()
if self.args.enable_callbacks_on_external_threads:
if not self.args.use_external_client:
raise Exception('Cannot enable callbacks on external threads without using external clients')
raise Exception(
"Cannot enable callbacks on external threads without using external clients"
)
fdb.options.set_callbacks_on_external_threads()
if self.args.use_external_client:
fdb.options.set_disable_local_client()
fdb.options.set_external_client_directory(os.path.join(os.path.dirname(__file__), '..', '..', 'external_libraries'))
fdb.options.set_external_client_directory(
os.path.join(
os.path.dirname(__file__), "..", "..", "external_libraries"
)
)
try:
self.run_test()
except:
except Exception:
self.result.add_error(traceback.format_exc())
self.result.save(self.args.output_directory)

View File

@ -4,11 +4,31 @@ import argparse
parser = argparse.ArgumentParser("Run multithreaded client tests")
parser.add_argument("cluster_file", nargs='+', help='List of fdb.cluster files to connect to')
parser.add_argument("--skip-so-files", default=False, action='store_true', help='Do not load .so files')
parser.add_argument("--threads", metavar="N", type=int, default=3, help='Number of threads to use. Zero implies local client')
parser.add_argument("--build-dir", metavar="DIR", default='.', help='Path to root directory of FDB build output')
parser.add_argument("--client-log-dir", metavar="DIR", default="client-logs", help="Path to write client logs to. The directory will be created if it does not exist.")
parser.add_argument(
"cluster_file", nargs="+", help="List of fdb.cluster files to connect to"
)
parser.add_argument(
"--skip-so-files", default=False, action="store_true", help="Do not load .so files"
)
parser.add_argument(
"--threads",
metavar="N",
type=int,
default=3,
help="Number of threads to use. Zero implies local client",
)
parser.add_argument(
"--build-dir",
metavar="DIR",
default=".",
help="Path to root directory of FDB build output",
)
parser.add_argument(
"--client-log-dir",
metavar="DIR",
default="client-logs",
help="Path to write client logs to. The directory will be created if it does not exist.",
)
args = parser.parse_args()
import sys
@ -25,12 +45,12 @@ import sys
## This fails (unsupported configuration):
# ../tests/loopback_cluster/run_cluster.sh . 3 '../tests/python_tests/multithreaded_client.py loopback-cluster-*/fdb.cluster --threads 2 --skip-so-files'
sys.path.append(args.build_dir + '/bindings/python')
sys.path.append(args.build_dir + "/bindings/python")
import fdb
import os
import random
import time
fdb.api_version(630)
if not os.path.exists(args.client_log_dir):
@ -41,7 +61,7 @@ fdb.options.set_knob("min_trace_severity=5")
if not args.skip_so_files:
print("Loading .so files")
fdb.options.set_external_client_directory(args.build_dir + '/lib')
fdb.options.set_external_client_directory(args.build_dir + "/lib")
if args.threads > 0:
fdb.options.set_client_threads_per_version(args.threads)
@ -55,6 +75,6 @@ for i in range(100):
key = b"test_%d" % random.randrange(0, 100000000)
val = b"value_%d" % random.randrange(0, 10000000)
db = dbs[i % len(dbs)]
print ("Writing: ", key, val, db)
print("Writing: ", key, val, db)
db[key] = val
assert (val == db[key])
assert val == db[key]

File diff suppressed because it is too large Load Diff

View File

@ -30,68 +30,72 @@ import traceback
from collections import OrderedDict
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from python_tests import PythonTest
import fdb
import fdb.tuple
fdb.api_version(400)
class PythonPerformance(PythonTest):
tests = {
'future_latency': 'Python API future throughput',
'set': 'Python API set throughput',
'clear': 'Python API clear throughput',
'clear_range': 'Python API clear_range throughput',
'parallel_get': 'Python API parallel get throughput',
'serial_get': 'Python API serial get throughput',
'get_range': 'Python API get_range throughput',
'get_key': 'Python API get_key throughput',
'get_single_key_range': 'Python API get_single_key_range throughput',
'alternating_get_set': 'Python API alternating get and set throughput',
'write_transaction': 'Python API single-key transaction throughput',
"future_latency": "Python API future throughput",
"set": "Python API set throughput",
"clear": "Python API clear throughput",
"clear_range": "Python API clear_range throughput",
"parallel_get": "Python API parallel get throughput",
"serial_get": "Python API serial get throughput",
"get_range": "Python API get_range throughput",
"get_key": "Python API get_key throughput",
"get_single_key_range": "Python API get_single_key_range throughput",
"alternating_get_set": "Python API alternating get and set throughput",
"write_transaction": "Python API single-key transaction throughput",
}
def __init__(self, key_count=1000000, key_size=16, value_size=100):
super(PythonPerformance, self).__init__()
self.key_count = key_count
self.key_size = key_size
self.value_str = ''.join(['x' for i in range(value_size)])
self.value_str = "".join(["x" for _ in range(value_size)])
# Python Performance Tests (checks if functions run and yield correct results, gets performance indicators)
def run_test(self):
try:
db = fdb.open(None, 'DB')
db = fdb.open(None, "DB")
except KeyboardInterrupt:
raise
except:
self.result.add_error(self.get_error('fdb.open failed'))
except Exception:
self.result.add_error(self.get_error("fdb.open failed"))
return
try:
self.test_performance(db)
except KeyboardInterrupt:
raise
except:
self.result.add_error(self.get_error('Failed to complete all tests'))
except Exception:
self.result.add_error(self.get_error("Failed to complete all tests"))
def random_key(self):
return self.key(random.randint(0, self.key_count - 1))
def key(self, num):
return '%0*d' % (self.key_size, num)
return "%0*d" % (self.key_size, num)
def value(self, key):
return self.value_str
def insert_data(self, db):
print 'Loading database'
print("Loading database")
del db[:]
num_keys = 100000 / (self.key_size + len(self.value_str))
trs = [db.create_transaction() for i in range(int(math.ceil(float(self.key_count) / num_keys)))]
success = [False for i in range(len(trs))]
trs = [
db.create_transaction()
for _ in range(int(math.ceil(float(self.key_count) / num_keys)))
]
success = [False for _ in range(len(trs))]
while not all(success):
futures = {}
@ -130,11 +134,13 @@ class PythonPerformance(PythonTest):
# Run each test
for test in self.args.tests_to_run:
time.sleep(5)
print('Running test %s' % test)
print("Running test %s" % test)
results = []
fxn_name = 'run_%s' % test
assert hasattr(self, fxn_name), 'Test function %s not implemented' % fxn_name
fxn_name = "run_%s" % test
assert hasattr(self, fxn_name), (
"Test function %s not implemented" % fxn_name
)
# Perform each test several times
for x in range(0, num_runs):
@ -142,13 +148,22 @@ class PythonPerformance(PythonTest):
results.append(getattr(self, fxn_name)(db))
except KeyboardInterrupt:
raise
except:
self.result.add_error(self.get_error('Performance test failed: ' + PythonPerformance.tests[test]))
except Exception:
self.result.add_error(
self.get_error(
"Performance test failed: " + PythonPerformance.tests[test]
)
)
break
if len(results) == num_runs:
median = sorted(results)[num_runs / 2]
self.result.add_kpi("%s (%s)" % (PythonPerformance.tests[test], self.multi_version_description()), int(median), 'keys/s')
self.result.add_kpi(
"%s (%s)"
% (PythonPerformance.tests[test], self.multi_version_description()),
int(median),
"keys/s",
)
@fdb.transactional
def run_future_latency(self, tr, count=100000):
@ -179,7 +194,7 @@ class PythonPerformance(PythonTest):
for i in range(count):
key = self.random_key()
del tr[key: self.key(int(key) + 1)]
del tr[key : self.key(int(key) + 1)]
return count / (time.time() - s)
@ -233,11 +248,11 @@ class PythonPerformance(PythonTest):
tr.options.set_retry_limit(5)
if count > self.key_count / 2:
keys = [self.random_key() for i in range(count)]
keys = [self.random_key() for _ in range(count)]
else:
key_set = OrderedDict()
while len(key_set) < count:
key_set[self.random_key()] = ''
key_set[self.random_key()] = ""
keys = key_set.keys()
s = time.time()
@ -254,7 +269,7 @@ class PythonPerformance(PythonTest):
b = random.randint(0, self.key_count - count)
s = time.time()
list(tr[self.key(b): self.key(b + count)])
list(tr[self.key(b) : self.key(b + count)])
return count / (time.time() - s)
@ -265,7 +280,9 @@ class PythonPerformance(PythonTest):
s = time.time()
for i in range(count):
tr.get_key(fdb.KeySelector(self.random_key(), True, random.randint(-10, 10))).wait()
tr.get_key(
fdb.KeySelector(self.random_key(), True, random.randint(-10, 10))
).wait()
return count / (time.time() - s)
@ -295,24 +312,37 @@ class PythonPerformance(PythonTest):
# Adds the stack trace to an error message
def get_error(self, message):
errorMessage = message + "\n" + traceback.format_exc()
print('%s' % errorMessage)
return errorMessage
error_message = message + "\n" + traceback.format_exc()
print("%s" % error_message)
return error_message
if __name__ == '__main__':
print("Running PythonPerformance test on Python version %d.%d.%d%s%d" %
(sys.version_info[0], sys.version_info[1], sys.version_info[2], sys.version_info[3][0], sys.version_info[4]))
if __name__ == "__main__":
print(
"Running PythonPerformance test on Python version %d.%d.%d%s%d"
% (
sys.version_info[0],
sys.version_info[1],
sys.version_info[2],
sys.version_info[3][0],
sys.version_info[4],
)
)
parser = argparse.ArgumentParser()
tests = sorted(PythonPerformance.tests.keys())
assert len(tests) > 0, 'Python performance test has no test functions'
test_string = ', '.join(tests[:-1])
assert len(tests) > 0, "Python performance test has no test functions"
test_string = ", ".join(tests[:-1])
if len(tests) > 1:
test_string += ', and '
test_string += ", and "
test_string += tests[-1]
parser.add_argument("--tests-to-run", nargs="*", help="Names of tests to run. Can be any of %s. By default, all tests are run." % test_string)
parser.add_argument(
"--tests-to-run",
nargs="*",
help="Names of tests to run. Can be any of %s. By default, all tests are run."
% test_string,
)
PythonPerformance().run(parser=parser)

View File

@ -26,21 +26,22 @@ import sys
import time
import traceback
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from python_tests import PythonTest
import fdb
fdb.api_version(400)
class RYWBenchmark(PythonTest):
tests = {
'get_single': "RYW: get single cached value throughput",
'get_many_sequential': "RYW: get sequential cached values throughput",
'get_range_basic': "RYW: get range cached values throughput",
'single_clear_get_range': "RYW: get range cached values with clears throughput",
'clear_range_get_range': "RYW: get range cached values with clear ranges throughput",
'interleaved_sets_gets': "RYW: interleaved sets and gets on a single key throughput",
"get_single": "RYW: get single cached value throughput",
"get_many_sequential": "RYW: get sequential cached values throughput",
"get_range_basic": "RYW: get range cached values throughput",
"single_clear_get_range": "RYW: get range cached values with clears throughput",
"clear_range_get_range": "RYW: get range cached values with clear ranges throughput",
"interleaved_sets_gets": "RYW: interleaved sets and gets on a single key throughput",
}
def __init__(self, key_count=10000, key_size=16):
@ -50,28 +51,28 @@ class RYWBenchmark(PythonTest):
def run_test(self):
try:
db = fdb.open(None, 'DB')
db = fdb.open(None, "DB")
except KeyboardInterrupt:
raise
except:
self.result.add_error(self.get_error('fdb.open failed'))
except Exception:
self.result.add_error(self.get_error("fdb.open failed"))
return
try:
self.test_performance(db)
except KeyboardInterrupt:
raise
except:
self.result.add_error(self.get_error('Failed to complete all tests'))
except Exception:
self.result.add_error(self.get_error("Failed to complete all tests"))
def key(self, num):
return '%0*d' % (self.key_size, num)
return "%0*d" % (self.key_size, num)
# Adds the stack trace to an error message
def get_error(self, message):
errorMessage = message + "\n" + traceback.format_exc()
print(errorMessage)
return errorMessage
error_message = message + "\n" + traceback.format_exc()
print(error_message)
return error_message
def test_performance(self, db):
tr = db.create_transaction()
@ -88,29 +89,35 @@ class RYWBenchmark(PythonTest):
for test in self.args.tests_to_run:
time.sleep(5)
print('Running test %s' % test)
print("Running test %s" % test)
results = []
fxn_name = 'run_%s' % test
assert hasattr(self, fxn_name), 'Test function %s is not implemented' % fxn_name
fxn_name = "run_%s" % test
assert hasattr(self, fxn_name), (
"Test function %s is not implemented" % fxn_name
)
for x in range(0, num_runs):
try:
results.append(getattr(self, fxn_name)(tr))
except KeyboardInterrupt:
raise
except:
self.result.add_error(self.get_error('Performance test failed: ' + RYWBenchmark.tests[test]))
except Exception:
self.result.add_error(
self.get_error(
"Performance test failed: " + RYWBenchmark.tests[test]
)
)
break
if len(results) == num_runs:
median = sorted(results)[num_runs / 2]
self.result.add_kpi(RYWBenchmark.tests[test], int(median), 'keys/s')
self.result.add_kpi(RYWBenchmark.tests[test], int(median), "keys/s")
def insert_data(self, tr):
del tr[:]
for i in range(0, 10000):
tr[self.key(i)] = 'foo'
tr[self.key(i)] = "foo"
def run_get_single(self, tr, count=10000):
start = time.time()
@ -152,26 +159,39 @@ class RYWBenchmark(PythonTest):
def run_interleaved_sets_gets(self, tr, count=10000):
start = time.time()
tr['foo'] = str(1)
tr["foo"] = str(1)
for i in range(count):
old = int(tr.get('foo').wait())
tr.set('foo', str(old + 1))
old = int(tr.get("foo").wait())
tr.set("foo", str(old + 1))
return count / (time.time() - start)
if __name__ == '__main__':
print("Running RYW Benchmark test on Python version %d.%d.%d%s%d" %
(sys.version_info[0], sys.version_info[1], sys.version_info[2], sys.version_info[3][0], sys.version_info[4]))
if __name__ == "__main__":
print(
"Running RYW Benchmark test on Python version %d.%d.%d%s%d"
% (
sys.version_info[0],
sys.version_info[1],
sys.version_info[2],
sys.version_info[3][0],
sys.version_info[4],
)
)
parser = argparse.ArgumentParser()
tests = sorted(RYWBenchmark.tests.keys())
assert len(tests) > 0, 'RYW benchmark test has no test_functions'
test_string = ', '.join(tests[:-1])
assert len(tests) > 0, "RYW benchmark test has no test_functions"
test_string = ", ".join(tests[:-1])
if len(tests) > 1:
test_string += ', and '
test_string += ", and "
test_string += tests[-1]
parser.add_argument('--tests-to-run', nargs='*', help='Names of tests to run. Can be any of %s. By default, all tests are run.' % test_string)
parser.add_argument(
"--tests-to-run",
nargs="*",
help="Names of tests to run. Can be any of %s. By default, all tests are run."
% test_string,
)
RYWBenchmark().run(parser=parser)