Merge branch 'main' of github.com:apple/foundationdb into jfu-list-tenants

This commit is contained in:
Jon Fu 2022-04-29 13:16:54 -04:00
commit d953b961b7
130 changed files with 11900 additions and 5493 deletions

View File

@ -10,3 +10,4 @@ set(SRCS
add_library(FDBLibTLS STATIC ${SRCS})
target_link_libraries(FDBLibTLS PUBLIC OpenSSL::SSL boost_target PRIVATE flow)
target_include_directories(FDBLibTLS INTERFACE OpenSSL::SSL boost_target PRIVATE flow)

View File

@ -22,6 +22,9 @@
#include "FDBLibTLS/FDBLibTLSSession.h"
#include "flow/Trace.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/bio.h>
#include <openssl/err.h>
#include <openssl/evp.h>

View File

@ -23,6 +23,9 @@
#include "flow/flow.h"
#include "flow/Trace.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/bio.h>
#include <openssl/err.h>
#include <openssl/pem.h>

View File

@ -20,6 +20,9 @@
#include "FDBLibTLS/FDBLibTLSVerify.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/objects.h>
#include <algorithm>

View File

@ -25,6 +25,9 @@
#include <string.h>
#include <boost/lexical_cast.hpp>
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/objects.h>
#include "fdbrpc/ITLSPlugin.h"

View File

@ -80,10 +80,23 @@ endif()
# The tests don't build on windows
if(NOT WIN32)
set(MAKO_SRCS
test/mako/mako.c
test/mako/mako.h
test/mako/utils.c
test/mako/utils.h)
test/mako/async.hpp
test/mako/async.cpp
test/mako/blob_granules.hpp
test/mako/blob_granules.cpp
test/mako/future.hpp
test/mako/limit.hpp
test/mako/logger.hpp
test/mako/mako.cpp
test/mako/mako.hpp
test/mako/operations.hpp
test/mako/operations.cpp
test/mako/process.hpp
test/mako/shm.hpp
test/mako/stats.hpp
test/mako/time.hpp
test/mako/utils.cpp
test/mako/utils.hpp)
add_subdirectory(test/unit/third_party)
find_package(Threads REQUIRED)
set(UNIT_TEST_SRCS
@ -98,6 +111,11 @@ if(NOT WIN32)
test/unit/fdb_api.cpp
test/unit/fdb_api.hpp)
add_library(fdb_cpp INTERFACE)
target_sources(fdb_cpp INTERFACE test/fdb_api.hpp)
target_include_directories(fdb_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/test)
target_link_libraries(fdb_cpp INTERFACE fmt::fmt)
set(API_TESTER_SRCS
test/apitester/fdb_c_api_tester.cpp
test/apitester/TesterApiWorkload.cpp
@ -179,7 +197,11 @@ endif()
# do not set RPATH for mako
set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
target_link_libraries(mako PRIVATE fdb_c fdbclient)
if (USE_SANITIZER)
target_link_libraries(mako PRIVATE fdb_c fdbclient fmt::fmt Threads::Threads fdb_cpp boost_asan)
else()
target_link_libraries(mako PRIVATE fdb_c fdbclient fmt::fmt Threads::Threads fdb_cpp boost_target)
endif()
if(NOT OPEN_FOR_IDE)
# Make sure that fdb_c.h is compatible with c90
@ -254,6 +276,8 @@ endif()
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests
--tmp-dir
@TMP_DIR@
--log-dir
@LOG_DIR@
)
add_fdbclient_test(
@ -271,6 +295,10 @@ endif()
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/blobgranuletests
--blob-granule-local-file-path
@DATA_DIR@/fdbblob/
--tmp-dir
@TMP_DIR@
--log-dir
@LOG_DIR@
)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT USE_SANITIZER)

View File

@ -37,27 +37,42 @@ private:
enum OpType { OP_INSERT, OP_CLEAR, OP_CLEAR_RANGE, OP_READ, OP_GET_RANGES, OP_LAST = OP_GET_RANGES };
std::vector<OpType> excludedOpTypes;
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
// FIXME: should still guarantee a read succeeds eventually somehow
bool seenReadSuccess = false;
void randomReadOp(TTaskFct cont) {
std::string begin = randomKeyName();
std::string end = randomKeyName();
auto results = std::make_shared<std::vector<KeyValue>>();
auto tooOld = std::make_shared<bool>(false);
if (begin > end) {
std::swap(begin, end);
}
execTransaction(
[begin, end, results](auto ctx) {
[this, begin, end, results, tooOld](auto ctx) {
ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
KeyValuesResult res = ctx->tx()->readBlobGranules(begin, end, ctx->getBGBasePath());
bool more;
(*results) = res.getKeyValues(&more);
ASSERT(!more);
if (res.getError() != error_code_success) {
if (res.getError() == error_code_blob_granule_transaction_too_old) {
info("BlobGranuleCorrectness::randomReadOp bg too old\n");
ASSERT(!seenReadSuccess);
*tooOld = true;
ctx->done();
} else if (res.getError() != error_code_success) {
ctx->onError(res.getError());
} else {
if (!seenReadSuccess) {
info("BlobGranuleCorrectness::randomReadOp first success\n");
}
seenReadSuccess = true;
ctx->done();
}
},
[this, begin, end, results, cont]() {
[this, begin, end, results, tooOld, cont]() {
if (!*tooOld) {
std::vector<KeyValue> expected = store.getRange(begin, end, store.size(), false);
if (results->size() != expected.size()) {
error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}",
@ -77,8 +92,8 @@ private:
ASSERT((*results)[i].key == expected[i].key);
if ((*results)[i].value != expected[i].value) {
error(
fmt::format("randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}",
error(fmt::format(
"randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}",
i,
results->size(),
expected[i].key,
@ -87,6 +102,7 @@ private:
}
ASSERT((*results)[i].value == expected[i].value);
}
}
schedule(cont);
});
}
@ -110,9 +126,11 @@ private:
true);
},
[this, begin, end, results, cont]() {
if (seenReadSuccess) {
ASSERT(results->size() > 0);
ASSERT(results->front().key <= begin);
ASSERT(results->back().value >= end);
}
for (int i = 0; i < results->size(); i++) {
// no empty or inverted ranges

View File

@ -20,12 +20,19 @@
#
import sys
import subprocess
import argparse
import os
from subprocess import Popen, TimeoutExpired
import logging
import signal
from pathlib import Path
import glob
import random
import string
def random_string(len):
    # Return `len` characters drawn uniformly (one random.choice call per
    # character) from ASCII letters and digits.
    alphabet = string.ascii_letters + string.digits
    return ''.join(random.choice(alphabet) for _ in range(len))
def get_logger():
@ -48,6 +55,14 @@ def initialize_logger_level(logging_level):
logger.setLevel(logging.ERROR)
def dump_client_logs(log_dir):
    # Print the full contents of every client log file found in log_dir,
    # each wrapped in begin/end marker lines for readability in CI output.
    for log_file in glob.glob(os.path.join(log_dir, "*")):
        print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(log_file))
        with open(log_file, "r") as f:
            print(f.read())
        print(">>>>>>>>>>>>>>>>>>>> End of {}:".format(log_file))
def run_tester(args, test_file):
cmd = [args.tester_binary,
"--cluster-file", args.cluster_file,
@ -56,6 +71,12 @@ def run_tester(args, test_file):
cmd += ["--external-client-library", args.external_client_library]
if args.tmp_dir is not None:
cmd += ["--tmp-dir", args.tmp_dir]
log_dir = None
if args.log_dir is not None:
log_dir = Path(args.log_dir).joinpath(random_string(8))
log_dir.mkdir(exist_ok=True)
cmd += ['--log', "--log-dir", str(log_dir)]
if args.blob_granule_local_file_path is not None:
cmd += ["--blob-granule-local-file-path",
args.blob_granule_local_file_path]
@ -63,6 +84,7 @@ def run_tester(args, test_file):
get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd))
proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
timed_out = False
ret_code = 1
try:
ret_code = proc.wait(args.timeout)
except TimeoutExpired:
@ -72,15 +94,16 @@ def run_tester(args, test_file):
raise Exception('Unable to run tester (%s)' % e)
if ret_code != 0:
if ret_code < 0:
if timed_out:
reason = 'timed out after %d seconds' % args.timeout
elif ret_code < 0:
reason = signal.Signals(-ret_code).name
else:
reason = 'exit code: %d' % ret_code
if timed_out:
reason = 'timed out after %d seconds' % args.timeout
ret_code = 1
get_logger().error('\n\'%s\' did not complete succesfully (%s)' %
(cmd[0], reason))
if (log_dir is not None):
dump_client_logs(log_dir)
get_logger().info('')
return ret_code
@ -115,6 +138,8 @@ def parse_args(argv):
help='Path to a directory with test definitions. (default: ./)')
parser.add_argument('--timeout', type=int, default=300,
help='The timeout in seconds for running each individual test. (default 300)')
parser.add_argument('--log-dir', type=str, default=None,
help='The directory for storing logs (default: None)')
parser.add_argument('--logging-level', type=str, default='INFO',
choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'], help='Specifies the level of detail in the tester output (default=\'INFO\').')
parser.add_argument('--tmp-dir', type=str, default=None,

561
bindings/c/test/fdb_api.hpp Normal file
View File

@ -0,0 +1,561 @@
/*
* fdb_api.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDB_API_HPP
#define FDB_API_HPP
#pragma once
#ifndef FDB_API_VERSION
#define FDB_API_VERSION 720
#endif
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <fmt/format.h>
// introduce the option enums
#include <fdb_c_options.g.h>
namespace fdb {
// hide C API to discourage mixing C/C++ API
namespace native {
#include <foundationdb/fdb_c.h>
}
using ByteString = std::basic_string<uint8_t>;
using BytesRef = std::basic_string_view<uint8_t>;
using CharsRef = std::string_view;
using KeyRef = BytesRef;
using ValueRef = BytesRef;
// View a char buffer as raw bytes without copying.
inline uint8_t const* toBytePtr(char const* ptr) noexcept {
	return static_cast<uint8_t const*>(static_cast<void const*>(ptr));
}
// get bytestring view from charstring: e.g. std::basic_string{_view}<char>
template <template <class...> class StringLike, class Char>
BytesRef toBytesRef(const StringLike<Char>& s) noexcept {
static_assert(sizeof(Char) == 1);
return BytesRef(reinterpret_cast<uint8_t const*>(s.data()), s.size());
}
// get charstring view from bytestring: e.g. std::basic_string{_view}<uint8_t>
template <template <class...> class StringLike, class Char>
CharsRef toCharsRef(const StringLike<Char>& s) noexcept {
static_assert(sizeof(Char) == 1);
return CharsRef(reinterpret_cast<char const*>(s.data()), s.size());
}
[[maybe_unused]] constexpr const bool OverflowCheck = false;
// Narrow a byte-string length to int for the C API, optionally guarding
// against overflow when the compile-time OverflowCheck flag is enabled.
inline int intSize(BytesRef b) {
	const auto length = b.size();
	if constexpr (OverflowCheck) {
		constexpr auto bound = static_cast<size_t>(std::numeric_limits<int>::max());
		if (length > bound)
			throw std::overflow_error("byte strlen goes beyond int bounds");
	}
	return static_cast<int>(length);
}
class Error {
public:
using CodeType = native::fdb_error_t;
Error() noexcept : err(0) {}
explicit Error(CodeType err) noexcept : err(err) {}
char const* what() noexcept { return native::fdb_get_error(err); }
explicit operator bool() const noexcept { return err != 0; }
bool is(CodeType other) const noexcept { return err == other; }
CodeType code() const noexcept { return err; }
bool retryable() const noexcept { return native::fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err) != 0; }
private:
CodeType err;
};
/* Traits of value types held by ready futures.
Holds type and value extraction function. */
/* Traits of value types held by ready futures.
   Holds type and value extraction function. Each trait exposes:
     - Type:    the C++ representation of the future's payload
     - extract: copies the payload out of a READY native future into Type&,
                returning any extraction error as an Error value. */
namespace future_var {
// Future carrying no payload (e.g. commit); extract always succeeds.
struct None {
struct Type {};
static Error extract(native::FDBFuture*, Type&) noexcept { return Error(0); }
};
// Future carrying a 64-bit integer (e.g. read version).
struct Int64 {
using Type = int64_t;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
return Error(native::fdb_future_get_int64(f, &out));
}
};
// Future carrying a key as (pointer, length); the bytes are owned by the
// native future and remain valid only while it is alive.
struct Key {
using Type = std::pair<uint8_t const*, int>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_key, out_key_length] = out;
return Error(native::fdb_future_get_key(f, &out_key, &out_key_length));
}
};
// Future carrying an optional value: (present, pointer, length).
struct Value {
using Type = std::tuple<bool, uint8_t const*, int>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_present, out_value, out_value_length] = out;
// fdb_bool_t is converted to a real bool for the caller.
auto out_present_native = native::fdb_bool_t{};
auto err = native::fdb_future_get_value(f, &out_present_native, &out_value, &out_value_length);
out_present = (out_present_native != 0);
return Error(err);
}
};
// Future carrying a C array of strings: (array, count).
struct StringArray {
using Type = std::pair<const char**, int>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_strings, out_count] = out;
return Error(native::fdb_future_get_string_array(f, &out_strings, &out_count));
}
};
// Future carrying a key-value array: (array, count, more-data-available).
struct KeyValueArray {
using Type = std::tuple<native::FDBKeyValue const*, int, bool>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_kv, out_count, out_more] = out;
auto out_more_native = native::fdb_bool_t{};
auto err = native::fdb_future_get_keyvalue_array(f, &out_kv, &out_count, &out_more_native);
out_more = (out_more_native != 0);
return Error(err);
}
};
} // namespace future_var
// Compose `preamble` followed by the error's description and raise the
// combined message as a std::runtime_error.
[[noreturn]] inline void throwError(std::string_view preamble, Error err) {
	throw std::runtime_error(std::string(preamble) + err.what());
}
// Highest API version supported by the loaded client library.
inline int maxApiVersion() {
return native::fdb_get_max_api_version();
}
// Select the API version, returning any failure as an Error value.
inline Error selectApiVersionNothrow(int version) {
return Error(native::fdb_select_api_version(version));
}
// Select the API version; throws std::runtime_error on failure.
inline void selectApiVersion(int version) {
if (auto err = selectApiVersionNothrow(version)) {
throwError(fmt::format("ERROR: fdb_select_api_version({}): ", version), err);
}
}
// Thin wrappers over the global fdb network API. setup() must be called
// once after selectApiVersion(); run() then blocks the calling thread
// until stop() is invoked from another thread.
namespace network {
// Set a byte-string-valued network option (Nothrow variant returns Error).
inline Error setOptionNothrow(FDBNetworkOption option, BytesRef str) noexcept {
return Error(native::fdb_network_set_option(option, str.data(), intSize(str)));
}
// Set an integer-valued network option; the value is passed to the C API
// as a pointer to its 8-byte little-endian in-memory representation.
inline Error setOptionNothrow(FDBNetworkOption option, int64_t value) noexcept {
return Error(native::fdb_network_set_option(
option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
}
// Throwing variant of the byte-string option setter.
inline void setOption(FDBNetworkOption option, BytesRef str) {
if (auto err = setOptionNothrow(option, str)) {
throwError(fmt::format("ERROR: fdb_network_set_option({}): ",
static_cast<std::underlying_type_t<FDBNetworkOption>>(option)),
err);
}
}
// Throwing variant of the integer option setter.
inline void setOption(FDBNetworkOption option, int64_t value) {
if (auto err = setOptionNothrow(option, value)) {
throwError(fmt::format("ERROR: fdb_network_set_option({}, {}): ",
static_cast<std::underlying_type_t<FDBNetworkOption>>(option),
value),
err);
}
}
inline Error setupNothrow() noexcept {
return Error(native::fdb_setup_network());
}
inline void setup() {
if (auto err = setupNothrow())
throwError("ERROR: fdb_network_setup(): ", err);
}
// Runs the network event loop; blocks until stop() is called.
inline Error run() {
return Error(native::fdb_run_network());
}
inline Error stop() {
return Error(native::fdb_stop_network());
}
} // namespace network
class Transaction;
class Database;
// Shared-ownership RAII wrapper around native::FDBResult (the synchronous
// result type returned by e.g. readBlobGranules). Only Transaction can
// construct a non-empty Result.
class Result {
friend class Transaction;
std::shared_ptr<native::FDBResult> r;
// Takes ownership of result; fdb_result_destroy is called when the last
// copy of this Result is destroyed. A null pointer yields an empty Result.
Result(native::FDBResult* result) {
if (result)
r = std::shared_ptr<native::FDBResult>(result, &native::fdb_result_destroy);
}
public:
using KeyValueArray = future_var::KeyValueArray::Type;
// Extract (kv-array, count, more) into out; returns any error as a value.
// The kv pointers remain owned by the underlying native result.
Error getKeyValueArrayNothrow(KeyValueArray& out) const noexcept {
auto out_more_native = native::fdb_bool_t{};
auto& [out_kv, out_count, out_more] = out;
auto err_raw = native::fdb_result_get_keyvalue_array(r.get(), &out_kv, &out_count, &out_more_native);
out_more = out_more_native != 0;
return Error(err_raw);
}
// Throwing variant of getKeyValueArrayNothrow.
KeyValueArray getKeyValueArray() const {
auto ret = KeyValueArray{};
if (auto err = getKeyValueArrayNothrow(ret))
throwError("ERROR: result_get_keyvalue_array(): ", err);
return ret;
}
};
// Shared-ownership RAII wrapper around native::FDBFuture. Copies share the
// same native future; the native handle is destroyed with the last copy.
class Future {
protected:
	friend class Transaction;
	std::shared_ptr<native::FDBFuture> f;

	// Takes ownership of future; a null pointer yields an invalid Future.
	Future(native::FDBFuture* future) {
		if (future)
			f = std::shared_ptr<native::FDBFuture>(future, &native::fdb_future_destroy);
	}

	// wrap any capturing lambda as callback passable to fdb_future_set_callback().
	// destroy after invocation.
	template <class Fn>
	static void callback(native::FDBFuture*, void* param) {
		auto fp = static_cast<Fn*>(param);
		try {
			(*fp)();
		} catch (const std::exception& e) {
			fmt::print(stderr, "ERROR: Exception thrown in user callback: {}", e.what());
		}
		delete fp;
	}

	// set as callback user-defined completion handler of signature void(Future)
	template <class FutureType, class UserFunc>
	void then(UserFunc&& fn) {
		auto cb = [fut = FutureType(*this), fn = std::forward<UserFunc>(fn)]() { fn(fut); };
		using cb_type = std::decay_t<decltype(cb)>;
		auto fp = new cb_type(std::move(cb));
		if (auto err = Error(native::fdb_future_set_callback(f.get(), &callback<cb_type>, fp))) {
			throwError("ERROR: future_set_callback: ", err);
		}
	}

public:
	Future() noexcept : Future(nullptr) {}
	Future(const Future&) noexcept = default;
	Future& operator=(const Future&) noexcept = default;

	// True when this wraps a live native future.
	bool valid() const noexcept { return f != nullptr; }
	explicit operator bool() const noexcept { return valid(); }

	bool ready() const noexcept {
		assert(valid());
		return native::fdb_future_is_ready(f.get()) != 0;
	}

	Error blockUntilReady() const noexcept {
		assert(valid());
		return Error(native::fdb_future_block_until_ready(f.get()));
	}

	Error error() const noexcept {
		assert(valid());
		return Error(native::fdb_future_get_error(f.get()));
	}

	void cancel() noexcept { native::fdb_future_cancel(f.get()); }

	// Extract the ready value per VarTraits; throws on extraction failure.
	// Precondition (asserted): future is valid and holds no error.
	template <class VarTraits>
	typename VarTraits::Type get() const {
		assert(valid());
		assert(!error());
		auto out = typename VarTraits::Type{};
		if (auto err = VarTraits::extract(f.get(), out)) {
			throwError("future_get: ", err);
		}
		return out;
	}

	// Extract the ready value into var, returning any failure as an Error.
	// BUGFIX: previously extracted into a discarded local temporary, so the
	// caller's var was never written; now extracts directly into var.
	template <class VarTraits>
	Error getNothrow(typename VarTraits::Type& var) const noexcept {
		assert(valid());
		assert(!error());
		return VarTraits::extract(f.get(), var);
	}

	// Register fn (signature void(Future)) to run when the future becomes ready.
	template <class UserFunc>
	void then(UserFunc&& fn) {
		then<Future>(std::forward<UserFunc>(fn));
	}
};
// Future whose payload type is fixed by VarTraits (see future_var). Exposes
// type-safe get()/getNothrow()/then() and hides the type-erased base forms.
template <typename VarTraits>
class TypedFuture : public Future {
friend class Future;
friend class Transaction;
using SelfType = TypedFuture<VarTraits>;
using Future::Future;
// hide type-unsafe inherited functions
using Future::get;
using Future::getNothrow;
using Future::then;
// Private: only Future/Transaction may rewrap an untyped Future.
TypedFuture(const Future& f) noexcept : Future(f) {}
public:
using ContainedType = typename VarTraits::Type;
// Downcast back to the type-erased Future (shares the same native handle).
Future eraseType() const noexcept { return static_cast<Future const&>(*this); }
ContainedType get() const { return get<VarTraits>(); }
Error getNothrow(ContainedType& out) const noexcept { return getNothrow<VarTraits>(out); }
// Register fn with signature void(TypedFuture<VarTraits>).
template <class UserFunc>
void then(UserFunc&& fn) {
Future::then<SelfType>(std::forward<UserFunc>(fn));
}
};
// Aggregate mirroring the C API's key selector arguments: a reference key,
// equality flag and offset. Field order matches the FDB_KEYSEL_* macros so
// a selector can be brace-initialized directly from them (see key_select).
struct KeySelector {
const uint8_t* key;
int keyLength;
bool orEqual;
int offset;
};
// Convenience factories for the four canonical key selectors. Each
// FDB_KEYSEL_* macro expands to the (key, length, orEqual, offset) argument
// list; `+ offset` shifts the selector's final offset component.
// NOTE(review): the returned selector borrows `key`'s bytes — the caller
// must keep them alive until the selector is consumed.
namespace key_select {
inline KeySelector firstGreaterThan(KeyRef key, int offset = 0) {
return KeySelector{ FDB_KEYSEL_FIRST_GREATER_THAN(key.data(), intSize(key)) + offset };
}
inline KeySelector firstGreaterOrEqual(KeyRef key, int offset = 0) {
return KeySelector{ FDB_KEYSEL_FIRST_GREATER_OR_EQUAL(key.data(), intSize(key)) + offset };
}
inline KeySelector lastLessThan(KeyRef key, int offset = 0) {
return KeySelector{ FDB_KEYSEL_LAST_LESS_THAN(key.data(), intSize(key)) + offset };
}
inline KeySelector lastLessOrEqual(KeyRef key, int offset = 0) {
return KeySelector{ FDB_KEYSEL_LAST_LESS_OR_EQUAL(key.data(), intSize(key)) + offset };
}
} // namespace key_select
// Shared-ownership RAII wrapper around native::FDBTransaction. Created via
// Database::createTransaction(); copies share the same native transaction.
class Transaction {
friend class Database;
std::shared_ptr<native::FDBTransaction> tr;
// Takes ownership of tr_raw; fdb_transaction_destroy runs with the last copy.
explicit Transaction(native::FDBTransaction* tr_raw) {
if (tr_raw)
tr = std::shared_ptr<native::FDBTransaction>(tr_raw, &native::fdb_transaction_destroy);
}
public:
Transaction() noexcept : Transaction(nullptr) {}
Transaction(const Transaction&) noexcept = default;
Transaction& operator=(const Transaction&) noexcept = default;
// True when this wraps a live native transaction.
bool valid() const noexcept { return tr != nullptr; }
explicit operator bool() const noexcept { return valid(); }
// Set an integer-valued transaction option; the value is passed to the
// C API as a pointer to its 8-byte in-memory representation.
Error setOptionNothrow(FDBTransactionOption option, int64_t value) noexcept {
return Error(native::fdb_transaction_set_option(
tr.get(), option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
}
// Set a byte-string-valued transaction option.
Error setOptionNothrow(FDBTransactionOption option, BytesRef str) noexcept {
return Error(native::fdb_transaction_set_option(tr.get(), option, str.data(), intSize(str)));
}
// Throwing variants of the option setters.
void setOption(FDBTransactionOption option, int64_t value) {
if (auto err = setOptionNothrow(option, value)) {
throwError(fmt::format("transaction_set_option({}, {}) returned error: ",
static_cast<std::underlying_type_t<FDBTransactionOption>>(option),
value),
err);
}
}
void setOption(FDBTransactionOption option, BytesRef str) {
if (auto err = setOptionNothrow(option, str)) {
throwError(fmt::format("transaction_set_option({}) returned error: ",
static_cast<std::underlying_type_t<FDBTransactionOption>>(option)),
err);
}
}
TypedFuture<future_var::Int64> getReadVersion() { return native::fdb_transaction_get_read_version(tr.get()); }
// Committed version is only meaningful after a successful commit().
Error getCommittedVersionNothrow(int64_t& out) {
return Error(native::fdb_transaction_get_committed_version(tr.get(), &out));
}
int64_t getCommittedVersion() {
auto out = int64_t{};
if (auto err = getCommittedVersionNothrow(out)) {
throwError("get_committed_version: ", err);
}
return out;
}
// Resolve a key selector to a concrete key.
TypedFuture<future_var::Key> getKey(KeySelector sel, bool snapshot) {
return native::fdb_transaction_get_key(tr.get(), sel.key, sel.keyLength, sel.orEqual, sel.offset, snapshot);
}
// Read a single key's (optional) value.
TypedFuture<future_var::Value> get(KeyRef key, bool snapshot) {
return native::fdb_transaction_get(tr.get(), key.data(), intSize(key), snapshot);
}
// Usage: tx.getRange(key_select::firstGreaterOrEqual(firstKey), key_select::lastLessThan(lastKey), ...)
// gets key-value pairs in key range [begin, end)
TypedFuture<future_var::KeyValueArray> getRange(KeySelector first,
KeySelector last,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
bool snapshot,
bool reverse) {
return native::fdb_transaction_get_range(tr.get(),
first.key,
first.keyLength,
first.orEqual,
first.offset,
last.key,
last.keyLength,
last.orEqual,
last.offset,
limit,
target_bytes,
mode,
iteration,
snapshot,
reverse);
}
// Synchronously read blob granules for [begin, end); returns a Result
// rather than a Future (the C API call is blocking).
Result readBlobGranules(KeyRef begin,
KeyRef end,
int64_t begin_version,
int64_t read_version,
native::FDBReadBlobGranuleContext context) {
return Result(native::fdb_transaction_read_blob_granules(
tr.get(), begin.data(), intSize(begin), end.data(), intSize(end), begin_version, read_version, context));
}
TypedFuture<future_var::None> commit() { return native::fdb_transaction_commit(tr.get()); }
// Standard retry-loop helper: resolves when the transaction may be retried.
TypedFuture<future_var::None> onError(Error err) { return native::fdb_transaction_on_error(tr.get(), err.code()); }
void reset() { return native::fdb_transaction_reset(tr.get()); }
// Buffered mutations; only durable after a successful commit().
void set(KeyRef key, ValueRef value) {
native::fdb_transaction_set(tr.get(), key.data(), intSize(key), value.data(), intSize(value));
}
void clear(KeyRef key) { native::fdb_transaction_clear(tr.get(), key.data(), intSize(key)); }
void clearRange(KeyRef begin, KeyRef end) {
native::fdb_transaction_clear_range(tr.get(), begin.data(), intSize(begin), end.data(), intSize(end));
}
};
// Shared-ownership RAII wrapper around native::FDBDatabase; the factory for
// transactions. Copies share the same native handle.
class Database {
std::shared_ptr<native::FDBDatabase> db;
public:
Database(const Database&) noexcept = default;
Database& operator=(const Database&) noexcept = default;
// Open a database from a cluster file path; throws on failure.
Database(const std::string& cluster_file_path) : db(nullptr) {
auto db_raw = static_cast<native::FDBDatabase*>(nullptr);
if (auto err = Error(native::fdb_create_database(cluster_file_path.c_str(), &db_raw)))
throwError(fmt::format("Failed to create database with '{}': ", cluster_file_path), err);
db = std::shared_ptr<native::FDBDatabase>(db_raw, &native::fdb_database_destroy);
}
// Null database; createTransaction() on it throws.
Database() noexcept : db(nullptr) {}
// Set an integer-valued database option (passed as 8 raw bytes to the C API).
Error setOptionNothrow(FDBDatabaseOption option, int64_t value) noexcept {
return Error(native::fdb_database_set_option(
db.get(), option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
}
// Set a byte-string-valued database option.
Error setOptionNothrow(FDBDatabaseOption option, BytesRef str) noexcept {
return Error(native::fdb_database_set_option(db.get(), option, str.data(), intSize(str)));
}
// Throwing variants of the option setters.
void setOption(FDBDatabaseOption option, int64_t value) {
if (auto err = setOptionNothrow(option, value)) {
throwError(fmt::format("database_set_option({}, {}) returned error: ",
static_cast<std::underlying_type_t<FDBDatabaseOption>>(option),
value),
err);
}
}
void setOption(FDBDatabaseOption option, BytesRef str) {
if (auto err = setOptionNothrow(option, str)) {
throwError(fmt::format("database_set_option({}) returned error: ",
static_cast<std::underlying_type_t<FDBDatabaseOption>>(option)),
err);
}
}
// Create a new transaction on this database; throws on a null database or
// on a C API failure.
Transaction createTransaction() {
if (!db)
throw std::runtime_error("create_transaction from null database");
auto tx_native = static_cast<native::FDBTransaction*>(nullptr);
auto err = Error(native::fdb_database_create_transaction(db.get(), &tx_native));
if (err)
throwError("Failed to create transaction: ", err);
return Transaction(tx_native);
}
};
} // namespace fdb
#endif /*FDB_API_HPP*/

View File

@ -0,0 +1,288 @@
/*
* async.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <boost/asio.hpp>
#include "async.hpp"
#include "future.hpp"
#include "logger.hpp"
#include "operations.hpp"
#include "stats.hpp"
#include "time.hpp"
#include "utils.hpp"
extern thread_local mako::Logger logr;
using namespace fdb;
namespace mako {
// Schedule runOneTick() on the io_context. Capturing shared_from_this()
// keeps this state object alive until the posted task has executed.
void ResumableStateForPopulate::postNextTick() {
boost::asio::post(io_context, [this, state = shared_from_this()]() { runOneTick(); });
}
// One resumable populate step: insert rows from key_checkpoint up to the
// next commit boundary, then commit asynchronously. The commit continuation
// either re-posts this tick (more keys remain / retryable error) or signals
// completion, so at most one commit is ever in flight per state object.
void ResumableStateForPopulate::runOneTick() {
const auto num_commit_every = args.txnspec.ops[OP_INSERT][OP_COUNT];
for (auto i = key_checkpoint; i <= key_end; i++) {
genKey(keystr.data(), KEY_PREFIX, args, i);
randomString(valstr.data(), args.value_length);
tx.set(keystr, valstr);
stats.incrOpCount(OP_INSERT);
// commit at each batch boundary and at the very last key
if (i == key_end || (i - key_begin + 1) % num_commit_every == 0) {
watch_commit.start();
tx.commit().then([this, state = shared_from_this(), i](Future f) {
if (auto err = f.error()) {
// commit failed: run the onError retry protocol; abort ends the
// workload, otherwise re-post the tick to retry from key_checkpoint
logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
"ERROR",
"commit for populate returned '{}'",
err.what());
tx.onError(err).then([this, state = shared_from_this()](Future f) {
const auto f_rc = handleForOnError(tx, f, "ON_ERROR_FOR_POPULATE");
if (f_rc == FutureRC::ABORT) {
signalEnd();
return;
} else {
postNextTick();
}
});
} else {
// successfully committed
watch_commit.stop();
watch_tx.setStop(watch_commit.getStop());
// sample latencies on every args.sampling-th transaction
if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
const auto commit_latency = watch_commit.diff();
const auto tx_duration = watch_tx.diff();
stats.addLatency(OP_COMMIT, commit_latency);
stats.addLatency(OP_TRANSACTION, tx_duration);
sample_bins[OP_COMMIT].put(commit_latency);
sample_bins[OP_TRANSACTION].put(tx_duration);
}
stats.incrOpCount(OP_COMMIT);
stats.incrOpCount(OP_TRANSACTION);
tx.reset();
watch_tx.startFromStop();
// advance the resume point past the committed batch
key_checkpoint = i + 1;
if (i != key_end) {
postNextTick();
} else {
logr.debug("Populated {} rows [{}, {}]: {:6.3f} sec",
key_end - key_begin + 1,
key_begin,
key_end,
toDoubleSeconds(watch_total.stop().diff()));
signalEnd();
return;
}
}
});
// leave the loop; the commit continuation re-enters runOneTick()
break;
}
}
}
// Schedule runOneTick() on the io_context. Capturing shared_from_this()
// keeps this state object alive until the posted task has executed.
void ResumableStateForRunWorkload::postNextTick() {
boost::asio::post(io_context, [this, state = shared_from_this()]() { runOneTick(); });
}
// Execute steps of the current operation iterator. Client-side steps that
// return no future are looped over synchronously; a blocking step registers
// a continuation that handles errors (via the onError protocol), updates
// stats, and either continues the iterator or starts the next transaction.
void ResumableStateForRunWorkload::runOneTick() {
assert(iter != OpEnd);
if (iter.step == 0 /* first step */)
prepareKeys(iter.op, key1, key2, args);
watch_step.start();
if (iter.step == 0)
watch_op = Stopwatch(watch_step.getStart());
auto f = Future{};
// to minimize context switch overhead, repeat immediately completed ops
// in a loop, not an async continuation.
repeat_immediate_steps:
f = opTable[iter.op].stepFunction(iter.step)(tx, args, key1, key2, val);
if (!f) {
// immediately completed client-side ops: e.g. set, setrange, clear, clearrange, ...
updateStepStats();
iter = getOpNext(args, iter);
if (iter == OpEnd)
onTransactionSuccess();
else
goto repeat_immediate_steps;
} else {
// step is blocking. register a continuation and return
f.then([this, state = shared_from_this()](Future f) {
if (auto postStepFn = opTable[iter.op].postStepFunction(iter.step))
postStepFn(f, tx, args, key1, key2, val);
if (iter.stepKind() != StepKind::ON_ERROR) {
if (auto err = f.error()) {
// step failed: run the standard onError retry protocol
logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
"ERROR",
"{}:{} returned '{}'",
iter.opName(),
iter.step,
err.what());
tx.onError(err).then([this, state = shared_from_this()](Future f) {
const auto rc = handleForOnError(tx, f, fmt::format("{}:{}", iter.opName(), iter.step));
if (rc == FutureRC::RETRY) {
stats.incrErrorCount(iter.op);
} else if (rc == FutureRC::CONFLICT) {
stats.incrConflictCount();
} else if (rc == FutureRC::ABORT) {
tx.reset();
signalEnd();
return;
}
// restart this iteration from beginning
iter = getOpBegin(args);
needs_commit = false;
postNextTick();
});
} else {
// async step succeeded
updateStepStats();
iter = getOpNext(args, iter);
if (iter == OpEnd) {
onTransactionSuccess();
} else {
postNextTick();
}
}
} else {
// blob granules op error
auto rc = handleForOnError(tx, f, "BG_ON_ERROR");
if (rc == FutureRC::RETRY) {
stats.incrErrorCount(iter.op);
} else if (rc == FutureRC::CONFLICT) {
stats.incrConflictCount();
} else if (rc == FutureRC::ABORT) {
tx.reset();
// NOTE(review): this abort path bumps stopcount directly while the
// path above calls signalEnd() — confirm signalEnd() is equivalent
// to stopcount.fetch_add(1) or these two paths diverge.
stopcount.fetch_add(1);
return;
}
iter = getOpBegin(args);
needs_commit = false;
// restart this iteration from beginning
postNextTick();
}
});
}
}
// Record timing/counters after a step completed successfully: commit steps
// close the transaction boundary, and the final step of an op records the
// whole-op latency. Latency samples are taken every args.sampling-th
// transaction only.
void ResumableStateForRunWorkload::updateStepStats() {
logr.debug("Step {}:{} succeeded", iter.opName(), iter.step);
// step successful
watch_step.stop();
const auto do_sample = stats.getOpCount(OP_TRANSACTION) % args.sampling == 0;
if (iter.stepKind() == StepKind::COMMIT) {
// reset transaction boundary
const auto step_latency = watch_step.diff();
if (do_sample) {
stats.addLatency(OP_COMMIT, step_latency);
sample_bins[OP_COMMIT].put(step_latency);
}
tx.reset();
stats.incrOpCount(OP_COMMIT);
needs_commit = false;
}
// op completed successfully
if (iter.step + 1 == opTable[iter.op].steps()) {
// ops that mutate require a commit before the transaction can finish
if (opTable[iter.op].needsCommit())
needs_commit = true;
watch_op.setStop(watch_step.getStop());
if (do_sample) {
const auto op_latency = watch_op.diff();
stats.addLatency(iter.op, op_latency);
sample_bins[iter.op].put(op_latency);
}
stats.incrOpCount(iter.op);
}
}
// Called when the op iterator reaches OpEnd. If a commit is still pending
// (mutating ops ran, or commit_get is set), commit asynchronously and handle
// errors through the onError protocol; otherwise just record the transaction
// and either end the workload or start the next iteration.
void ResumableStateForRunWorkload::onTransactionSuccess() {
	if (needs_commit || args.commit_get) {
		// task completed, need to commit before finish
		watch_commit.start();
		tx.commit().then([this, state = shared_from_this()](Future f) {
			if (auto err = f.error()) {
				// commit had errors
				logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
				                       "ERROR",
				                       "Post-iteration commit returned error: {}",
				                       err.what());
				tx.onError(err).then([this, state = shared_from_this()](Future f) {
					const auto rc = handleForOnError(tx, f, "ON_ERROR");
					if (rc == FutureRC::CONFLICT)
						stats.incrConflictCount();
					else
						stats.incrErrorCount(OP_COMMIT);
					if (rc == FutureRC::ABORT) {
						signalEnd();
						return;
					}
					if (ended()) {
						signalEnd();
					} else {
						// retry the whole iteration from its first op
						iter = getOpBegin(args);
						needs_commit = false;
						postNextTick();
					}
				});
			} else {
				// commit successful
				watch_commit.stop();
				watch_tx.setStop(watch_commit.getStop());
				if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
					const auto commit_latency = watch_commit.diff();
					const auto tx_duration = watch_tx.diff();
					stats.addLatency(OP_COMMIT, commit_latency);
					// BUGFIX: record the full transaction duration under
					// OP_TRANSACTION. Previously commit_latency was recorded
					// here, inconsistent with the sample bin below and with
					// the populate path, which both use tx_duration.
					stats.addLatency(OP_TRANSACTION, tx_duration);
					sample_bins[OP_COMMIT].put(commit_latency);
					sample_bins[OP_TRANSACTION].put(tx_duration);
				}
				stats.incrOpCount(OP_COMMIT);
				stats.incrOpCount(OP_TRANSACTION);
				tx.reset();
				watch_tx.startFromStop();
				if (ended()) {
					signalEnd();
				} else {
					// start next iteration
					iter = getOpBegin(args);
					postNextTick();
				}
			}
		});
	} else {
		// transaction completed but no need to commit
		watch_tx.stop();
		if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
			const auto tx_duration = watch_tx.diff();
			stats.addLatency(OP_TRANSACTION, tx_duration);
			sample_bins[OP_TRANSACTION].put(tx_duration);
		}
		stats.incrOpCount(OP_TRANSACTION);
		watch_tx.startFromStop();
		tx.reset();
		if (ended()) {
			signalEnd();
		} else {
			iter = getOpBegin(args);
			// start next iteration
			postNextTick();
		}
	}
}
} // namespace mako

View File

@ -0,0 +1,127 @@
/*
* async.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_ASYNC_HPP
#define MAKO_ASYNC_HPP
#include <atomic>
#include <memory>
#include <boost/asio.hpp>
#include "logger.hpp"
#include "mako.hpp"
#include "shm.hpp"
#include "stats.hpp"
#include "time.hpp"
namespace mako {
// as we don't have coroutines yet, we need to store in heap the complete state of execution,
// such that we can resume exactly where we were from last database op.
// Heap-allocated state machine for the populate (build) phase: since coroutines
// are unavailable, everything needed to resume after each async DB call lives here.
struct ResumableStateForPopulate : std::enable_shared_from_this<ResumableStateForPopulate> {
	Logger logr;
	fdb::Database db;
	fdb::Transaction tx;
	boost::asio::io_context& io_context; // event loop the next tick is posted onto
	Arguments const& args;
	ThreadStatistics& stats;
	std::atomic<int>& stopcount; // incremented once when this task finishes (see signalEnd)
	LatencySampleBinArray sample_bins;
	int key_begin; // first key index this task populates
	int key_end; // last key index for this task (inclusivity decided by runOneTick(); not visible here)
	int key_checkpoint; // presumably the first key of the batch currently in flight — confirm in runOneTick()
	fdb::ByteString keystr; // reusable key buffer, sized to args.key_length
	fdb::ByteString valstr; // reusable value buffer, sized to args.value_length
	Stopwatch watch_tx;
	Stopwatch watch_commit;
	Stopwatch watch_total;
	ResumableStateForPopulate(Logger logr,
	                          fdb::Database db,
	                          fdb::Transaction tx,
	                          boost::asio::io_context& io_context,
	                          Arguments const& args,
	                          ThreadStatistics& stats,
	                          std::atomic<int>& stopcount,
	                          int key_begin,
	                          int key_end)
	  : logr(logr), db(db), tx(tx), io_context(io_context), args(args), stats(stats), stopcount(stopcount),
	    key_begin(key_begin), key_end(key_end), key_checkpoint(key_begin) {
		keystr.resize(args.key_length);
		valstr.resize(args.value_length);
	}
	void runOneTick(); // performs one resumable slice of the populate loop
	void postNextTick(); // schedules runOneTick() on io_context
	void signalEnd() { stopcount.fetch_add(1); }
};
using PopulateStateHandle = std::shared_ptr<ResumableStateForPopulate>;
// Heap-allocated state machine for one asynchronous workload transaction:
// holds the op iterator, reusable key/value buffers and stopwatches needed to
// resume after each async DB call (no coroutines available yet).
struct ResumableStateForRunWorkload : std::enable_shared_from_this<ResumableStateForRunWorkload> {
	Logger logr;
	fdb::Database db;
	fdb::Transaction tx;
	boost::asio::io_context& io_context; // event loop the next tick is posted onto
	Arguments const& args;
	ThreadStatistics& stats;
	std::atomic<int>& stopcount; // incremented once when this task finishes (see signalEnd)
	std::atomic<int> const& signal; // external stop signal; SIGNAL_RED requests shutdown
	int max_iters; // transaction budget for this task, or -1 for unlimited
	OpIterator iter; // current position in the op sequence
	LatencySampleBinArray sample_bins;
	fdb::ByteString key1; // reusable key buffer, sized to args.key_length
	fdb::ByteString key2; // second key buffer (range ops), sized to args.key_length
	fdb::ByteString val; // reusable value buffer, sized to args.value_length
	Stopwatch watch_step;
	Stopwatch watch_op;
	Stopwatch watch_commit;
	Stopwatch watch_tx;
	bool needs_commit; // set when a completed op requires a commit before the iteration ends
	ResumableStateForRunWorkload(Logger logr,
	                             fdb::Database db,
	                             fdb::Transaction tx,
	                             boost::asio::io_context& io_context,
	                             Arguments const& args,
	                             ThreadStatistics& stats,
	                             std::atomic<int>& stopcount,
	                             std::atomic<int> const& signal,
	                             int max_iters,
	                             OpIterator iter)
	  : logr(logr), db(db), tx(tx), io_context(io_context), args(args), stats(stats), stopcount(stopcount),
	    signal(signal), max_iters(max_iters), iter(iter), needs_commit(false) {
		key1.resize(args.key_length);
		key2.resize(args.key_length);
		val.resize(args.value_length);
	}
	void signalEnd() noexcept { stopcount.fetch_add(1); }
	// True when the iteration budget is exhausted or shutdown was signalled.
	// BUGFIX: comparison was inverted (max_iters >= opcount), which made any
	// task with a finite budget report "ended" from its very first check.
	bool ended() noexcept {
		return (max_iters != -1 && max_iters <= stats.getOpCount(OP_TRANSACTION)) || signal.load() == SIGNAL_RED;
	}
	void postNextTick(); // schedules runOneTick() on io_context
	void runOneTick(); // performs one resumable slice of the workload loop
	void updateStepStats(); // records per-step latency/op counters
	void onTransactionSuccess(); // commit (if needed) and wrap up one iteration
};
using RunWorkloadStateHandle = std::shared_ptr<ResumableStateForRunWorkload>;
} // namespace mako
#endif /*MAKO_ASYNC_HPP*/

View File

@ -0,0 +1,116 @@
/*
* blob_granules.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blob_granules.hpp"
#include "limit.hpp"
#include "logger.hpp"
#include <cstdio>
#include <fdb_api.hpp>
extern thread_local mako::Logger logr;
namespace mako::blob_granules::local_file {
int64_t startLoad(const char* filename,
int filenameLength,
int64_t offset,
int64_t length,
int64_t fullFileLength,
void* userContext) {
FILE* fp;
char full_fname[PATH_MAX]{
0,
};
int loadId;
uint8_t* data;
size_t readSize;
auto context = static_cast<UserContext*>(userContext);
loadId = context->nextId;
if (context->dataById[loadId] != 0) {
logr.error("too many granule file loads at once: {}", MAX_BG_IDS);
return -1;
}
context->nextId = (context->nextId + 1) % MAX_BG_IDS;
int ret = snprintf(full_fname, PATH_MAX, "%s%s", context->bgFilePath, filename);
if (ret < 0 || ret >= PATH_MAX) {
logr.error("BG filename too long: {}{}", context->bgFilePath, filename);
return -1;
}
fp = fopen(full_fname, "r");
if (!fp) {
logr.error("BG could not open file: {}", full_fname);
return -1;
}
// don't seek if offset == 0
if (offset && fseek(fp, offset, SEEK_SET)) {
// if fseek was non-zero, it failed
logr.error("BG could not seek to %{} in file {}", offset, full_fname);
fclose(fp);
return -1;
}
data = new uint8_t[length];
readSize = fread(data, sizeof(uint8_t), length, fp);
fclose(fp);
if (readSize != length) {
logr.error("BG could not read {} bytes from file: {}", length, full_fname);
return -1;
}
context->dataById[loadId] = data;
return loadId;
}
// Returns the buffer previously filled by startLoad() for loadId,
// or null (after logging) if the slot holds no data.
uint8_t* getLoad(int64_t loadId, void* userContext) {
	auto ctx = static_cast<UserContext*>(userContext);
	auto* const buffer = ctx->dataById[loadId];
	if (buffer == nullptr) {
		logr.error("BG loadId invalid for get_load: {}", loadId);
		return nullptr;
	}
	return buffer;
}
// Releases the buffer owned by loadId's slot and clears the slot.
// An empty slot is logged but still "freed" (delete[] on null is a no-op).
void freeLoad(int64_t loadId, void* userContext) {
	auto ctx = static_cast<UserContext*>(userContext);
	uint8_t*& slot = ctx->dataById[loadId];
	if (slot == nullptr) {
		logr.error("BG loadId invalid for free_load: {}", loadId);
	}
	delete[] slot;
	slot = nullptr;
}
// Builds the native read context that wires the local-file callbacks
// (startLoad/getLoad/freeLoad) into a readBlobGranules() call.
fdb::native::FDBReadBlobGranuleContext createApiContext(UserContext& ctx, bool materialize_files) {
	fdb::native::FDBReadBlobGranuleContext api_ctx{};
	api_ctx.userContext = &ctx;
	api_ctx.start_load_f = &startLoad;
	api_ctx.get_load_f = &getLoad;
	api_ctx.free_load_f = &freeLoad;
	api_ctx.debugNoMaterialize = !materialize_files;
	api_ctx.granuleParallelism = 2; // TODO make knob or setting for changing this?
	return api_ctx;
}
} // namespace mako::blob_granules::local_file

View File

@ -0,0 +1,50 @@
/*
* blob_granules.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_BLOB_GRANULES_HPP
#define MAKO_BLOB_GRANULES_HPP
#include <cstdint>
#include <memory>
#include <fdb_api.hpp>
namespace mako::blob_granules::local_file {
constexpr const int MAX_BG_IDS = 1000;
// TODO: could always abstract this into something more generically usable by something other than mako.
// But outside of testing there are likely few use cases for local granules
// Per-call state for loading blob granule files from local disk: a table of
// up to MAX_BG_IDS buffers addressed by the load IDs handed out by startLoad().
struct UserContext {
	char const* bgFilePath; // directory/prefix prepended to granule file names (not owned)
	int nextId; // next load ID to hand out; wraps modulo MAX_BG_IDS
	std::unique_ptr<uint8_t*[]> dataByIdMem; // owning storage for the ID -> buffer table
	uint8_t** dataById; // raw view of dataByIdMem for the C callbacks
	UserContext(char const* filePath)
	  : bgFilePath(filePath), nextId(0), dataByIdMem(new uint8_t*[MAX_BG_IDS]()), dataById(dataByIdMem.get()) {}
	// NOTE(review): releases the table only, not any still-allocated buffers —
	// callers appear expected to freeLoad() everything first; confirm.
	void clear() { dataByIdMem.reset(); }
};
fdb::native::FDBReadBlobGranuleContext createApiContext(UserContext& ctx, bool materialize_files);
} // namespace mako::blob_granules::local_file
#endif /*MAKO_BLOB_GRANULES_HPP*/

View File

@ -0,0 +1,89 @@
/*
* future.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_FUTURE_HPP
#define MAKO_FUTURE_HPP
#include <fdb_api.hpp>
#include <cassert>
#include <string_view>
#include "logger.hpp"
#include "macro.hpp"
extern thread_local mako::Logger logr;
namespace mako {
enum class FutureRC { OK, RETRY, CONFLICT, ABORT };
// Classifies the result of an already-ready on_error() future:
// CONFLICT for not_committed, RETRY for retryable errors (and for success,
// since a successful on_error() means "try the transaction again"),
// ABORT (with tx.reset()) otherwise.
template <class FutureType>
force_inline FutureRC handleForOnError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
	const auto err = f.error();
	if (!err)
		return FutureRC::RETRY;
	if (err.is(1020 /*not_committed*/))
		return FutureRC::CONFLICT;
	if (err.retryable()) {
		logr.warn("Retryable error '{}' found at on_error(), step: {}", err.what(), step);
		return FutureRC::RETRY;
	}
	logr.error("Unretryable error '{}' found at on_error(), step: {}", err.what(), step);
	tx.reset();
	return FutureRC::ABORT;
}
// Blocks until an on_error() future is ready, then classifies it via
// handleForOnError(). Failing to even wait on the future is ABORT.
template <class FutureType>
force_inline FutureRC waitAndHandleForOnError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
	assert(f);
	if (auto err = f.blockUntilReady()) {
		logr.error("'{}' found while waiting for on_error() future, step: {}", err.what(), step);
		return FutureRC::ABORT;
	}
	return handleForOnError(tx, f, step);
}
// wait on any non-immediate tx-related step to complete. Follow up with on_error().
// Blocks on a non-immediate transaction-step future and classifies the result:
// OK on success; otherwise feeds the error to tx.onError() (implicit backoff)
// and lets the follow-up future decide between RETRY / CONFLICT / ABORT.
template <class FutureType>
force_inline FutureRC waitAndHandleError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
	assert(f);
	auto err = fdb::Error{};
	if ((err = f.blockUntilReady())) {
		const auto retry = err.retryable();
		logr.error("{} error '{}' found during step: {}", (retry ? "Retryable" : "Unretryable"), err.what(), step);
		return retry ? FutureRC::RETRY : FutureRC::ABORT;
	}
	err = f.error();
	if (!err)
		return FutureRC::OK;
	if (err.retryable()) {
		logr.warn("step {} returned '{}'", step, err.what());
	} else {
		logr.error("step {} returned '{}'", step, err.what());
	}
	// implicit backoff
	auto follow_up = tx.onError(err);
	// BUGFIX: wait on the on_error() follow-up future, not the original,
	// already-failed future `f`
	return waitAndHandleForOnError(tx, follow_up, step);
}
} // namespace mako
#endif /*MAKO_FUTURE_HPP*/

View File

@ -0,0 +1,32 @@
/*
* limit.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIMIT_HPP
#define LIMIT_HPP
#if defined(__linux__)
#include <linux/limits.h>
#elif defined(__APPLE__)
#include <sys/syslimits.h>
#else
#include <limits.h>
#endif
#endif

View File

@ -0,0 +1,117 @@
/*
* logger.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_LOGGER_HPP
#define MAKO_LOGGER_HPP
#include <fmt/format.h>
#include <cassert>
#include <cstdio>
#include <iterator>
#include <string_view>
#include "process.hpp"
namespace mako {
constexpr const int VERBOSE_NONE = 0; // will still print errors
constexpr const int VERBOSE_DEFAULT = 1; // will print info and work stats
constexpr const int VERBOSE_WARN = 2; // will print expected errors
constexpr const int VERBOSE_DEBUG = 3; // will print everything
template <ProcKind P>
using ProcKindConstant = std::integral_constant<ProcKind, P>;
using MainProcess = ProcKindConstant<ProcKind::MAIN>;
using StatsProcess = ProcKindConstant<ProcKind::STATS>;
using WorkerProcess = ProcKindConstant<ProcKind::WORKER>;
// Lightweight logger that prefixes every line with the emitting process kind
// ([MAIN] / [STATS] / [WORKER<p>] or [WORKER<p>:<t>]) plus a category, and
// filters by verbosity. VERBOSE_NONE ("error") goes to stderr, the rest to stdout.
class Logger {
	ProcKind proc;
	int verbosity{ VERBOSE_DEFAULT };
	int process_id{ -1 }; // worker process index; -1 for MAIN/STATS
	int thread_id{ -1 }; // worker thread index; -1 when logging process-wide
	// Appends the "[WHO] CATEGORY: " prefix to buf (ids printed 1-based).
	void putHeader(fmt::memory_buffer& buf, std::string_view category) {
		if (proc == ProcKind::MAIN) {
			fmt::format_to(std::back_inserter(buf), "[MAIN] {}: ", category);
		} else if (proc == ProcKind::STATS) {
			fmt::format_to(std::back_inserter(buf), "[STATS] {}: ", category);
		} else {
			if (thread_id == -1) {
				fmt::format_to(std::back_inserter(buf), "[WORKER{:3d}] {}: ", process_id + 1, category);
			} else {
				fmt::format_to(
				    std::back_inserter(buf), "[WORKER{:3d}:{:3d}] {}: ", process_id + 1, thread_id + 1, category);
			}
		}
	}
public:
	Logger(MainProcess, int verbosity) noexcept : proc(MainProcess::value), verbosity(verbosity) {}
	Logger(StatsProcess, int verbosity) noexcept : proc(StatsProcess::value), verbosity(verbosity) {}
	Logger(WorkerProcess, int verbosity, int process_id, int thread_id = -1) noexcept
	  : proc(WorkerProcess::value), verbosity(verbosity), process_id(process_id), thread_id(thread_id) {}
	Logger(const Logger&) noexcept = default;
	Logger& operator=(const Logger&) noexcept = default;
	void setVerbosity(int value) noexcept {
		assert(value >= VERBOSE_NONE && value <= VERBOSE_DEBUG);
		verbosity = value;
	}
	// Formats and emits one line if log_level passes the verbosity filter.
	// args are forwarded to fmt::format_to (format string + values).
	template <typename... Args>
	void printWithLogLevel(int log_level, std::string_view header, Args&&... args) {
		assert(log_level >= VERBOSE_NONE && log_level <= VERBOSE_DEBUG);
		if (log_level <= verbosity) {
			const auto fp = log_level == VERBOSE_NONE ? stderr : stdout;
			// 500B inline buffer
			auto buf = fmt::memory_buffer{};
			putHeader(buf, header);
			fmt::format_to(std::back_inserter(buf), std::forward<Args>(args)...);
			fmt::print(fp, "{}\n", std::string_view(buf.data(), buf.size()));
		}
	}
	template <typename... Args>
	void error(Args&&... args) {
		printWithLogLevel(VERBOSE_NONE, "ERROR", std::forward<Args>(args)...);
	}
	template <typename... Args>
	void info(Args&&... args) {
		printWithLogLevel(VERBOSE_DEFAULT, "INFO", std::forward<Args>(args)...);
	}
	template <typename... Args>
	void warn(Args&&... args) {
		printWithLogLevel(VERBOSE_WARN, "WARNING", std::forward<Args>(args)...);
	}
	template <typename... Args>
	void debug(Args&&... args) {
		printWithLogLevel(VERBOSE_DEBUG, "DEBUG", std::forward<Args>(args)...);
	}
};
} // namespace mako
#endif /*MAKO_LOGGER_HPP*/

View File

@ -0,0 +1,32 @@
/*
* macro.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_MACRO_HPP
#define MAKO_MACRO_HPP
#if defined(__GNUG__)
#define force_inline inline __attribute__((__always_inline__))
#elif defined(_MSC_VER)
#define force_inline __forceinline
#else
#error Missing force inline
#endif
#endif /*MAKO_MACRO_HPP*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,209 +0,0 @@
#ifndef MAKO_H
#define MAKO_H
#pragma once
#ifndef FDB_API_VERSION
#define FDB_API_VERSION 720
#endif
#include <foundationdb/fdb_c.h>
#include <pthread.h>
#include <sys/types.h>
#include <stdbool.h>
#if defined(__linux__)
#include <linux/limits.h>
#elif defined(__APPLE__)
#include <sys/syslimits.h>
#else
#include <limits.h>
#endif
#define VERBOSE_NONE 0
#define VERBOSE_DEFAULT 1
#define VERBOSE_ANNOYING 2
#define VERBOSE_DEBUG 3
#define MODE_INVALID -1
#define MODE_CLEAN 0
#define MODE_BUILD 1
#define MODE_RUN 2
#define FDB_SUCCESS 0
#define FDB_ERROR_RETRY -1
#define FDB_ERROR_ABORT -2
#define FDB_ERROR_CONFLICT -3
#define LAT_BLOCK_SIZE 511 /* size of each block to get detailed latency for each operation */
/* transaction specification */
/* operation identifiers; the per-op stat arrays (ops/errors/latency_*) are indexed by these */
enum Operations {
	OP_GETREADVERSION,
	OP_GET,
	OP_GETRANGE,
	OP_SGET,
	OP_SGETRANGE,
	OP_UPDATE,
	OP_INSERT,
	OP_INSERTRANGE,
	OP_OVERWRITE,
	OP_CLEAR,
	OP_SETCLEAR,
	OP_CLEARRANGE,
	OP_SETCLEARRANGE,
	OP_COMMIT,
	OP_TRANSACTION, /* pseudo-operation - cumulative time for the operation + commit */
	OP_READ_BG,
	MAX_OP /* must be the last item */
};
#define OP_COUNT 0
#define OP_RANGE 1
#define OP_REVERSE 2
/* for long arguments */
/* NOTE(review): presumably the option ids returned by getopt_long — confirm in the option table */
enum Arguments {
	ARG_KEYLEN,
	ARG_VALLEN,
	ARG_TPS,
	ARG_COMMITGET,
	ARG_SAMPLING,
	ARG_VERSION,
	ARG_KNOBS,
	ARG_FLATBUFFERS,
	ARG_LOGGROUP,
	ARG_PREFIXPADDING,
	ARG_TRACE,
	ARG_TRACEPATH,
	ARG_TRACEFORMAT,
	ARG_TPSMAX,
	ARG_TPSMIN,
	ARG_TPSINTERVAL,
	ARG_TPSCHANGE,
	ARG_TXNTRACE,
	ARG_TXNTAGGING,
	ARG_TXNTAGGINGPREFIX,
	ARG_STREAMING_MODE,
	ARG_DISABLE_RYW,
	ARG_CLIENT_THREADS_PER_VERSION,
	ARG_JSON_REPORT,
	ARG_BG_FILE_PATH // if blob granule files are stored locally, mako will read and materialize them if this is set
};
/* NOTE(review): selected via ARG_TPSCHANGE; names suggest sine/square/pulse TPS waveforms — confirm */
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
#define KEYPREFIX "mako"
#define KEYPREFIXLEN 4
#define TEMP_DATA_STORE "/tmp/makoTemp"
/* we set mako_txnspec_t and mako_args_t only once in the master process,
* and won't be touched by child processes.
*/
typedef struct {
	/* for each operation, it stores "count", "range" and "reverse" */
	/* inner index is OP_COUNT / OP_RANGE / OP_REVERSE */
	int ops[MAX_OP][3];
} mako_txnspec_t;
#define LOGGROUP_MAX 256
#define KNOB_MAX 256
#define TAGPREFIXLENGTH_MAX 8
#define NUM_CLUSTERS_MAX 3
#define NUM_DATABASES_MAX 10
#define MAX_BG_IDS 1000
/* benchmark parameters */
/* benchmark configuration; written once by the master process (see note above)
 * and read-only in the child processes */
typedef struct {
	int api_version;
	int json;
	int num_processes;
	int num_threads;
	int mode; /* MODE_CLEAN / MODE_BUILD / MODE_RUN */
	int rows; /* is 2 billion enough? */
	int seconds;
	int iteration;
	int tpsmax;
	int tpsmin;
	int tpsinterval;
	int tpschange; /* one of enum TPSChangeTypes */
	int sampling; /* latency recorded for 1 in every `sampling` transactions */
	int key_length;
	int value_length;
	int zipf;
	int commit_get;
	int verbose; /* one of the VERBOSE_* levels */
	mako_txnspec_t txnspec;
	char cluster_files[NUM_CLUSTERS_MAX][PATH_MAX];
	int num_fdb_clusters;
	int num_databases;
	char log_group[LOGGROUP_MAX];
	int prefixpadding;
	int trace;
	char tracepath[PATH_MAX];
	int traceformat; /* 0 - XML, 1 - JSON */
	char knobs[KNOB_MAX];
	uint8_t flatbuffers;
	int txntrace;
	int txntagging;
	char txntagging_prefix[TAGPREFIXLENGTH_MAX];
	FDBStreamingMode streaming_mode;
	int client_threads_per_version;
	int disable_ryw;
	char json_output_path[PATH_MAX];
	bool bg_materialize_files; /* materialize locally stored blob granule files */
	char bg_file_path[PATH_MAX];
} mako_args_t;
/* shared memory */
#define SIGNAL_RED 0
#define SIGNAL_GREEN 1
#define SIGNAL_OFF 2
/* header of the shared-memory region coordinating master/stats/worker processes */
typedef struct {
	int signal; /* SIGNAL_RED / SIGNAL_GREEN / SIGNAL_OFF */
	int readycount;
	double throttle_factor;
	int stopcount;
} mako_shmhdr_t;
/* memory block allocated to each operation when collecting detailed latency */
typedef struct {
	uint64_t data[LAT_BLOCK_SIZE];
	void* next_block; /* presumably the next lat_block_t in a chain, or NULL — typed void* here */
} lat_block_t;
/* aggregate counters; the per-op arrays are indexed by enum Operations */
typedef struct {
	uint64_t xacts;
	uint64_t conflicts;
	uint64_t ops[MAX_OP];
	uint64_t errors[MAX_OP];
	uint64_t latency_samples[MAX_OP];
	uint64_t latency_us_total[MAX_OP];
	uint64_t latency_us_min[MAX_OP];
	uint64_t latency_us_max[MAX_OP];
} mako_stats_t;
/* per-process information */
typedef struct {
	int worker_id; /* index of this worker process */
	pid_t parent_id; /* pid of the parent (master) process */
	mako_args_t* args; /* shared benchmark configuration */
	mako_shmhdr_t* shm; /* shared-memory control block */
	FDBDatabase* databases[NUM_DATABASES_MAX];
} process_info_t;
/* args for threads */
typedef struct {
	int thread_id;
	int database_index; // index of the database to do work to
	int elem_size[MAX_OP]; /* stores the multiple of LAT_BLOCK_SIZE to check the memory allocation of each operation */
	bool is_memory_allocated[MAX_OP]; /* flag specified for each operation, whether the memory was allocated to that
	                                     specific operation */
	lat_block_t* block[MAX_OP]; /* per-operation latency block storage */
	process_info_t* process; /* owning process context, shared by the process's threads */
} thread_args_t;
/* process type */
/* role of the current process: master, forked worker, or stats emitter */
typedef enum { proc_master = 0, proc_worker, proc_stats } proc_type_t;
#endif /* MAKO_H */

View File

@ -0,0 +1,168 @@
/*
* mako.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_HPP
#define MAKO_HPP
#ifndef FDB_API_VERSION
#define FDB_API_VERSION 720
#endif
#include <array>
#include <atomic>
#include <cassert>
#include <chrono>
#include <list>
#include <vector>
#include <string_view>
#include <fdb_api.hpp>
#include <pthread.h>
#include <sys/types.h>
#include <stdbool.h>
#include "limit.hpp"
namespace mako {
constexpr const int MODE_INVALID = -1;
constexpr const int MODE_CLEAN = 0;
constexpr const int MODE_BUILD = 1;
constexpr const int MODE_RUN = 2;
/* for long arguments */
// NOTE(review): presumably the option ids returned by getopt_long — confirm in the option table
enum ArgKind {
	ARG_KEYLEN,
	ARG_VALLEN,
	ARG_TPS,
	ARG_ASYNC,
	ARG_COMMITGET,
	ARG_SAMPLING,
	ARG_VERSION,
	ARG_KNOBS,
	ARG_FLATBUFFERS,
	ARG_LOGGROUP,
	ARG_PREFIXPADDING,
	ARG_TRACE,
	ARG_TRACEPATH,
	ARG_TRACEFORMAT,
	ARG_TPSMAX,
	ARG_TPSMIN,
	ARG_TPSINTERVAL,
	ARG_TPSCHANGE,
	ARG_TXNTRACE,
	ARG_TXNTAGGING,
	ARG_TXNTAGGINGPREFIX,
	ARG_STREAMING_MODE,
	ARG_DISABLE_RYW,
	ARG_CLIENT_THREADS_PER_VERSION,
	ARG_JSON_REPORT,
	ARG_BG_FILE_PATH // if blob granule files are stored locally, mako will read and materialize them if this is set
};
constexpr const int OP_COUNT = 0;
constexpr const int OP_RANGE = 1;
constexpr const int OP_REVERSE = 2;
/* transaction specification */
// operation identifiers; per-op stat/latency containers are indexed by these
enum OpKind {
	OP_GETREADVERSION,
	OP_GET,
	OP_GETRANGE,
	OP_SGET,
	OP_SGETRANGE,
	OP_UPDATE,
	OP_INSERT,
	OP_INSERTRANGE,
	OP_OVERWRITE,
	OP_CLEAR,
	OP_SETCLEAR,
	OP_CLEARRANGE,
	OP_SETCLEARRANGE,
	OP_COMMIT,
	OP_TRANSACTION, /* pseudo-operation - time it takes to run one iteration of ops sequence */
	OP_READ_BG,
	MAX_OP /* must be the last item */
};
// NOTE(review): selected via ARG_TPSCHANGE; names suggest sine/square/pulse TPS waveforms — confirm
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
/* we set WorkloadSpec and Arguments only once in the master process,
* and won't be touched by child processes.
*/
struct WorkloadSpec {
	/* for each operation, it stores "count", "range" and "reverse" */
	// inner index is OP_COUNT / OP_RANGE / OP_REVERSE
	int ops[MAX_OP][3];
};
constexpr const int LOGGROUP_MAX = 256;
constexpr const int KNOB_MAX = 256;
constexpr const int TAGPREFIXLENGTH_MAX = 8;
constexpr const int NUM_CLUSTERS_MAX = 3;
constexpr const int NUM_DATABASES_MAX = 10;
constexpr const std::string_view KEY_PREFIX{ "mako" };
constexpr const std::string_view TEMP_DATA_STORE{ "/tmp/makoTemp" };
/* benchmark parameters */
// benchmark configuration; written once by the master process (see note above)
// and treated as read-only by child processes
struct Arguments {
	int api_version;
	int json;
	int num_processes;
	int num_threads;
	int async_xacts; // async transactions per worker process; > 0 enables the non-blocking scheduler
	int mode; // MODE_CLEAN / MODE_BUILD / MODE_RUN
	int rows; /* is 2 billion enough? */
	int row_digits; // presumably decimal digit count of `rows`, cached for key formatting — confirm
	int seconds;
	int iteration;
	int tpsmax;
	int tpsmin;
	int tpsinterval;
	int tpschange; // one of TPSChangeTypes
	int sampling; // latency recorded for 1 in every `sampling` transactions
	int key_length;
	int value_length;
	int zipf;
	int commit_get;
	int verbose; // one of the VERBOSE_* levels
	WorkloadSpec txnspec;
	char cluster_files[NUM_CLUSTERS_MAX][PATH_MAX];
	int num_fdb_clusters;
	int num_databases;
	char log_group[LOGGROUP_MAX];
	int prefixpadding;
	int trace;
	char tracepath[PATH_MAX];
	int traceformat; /* 0 - XML, 1 - JSON */
	char knobs[KNOB_MAX];
	uint8_t flatbuffers;
	int txntrace;
	int txntagging;
	char txntagging_prefix[TAGPREFIXLENGTH_MAX];
	FDBStreamingMode streaming_mode;
	int64_t client_threads_per_version;
	int disable_ryw;
	char json_output_path[PATH_MAX];
	bool bg_materialize_files; // materialize locally stored blob granule files
	char bg_file_path[PATH_MAX];
};
} // namespace mako
#endif /* MAKO_HPP */

View File

@ -53,6 +53,13 @@ Arguments
- | ``-t | --threads <threads>``
| Number of threads per worker process (Default: 1)
| With ``--async_xacts <xacts>`` == 0 (Default), each of the ``<threads>`` operates on a transaction object with blocking API calls
| Otherwise, all of the ``<threads>`` run an asynchronous job scheduler, serving ``<xacts>`` transactions
- | ``--async_xacts <xacts>``
| Number of transactions per worker process to run asynchronously (Default: 0)
| ``<xacts>`` > 0 switches the execution mode to non-blocking (See ``-t | --threads``), with the exception of blob granules API
| Note: throttling options, e.g. ``--tpsmax``, ``--tpsmin``, ``--tpschange``, ``--tpsinterval``, are ignored in asynchronous mode
- | ``-r | --rows <rows>``
| Number of rows initially populated (Default: 100000)

View File

@ -0,0 +1,275 @@
/*
* operations.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blob_granules.hpp"
#include "operations.hpp"
#include "mako.hpp"
#include "logger.hpp"
#include "utils.hpp"
#include <array>
extern thread_local mako::Logger logr;
namespace mako {
using namespace fdb;
const std::array<Operation, MAX_OP> opTable{
{ { "GRV",
{ { StepKind::READ,
[](Transaction& tx, Arguments const&, ByteString&, ByteString&, ByteString&) {
return tx.getReadVersion().eraseType();
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString&) {
if (f && !f.error()) {
f.get<future_var::Int64>();
}
} } },
1,
false },
{ "GET",
{ { StepKind::READ,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
return tx.get(key, false /*snapshot*/).eraseType();
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::Value>();
}
} } },
1,
false },
{ "GETRANGE",
{ { StepKind::READ,
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
return tx
.getRange(key_select::firstGreaterOrEqual(begin),
key_select::lastLessOrEqual(end, 1),
0 /*limit*/,
0 /*target_bytes*/,
args.streaming_mode,
0 /*iteration*/,
false /*snapshot*/,
args.txnspec.ops[OP_GETRANGE][OP_REVERSE])
.eraseType();
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::KeyValueArray>();
}
} } },
1,
false },
{ "SGET",
{ { StepKind::READ,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
return tx.get(key, true /*snapshot*/).eraseType();
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::Value>();
}
} } },
1,
false },
{ "SGETRANGE",
{ {
StepKind::READ,
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
return tx
.getRange(key_select::firstGreaterOrEqual(begin),
key_select::lastLessOrEqual(end, 1),
0 /*limit*/,
0 /*target_bytes*/,
args.streaming_mode,
0 /*iteration*/,
true /*snapshot*/,
args.txnspec.ops[OP_GETRANGE][OP_REVERSE])
.eraseType();
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::KeyValueArray>();
}
} } },
1,
false },
{ "UPDATE",
{ { StepKind::READ,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
return tx.get(key, false /*snapshot*/).eraseType();
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::Value>();
}
} },
{ StepKind::IMM,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
randomString(value.data(), args.value_length);
tx.set(key, value);
return Future();
} } },
2,
true },
{ "INSERT",
{ { StepKind::IMM,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
// key[0..args.key_length] := concat(key_prefix, random_string)
randomString(key.data() + intSize(KEY_PREFIX), args.key_length - intSize(KEY_PREFIX));
randomString(value.data(), args.value_length);
tx.set(key, value);
return Future();
} } },
1,
true },
{ "INSERTRANGE",
{ { StepKind::IMM,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
randomString(value.data(), args.value_length);
// key[0..args.key_length] := concat(prefix, random_string, num[0..range_digits])
const auto range = args.txnspec.ops[OP_INSERTRANGE][OP_RANGE];
assert(range > 0);
const auto range_digits = digits(range);
const auto random_len = args.key_length - intSize(KEY_PREFIX) - range_digits;
randomString(&key[intSize(KEY_PREFIX)], random_len);
for (auto i = 0; i < range; i++) {
numericWithFill(&key[args.key_length - range_digits], range_digits, i);
tx.set(key, value);
}
return Future();
} } },
1,
true },
{ "OVERWRITE",
{ { StepKind::IMM,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
randomString(value.data(), args.value_length);
tx.set(key, value);
return Future();
} } },
1,
true },
{ "CLEAR",
{ { StepKind::IMM,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
tx.clear(key);
return Future();
} } },
1,
true },
{ "SETCLEAR",
{ { StepKind::COMMIT,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
randomString(&key[KEY_PREFIX.size()], args.key_length - intSize(KEY_PREFIX));
randomString(value.data(), args.value_length);
tx.set(key, value);
return tx.commit().eraseType();
} },
{ StepKind::IMM,
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
tx.reset(); // assuming commit from step 0 worked.
tx.clear(key); // key should forward unchanged from step 0
return Future();
} } },
2,
true },
{ "CLEARRANGE",
{ { StepKind::IMM,
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
tx.clearRange(begin, end);
return Future();
} } },
1,
true },
{ "SETCLEARRANGE",
{ { StepKind::COMMIT,
[](Transaction& tx, Arguments const& args, ByteString& key_begin, ByteString& key, ByteString& value) {
randomString(value.data(), args.value_length);
// key[0..args.key_length] := concat(prefix, random_string, num[0..range_digits])
const auto range = args.txnspec.ops[OP_SETCLEARRANGE][OP_RANGE];
assert(range > 0);
const auto range_digits = digits(range);
const auto random_len = args.key_length - intSize(KEY_PREFIX) - range_digits;
randomString(&key[KEY_PREFIX.size()], random_len);
for (auto i = 0; i < range; i++) {
numericWithFill(&key[args.key_length - range_digits], range_digits, i);
tx.set(key, value);
if (i == 0)
key_begin.assign(key);
}
return tx.commit().eraseType();
} },
{ StepKind::IMM,
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
tx.reset();
tx.clearRange(begin, end);
return Future();
} } },
2,
true },
{ "COMMIT", { { StepKind::NONE, nullptr } }, 0, false },
{ "TRANSACTION", { { StepKind::NONE, nullptr } }, 0, false },
{ "READBLOBGRANULE",
{ { StepKind::ON_ERROR,
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
auto err = Error{};
err = tx.setOptionNothrow(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, BytesRef());
if (err) {
// Issuing read/writes before disabling RYW results in error.
// Possible malformed workload?
// As workloads execute in sequence, retrying would likely repeat this error.
fmt::print(stderr, "ERROR: TR_OPTION_READ_YOUR_WRITES_DISABLE: {}", err.what());
return Future();
}
// Allocate a separate context per call to avoid multiple threads accessing
auto user_context = blob_granules::local_file::UserContext(args.bg_file_path);
auto api_context = blob_granules::local_file::createApiContext(user_context, args.bg_materialize_files);
auto r = tx.readBlobGranules(begin,
end,
0 /* beginVersion*/,
-2, /* endVersion. -2 (latestVersion) is use txn read version */
api_context);
user_context.clear();
auto out = Result::KeyValueArray{};
err = r.getKeyValueArrayNothrow(out);
if (!err || err.is(2037 /*blob_granule_not_materialized*/))
return Future();
const auto level = (err.is(1020 /*not_committed*/) || err.is(1021 /*commit_unknown_result*/) ||
err.is(1213 /*tag_throttled*/))
? VERBOSE_WARN
: VERBOSE_NONE;
logr.printWithLogLevel(level, "ERROR", "get_keyvalue_array() after readBlobGranules(): {}", err.what());
return tx.onError(err).eraseType();
} } },
1,
false } }
};
} // namespace mako

View File

@ -0,0 +1,140 @@
/*
* operations.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_OPERATIONS_HPP
#define MAKO_OPERATIONS_HPP
#include <fdb_api.hpp>
#include <array>
#include <cassert>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>
#include "macro.hpp"
#include "mako.hpp"
namespace mako {
// Determines how the future produced by an op step must be handled by the
// transaction loop (whether to block on it, commit after it, etc.).
enum class StepKind {
	NONE, ///< not part of the table: OP_TRANSACTION, OP_COMMIT
	IMM, ///< non-future ops that return immediately: e.g. set, clear_range
	READ, ///< blockable reads: get(), get_range(), get_read_version, ...
	COMMIT, ///< self-explanatory
	ON_ERROR ///< future is a result of tx.on_error()
};
// Pseudo-ops (OP_COMMIT, OP_TRANSACTION) have no concrete steps to execute;
// they exist in the op list for measurement/bookkeeping only.
force_inline bool isAbstractOp(int op) noexcept {
	return (op == OP_TRANSACTION) || (op == OP_COMMIT);
}
using StepFunction = fdb::Future (*)(fdb::Transaction& tx,
Arguments const&,
fdb::ByteString& /*key1*/,
fdb::ByteString& /*key2*/,
fdb::ByteString& /*value*/);
using PostStepFunction = void (*)(fdb::Future&,
fdb::Transaction& tx,
Arguments const&,
fdb::ByteString& /*key1*/,
fdb::ByteString& /*key2*/,
fdb::ByteString& /*value*/);
// One executable step of an operation: how its future is handled (kind),
// the function that issues it, and an optional callback run after the
// future resolves.
struct Step {
	StepKind kind;
	StepFunction step_func_;
	PostStepFunction post_step_func_{ nullptr };
};

// Static description of one benchmark operation: display name, up to two
// executable steps, and whether a commit must eventually follow.
struct Operation {
	std::string_view name_;
	Step steps_[2];
	int num_steps_;
	bool needs_commit_;

	// display name as used in stats/trace output
	std::string_view name() const noexcept { return name_; }

	StepKind stepKind(int step) const noexcept {
		assert(step < steps());
		return steps_[step].kind;
	}

	StepFunction stepFunction(int step) const noexcept { return steps_[step].step_func_; }

	PostStepFunction postStepFunction(int step) const noexcept { return steps_[step].post_step_func_; }

	// how many steps in this op?
	int steps() const noexcept { return num_steps_; }

	// does the op need to commit some time after its final step?
	bool needsCommit() const noexcept { return needs_commit_; }
};
extern const std::array<Operation, MAX_OP> opTable;
// Human-readable name of an op code; empty string for out-of-range codes.
// NOTE(review): returns string_view::data(), which is null-terminated only
// because the names in opTable are string literals — confirm if names ever
// come from elsewhere.
force_inline char const* getOpName(int ops_code) {
	if (ops_code < 0 || ops_code >= MAX_OP)
		return "";
	return opTable[ops_code].name().data();
}
// Position within a workload spec: the operation code, the repetition count
// within that op, and the step index inside the op. Advanced with
// getOpBegin()/getOpNext(); OpEnd marks exhaustion.
struct OpIterator {
	int op, count, step;

	bool operator==(const OpIterator& other) const noexcept {
		return op == other.op && count == other.count && step == other.step;
	}

	bool operator!=(const OpIterator& other) const noexcept { return !(*this == other); }

	// step kind of the step currently pointed to
	StepKind stepKind() const noexcept { return opTable[op].stepKind(step); }

	// name of the op currently pointed to
	char const* opName() const noexcept { return getOpName(op); }
};
constexpr const OpIterator OpEnd = OpIterator{ MAX_OP, -1, -1 };
// First (op, count, step) triple to execute for this workload spec,
// skipping abstract ops and ops configured with a zero count.
force_inline OpIterator getOpBegin(Arguments const& args) noexcept {
	auto op = 0;
	while (op < MAX_OP && (isAbstractOp(op) || args.txnspec.ops[op][OP_COUNT] == 0))
		op++;
	return (op == MAX_OP) ? OpEnd : OpIterator{ op, 0, 0 };
}
// Advance the workload iterator: first through remaining steps of the
// current op, then to the next repetition (count), then on to the next op
// that has work configured. Returns OpEnd when the spec is exhausted.
force_inline OpIterator getOpNext(Arguments const& args, OpIterator current) noexcept {
	// NOTE: binding by reference mutates the local copy 'current' in place below.
	auto& [op, count, step] = current;
	assert(op < MAX_OP && !isAbstractOp(op));
	// more steps remain in the current op?
	if (opTable[op].steps() > step + 1)
		return OpIterator{ op, count, step + 1 };
	count++;
	// search for the next (op, count) with work to do; count resets to 0
	// every time we move past the current op
	for (; op < MAX_OP; op++, count = 0) {
		if (isAbstractOp(op) || args.txnspec.ops[op][OP_COUNT] <= count)
			continue;
		return OpIterator{ op, count, 0 };
	}
	return OpEnd;
}
} // namespace mako
#endif /* MAKO_OPERATIONS_HPP */

View File

@ -0,0 +1,26 @@
/*
* process.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_PROCESS_HPP
#define MAKO_PROCESS_HPP

// Role of a mako process. NOTE(review): semantics inferred from names —
// MAIN presumably coordinates, WORKERs run transactions, STATS reports;
// confirm against the process-spawning code.
enum class ProcKind { MAIN, WORKER, STATS };

#endif /*MAKO_PROCESS_HPP*/

View File

@ -0,0 +1,108 @@
/*
* shm.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_SHM_HPP
#define MAKO_SHM_HPP
#include <atomic>
#include <cassert>
#include <cstdint>
#include "stats.hpp"
/* shared memory */
constexpr const int SIGNAL_RED = 0;
constexpr const int SIGNAL_GREEN = 1;
constexpr const int SIGNAL_OFF = 2;
// controlled, safer access to shared memory
namespace mako::shared_memory {
// Control block at the start of the shared-memory region.
struct Header {
	std::atomic<int> signal = ATOMIC_VAR_INIT(SIGNAL_OFF); // SIGNAL_RED/GREEN/OFF broadcast
	std::atomic<int> readycount = ATOMIC_VAR_INIT(0); // presumably workers that finished setup — confirm
	std::atomic<double> throttle_factor = ATOMIC_VAR_INIT(1.0); // multiplier applied to target rate
	std::atomic<int> stopcount = ATOMIC_VAR_INIT(0); // presumably workers that have stopped — confirm
};

// Layout marker: a Header followed by a flat array of ThreadStatistics.
// The single 'stats' member is the first element; the rest of the array
// lives in the extra bytes allocated per storageSize().
struct LayoutHelper {
	Header hdr;
	ThreadStatistics stats;
};

// Bytes required for one Header plus one ThreadStatistics slot per
// (process, thread) pair. The "- 1" accounts for the slot already inside
// LayoutHelper.
inline size_t storageSize(int num_processes, int num_threads) noexcept {
	assert(num_processes >= 1 && num_threads >= 1);
	return sizeof(LayoutHelper) + sizeof(ThreadStatistics) * ((num_processes * num_threads) - 1);
}
// Typed, non-owning view over the raw shared-memory region: one Header
// followed by a row-major [process][thread] array of ThreadStatistics.
class Access {
	void* base; // start of the region (address of the Header)
	int num_processes;
	int num_threads;

	// address of the stats slot for (process_idx, thread_idx); slots start
	// at LayoutHelper::stats and continue past the end of LayoutHelper
	static inline ThreadStatistics& statsSlot(void* shm_base,
	                                          int num_threads,
	                                          int process_idx,
	                                          int thread_idx) noexcept {
		return (&static_cast<LayoutHelper*>(shm_base)->stats)[process_idx * num_threads + thread_idx];
	}

public:
	Access(void* shm, int num_processes, int num_threads) noexcept
	  : base(shm), num_processes(num_processes), num_threads(num_threads) {}

	Access() noexcept : Access(nullptr, 0, 0) {}
	Access(const Access&) noexcept = default;
	Access& operator=(const Access&) noexcept = default;

	// total bytes this view expects the region to span
	size_t size() const noexcept { return storageSize(num_processes, num_threads); }

	// placement-construct the Header and every stats slot in the raw region.
	// NOTE(review): presumably called once by the process that created the
	// shared memory, before workers attach — confirm against the caller.
	void initMemory() noexcept {
		new (&header()) Header{};
		for (auto i = 0; i < num_processes; i++)
			for (auto j = 0; j < num_threads; j++)
				new (&statsSlot(i, j)) ThreadStatistics();
	}

	Header const& headerConst() const noexcept { return *static_cast<Header const*>(base); }
	Header& header() const noexcept { return *static_cast<Header*>(base); }

	// pointer to the first slot of the flat num_processes * num_threads array
	ThreadStatistics const* statsConstArray() const noexcept {
		return &statsSlot(base, num_threads, 0 /*process_id*/, 0 /*thread_id*/);
	}
	ThreadStatistics* statsArray() const noexcept {
		return &statsSlot(base, num_threads, 0 /*process_id*/, 0 /*thread_id*/);
	}

	ThreadStatistics const& statsConstSlot(int process_idx, int thread_idx) const noexcept {
		return statsSlot(base, num_threads, process_idx, thread_idx);
	}
	ThreadStatistics& statsSlot(int process_idx, int thread_idx) const noexcept {
		return statsSlot(base, num_threads, process_idx, thread_idx);
	}
};
} // namespace mako::shared_memory
#endif /* MAKO_SHM_HPP */

View File

@ -0,0 +1,177 @@
/*
* stats.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_STATS_HPP
#define MAKO_STATS_HPP
#include <array>
#include <cstdint>
#include <cstring>
#include <list>
#include <new>
#include <utility>
#include "operations.hpp"
#include "time.hpp"
namespace mako {
/* rough cap on the number of samples to avoid OOM hindering benchmark */
constexpr const size_t SAMPLE_CAP = 2000000;
/* size of each block to get detailed latency for each operation */
constexpr const size_t LAT_BLOCK_SIZE = 4093;
/* hard cap on the number of sample blocks = 488 */
constexpr const size_t MAX_LAT_BLOCKS = SAMPLE_CAP / LAT_BLOCK_SIZE;
/* memory block allocated to each operation when collecting detailed latency */
class LatencySampleBlock {
uint64_t samples[LAT_BLOCK_SIZE]{
0,
};
uint64_t index{ 0 };
public:
LatencySampleBlock() noexcept = default;
bool full() const noexcept { return index >= LAT_BLOCK_SIZE; }
void put(timediff_t td) {
assert(!full());
samples[index++] = toIntegerMicroseconds(td);
}
// return {data block, number of samples}
std::pair<uint64_t const*, size_t> data() const noexcept { return { samples, index }; }
};
/* collect sampled latencies until OOM is hit */
class LatencySampleBin {
	std::list<LatencySampleBlock> blocks;
	bool noMoreAlloc{ false }; // latched true after the first failed allocation

	// append a fresh block; on bad_alloc, latch noMoreAlloc and return false
	// so sampling degrades gracefully instead of aborting the benchmark
	bool tryAlloc() {
		try {
			blocks.emplace_back();
		} catch (const std::bad_alloc&) {
			noMoreAlloc = true;
			return false;
		}
		return true;
	}

public:
	// pre-allocate the first block so the hot path rarely allocates
	void reserveOneBlock() {
		if (blocks.empty())
			tryAlloc();
	}

	// record one latency sample; silently drops it once the block cap
	// (MAX_LAT_BLOCKS) is reached or an allocation has previously failed
	void put(timediff_t td) {
		if (blocks.empty() || blocks.back().full()) {
			if (blocks.size() >= MAX_LAT_BLOCKS || noMoreAlloc || !tryAlloc())
				return;
		}
		blocks.back().put(td);
	}

	// iterate & apply for each block user function void(uint64_t const*, size_t)
	template <typename Func>
	void forEachBlock(Func&& fn) const {
		for (const auto& block : blocks) {
			auto [ptr, cnt] = block.data();
			fn(ptr, cnt);
		}
	}
};
// Per-thread benchmark counters. alignas(64) — presumably to keep adjacent
// instances (which sit side by side in shared memory) on separate cache
// lines; confirm intent.
class alignas(64) ThreadStatistics {
	uint64_t conflicts;
	uint64_t total_errors;
	uint64_t ops[MAX_OP]; // completed executions per op
	uint64_t errors[MAX_OP]; // failed executions per op
	uint64_t latency_samples[MAX_OP];
	uint64_t latency_us_total[MAX_OP];
	uint64_t latency_us_min[MAX_OP];
	uint64_t latency_us_max[MAX_OP];

public:
	ThreadStatistics() noexcept {
		// all members are uint64_t, so zero-filling the object is well-defined
		memset(this, 0, sizeof(ThreadStatistics));
		// 0xff-fill sets every min to UINT64_MAX so the first sample wins
		memset(latency_us_min, 0xff, sizeof(latency_us_min));
	}

	ThreadStatistics(const ThreadStatistics& other) noexcept = default;
	ThreadStatistics& operator=(const ThreadStatistics& other) noexcept = default;

	uint64_t getConflictCount() const noexcept { return conflicts; }
	uint64_t getOpCount(int op) const noexcept { return ops[op]; }
	uint64_t getErrorCount(int op) const noexcept { return errors[op]; }
	uint64_t getTotalErrorCount() const noexcept { return total_errors; }
	uint64_t getLatencySampleCount(int op) const noexcept { return latency_samples[op]; }
	uint64_t getLatencyUsTotal(int op) const noexcept { return latency_us_total[op]; }
	uint64_t getLatencyUsMin(int op) const noexcept { return latency_us_min[op]; }
	uint64_t getLatencyUsMax(int op) const noexcept { return latency_us_max[op]; }

	// with 'this' as final aggregation, factor in 'other'
	void combine(const ThreadStatistics& other) {
		conflicts += other.conflicts;
		for (auto op = 0; op < MAX_OP; op++) {
			ops[op] += other.ops[op];
			errors[op] += other.errors[op];
			total_errors += other.errors[op]; // summing per-op == other.total_errors
			latency_samples[op] += other.latency_samples[op];
			latency_us_total[op] += other.latency_us_total[op];
			if (latency_us_min[op] > other.latency_us_min[op])
				latency_us_min[op] = other.latency_us_min[op];
			if (latency_us_max[op] < other.latency_us_max[op])
				latency_us_max[op] = other.latency_us_max[op];
		}
	}

	void incrConflictCount() noexcept { conflicts++; }

	// non-commit write operations aren't measured for time.
	void incrOpCount(int op) noexcept { ops[op]++; }

	void incrErrorCount(int op) noexcept {
		total_errors++;
		errors[op]++;
	}

	// record one latency observation for op, updating count/total/min/max
	void addLatency(int op, timediff_t diff) noexcept {
		const auto latency_us = toIntegerMicroseconds(diff);
		latency_samples[op]++;
		latency_us_total[op] += latency_us;
		if (latency_us_min[op] > latency_us)
			latency_us_min[op] = latency_us;
		if (latency_us_max[op] < latency_us)
			latency_us_max[op] = latency_us;
	}
};
using LatencySampleBinArray = std::array<LatencySampleBin, MAX_OP>;
} // namespace mako
#endif /* MAKO_STATS_HPP */

View File

@ -0,0 +1,77 @@
/*
* time.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_TIME_HPP
#define MAKO_TIME_HPP
#include <chrono>
namespace mako {
/* time measurement helpers */
using std::chrono::steady_clock;
using timepoint_t = decltype(steady_clock::now());
using timediff_t = decltype(std::declval<timepoint_t>() - std::declval<timepoint_t>());

// duration -> seconds as a double (fractional part preserved)
template <typename Duration>
double toDoubleSeconds(Duration duration) {
	using DblSeconds = std::chrono::duration<double>;
	return std::chrono::duration_cast<DblSeconds>(duration).count();
}

// duration -> whole seconds (truncating)
template <typename Duration>
uint64_t toIntegerSeconds(Duration duration) {
	using UIntSeconds = std::chrono::duration<uint64_t>;
	return std::chrono::duration_cast<UIntSeconds>(duration).count();
}

// duration -> whole microseconds (truncating)
template <typename Duration>
uint64_t toIntegerMicroseconds(Duration duration) {
	using UIntMicros = std::chrono::duration<uint64_t, std::micro>;
	return std::chrono::duration_cast<UIntMicros>(duration).count();
}

// timing helpers
// tag type selecting the "start immediately" Stopwatch constructor
struct StartAtCtor {};

// Two-timepoint stopwatch over steady_clock.
class Stopwatch {
	timepoint_t begin_, end_;

public:
	Stopwatch() noexcept : begin_(), end_() {}
	Stopwatch(StartAtCtor) noexcept { start(); }
	Stopwatch(timepoint_t start_time) noexcept : begin_(start_time), end_() {}
	Stopwatch(const Stopwatch&) noexcept = default;
	Stopwatch& operator=(const Stopwatch&) noexcept = default;

	timepoint_t getStart() const noexcept { return begin_; }
	timepoint_t getStop() const noexcept { return end_; }

	void start() noexcept { begin_ = steady_clock::now(); }

	// capture the stop time; returns *this for chaining, e.g. w.stop().diff()
	Stopwatch& stop() noexcept {
		end_ = steady_clock::now();
		return *this;
	}

	Stopwatch& setStop(timepoint_t p_stop) noexcept {
		end_ = p_stop;
		return *this;
	}

	// begin a new measurement from the previous stop point (back-to-back laps)
	void startFromStop() noexcept { begin_ = end_; }

	// elapsed time between start and stop
	auto diff() const noexcept { return end_ - begin_; }
};
} // namespace mako
#endif /* MAKO_TIME_HPP */

View File

@ -1,136 +0,0 @@
#include "utils.h"
#include "mako.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* uniform-distribution random: integer in [low, high], both inclusive */
int urand(int low, int high) {
	double u = rand() / (1.0 + RAND_MAX); /* in [0, 1) */
	int span = high - low + 1;
	return (int)(u * span + low);
}

/* random printable string; len is the buffer size including the trailing
 * null, so len - 1 characters are generated */
void randstr(char* str, int len) {
	char* p = str;
	char* end = str + len - 1;
	while (p < end)
		*p++ = '!' + urand(0, 'z' - '!'); /* generate a char from '!' to 'z' */
	*end = '\0';
}

/* random numeric string; len is the buffer size including the trailing null */
void randnumstr(char* str, int len) {
	char* p = str;
	char* end = str + len - 1;
	while (p < end)
		*p++ = '0' + urand(0, 9); /* generate a digit from '0' to '9' */
	*end = '\0';
}
/* return the first key to be inserted */
int insert_begin(int rows, int p_idx, int t_idx, int total_p, int total_t) {
double interval = (double)rows / total_p / total_t;
return (int)(round(interval * ((p_idx * total_t) + t_idx)));
}
/* return the last key to be inserted */
int insert_end(int rows, int p_idx, int t_idx, int total_p, int total_t) {
double interval = (double)rows / total_p / total_t;
return (int)(round(interval * ((p_idx * total_t) + t_idx + 1) - 1));
}
/* divide val as evenly as possible among total_p * total_t workers; the
 * first (val mod workers) workers receive one extra unit. Returns -1 when
 * the even share is zero and this worker receives nothing. */
int compute_thread_portion(int val, int p_idx, int t_idx, int total_p, int total_t) {
	int workers = total_p * total_t;
	int share = val / total_p / total_t;
	int leftover = val - (share * workers);
	int rank = (p_idx * total_t) + t_idx;
	if (rank < leftover)
		return share + 1;
	if (share == 0)
		return -1;
	return share;
}
/* number of digits in num (num <= 0 yields 0) */
int digits(int num) {
	int count;
	for (count = 0; num > 0; num /= 10)
		count++;
	return count;
}

/* generate a key for a given key number */
/* prefix is "mako" by default; prefixpadding = 1 means the 'x' padding goes
 * in front of the key name rather than trailing it */
/* len is the buffer size, key length + null */
void genkey(char* str, char* prefix, int prefixlen, int prefixpadding, int num, int rows, int len) {
	const int rowdigit = digits(rows);
	const int offset = prefixpadding ? len - (prefixlen + rowdigit) - 1 : 0;
	char numstr[12]; /* an int has at most 10 decimal digits, so rowdigit <= 10 */
	snprintf(numstr, sizeof(numstr), "%.*d", rowdigit, num);
	memset(str, 'x', len);
	memcpy(str + offset, prefix, prefixlen);
	memcpy(str + offset + prefixlen, numstr, rowdigit);
	str[len - 1] = '\0';
}
/* This is another sorting algorithm used to calculate latency parameters */
/* We moved from radix sort to quick sort to avoid extra space used in radix sort */
#if 0
uint64_t get_max(uint64_t arr[], int n) {
uint64_t mx = arr[0];
for (int i = 1; i < n; i++) {
if (arr[i] > mx) {
mx = arr[i];
}
}
return mx;
}
void bucket_data(uint64_t arr[], int n, uint64_t exp) {
// uint64_t output[n];
int i, count[10] = { 0 };
uint64_t* output = (uint64_t*)malloc(sizeof(uint64_t) * n);
for (i = 0; i < n; i++) {
count[(arr[i] / exp) % 10]++;
}
for (i = 1; i < 10; i++) {
count[i] += count[i - 1];
}
for (i = n - 1; i >= 0; i--) {
output[count[(arr[i] / exp) % 10] - 1] = arr[i];
count[(arr[i] / exp) % 10]--;
}
for (i = 0; i < n; i++) {
arr[i] = output[i];
}
free(output);
}
// The main function is to sort arr[] of size n using Radix Sort
void radix_sort(uint64_t* arr, int n) {
// Find the maximum number to know number of digits
uint64_t m = get_max(arr, n);
for (uint64_t exp = 1; m / exp > 0; exp *= 10) bucket_data(arr, n, exp);
}
#endif
/* three-way comparator for uint64_t values, suitable for qsort */
int compare(const void* a, const void* b) {
	uint64_t lhs = *(const uint64_t*)a;
	uint64_t rhs = *(const uint64_t*)b;
	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;
	return 0;
}

/* sort arr[0..n) ascending via libc qsort (replaced the old radix sort to
 * avoid its extra working memory) */
void quick_sort(uint64_t* arr, int n) {
	qsort(arr, n, sizeof(arr[0]), compare);
}

View File

@ -0,0 +1,54 @@
/*
* utils.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "utils.hpp"
#include "mako.hpp"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fmt/format.h>
namespace mako {
/* divide val as evenly as possible among total_p * total_t workers; the
 * first (val mod workers) workers receive one extra unit. Returns -1 when
 * the even share is zero and this worker receives nothing. */
int computeThreadPortion(int val, int p_idx, int t_idx, int total_p, int total_t) {
	const int workers = total_p * total_t;
	const int share = val / total_p / total_t;
	const int leftover = val - (share * workers);
	const int rank = (p_idx * total_t) + t_idx;
	if (rank < leftover)
		return share + 1;
	if (share == 0)
		return -1;
	return share;
}
/* number of decimal digits in num (num <= 0 yields 0) */
int digits(int num) {
	auto count = 0;
	for (; num > 0; num /= 10)
		count++;
	return count;
}
} // namespace mako

View File

@ -1,65 +0,0 @@
#ifndef UTILS_H
#define UTILS_H
#pragma once

#include <stdint.h>

/* uniform-distribution random */
/* return a uniform random number between low and high, both inclusive */
int urand(int low, int high);

/* write a random string of the length of (len-1) to memory pointed by str
 * with a null-termination character at str[len-1].
 */
void randstr(char* str, int len);

/* write a random numeric string of the length of (len-1) to memory pointed by str
 * with a null-termination character at str[len-1].
 */
void randnumstr(char* str, int len);

/* given the total number of rows to be inserted,
 * the worker process index p_idx and the thread index t_idx (both 0-based),
 * and the total number of processes, total_p, and threads, total_t,
 * returns the first row number assigned to this partition.
 */
int insert_begin(int rows, int p_idx, int t_idx, int total_p, int total_t);

/* similar to insert_begin, insert_end returns the last row number */
int insert_end(int rows, int p_idx, int t_idx, int total_p, int total_t);

/* divide a value equally among threads */
int compute_thread_portion(int val, int p_idx, int t_idx, int total_p, int total_t);

/* similar to insert_begin/end, compute_thread_tps computes
 * the per-thread target TPS for given configuration.
 */
#define compute_thread_tps(val, p_idx, t_idx, total_p, total_t) \
	compute_thread_portion(val, p_idx, t_idx, total_p, total_t)

/* similar to compute_thread_tps,
 * compute_thread_iters computes the number of iterations.
 */
#define compute_thread_iters(val, p_idx, t_idx, total_p, total_t) \
	compute_thread_portion(val, p_idx, t_idx, total_p, total_t)

/* get the number of digits */
int digits(int num);

/* generate a key for a given key number */
/* prefix is "mako" by default, prefixpadding = 1 means 'x' will be in front rather than trailing the keyname */
/* len is the buffer size, key length + null */
void genkey(char* str, char* prefix, int prefixlen, int prefixpadding, int num, int rows, int len);

#if 0
// The main function is to sort arr[] of size n using Radix Sort
void radix_sort(uint64_t arr[], int n);
void bucket_data(uint64_t arr[], int n, uint64_t exp);
uint64_t get_max(uint64_t arr[], int n);
#endif

// The main function is to sort arr[] of size n using Quick Sort
void quick_sort(uint64_t arr[], int n);
int compare(const void* a, const void* b);

#endif /* UTILS_H */

View File

@ -0,0 +1,195 @@
/*
* utils.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef UTILS_HPP
#define UTILS_HPP
#pragma once
#include "macro.hpp"
#include "mako.hpp"
#include "fdbclient/zipf.h"
#include <cassert>
#include <chrono>
#include <cstdint>
#include <string_view>
#include <type_traits>
#include <fmt/format.h>
namespace mako {
/* uniform-distribution random */
/* return a uniform random number between low and high, both inclusive */
force_inline int urand(int low, int high) {
	const auto span = high - low + 1;
	const auto unit = rand() / (1.0 + RAND_MAX); // in [0, 1)
	return static_cast<int>(unit * span + low);
}
// Row number for the next key: zipfian-distributed when args.zipf is set,
// uniform over [0, args.rows) otherwise.
force_inline int nextKey(Arguments const& args) {
	return args.zipf ? zipfian_next() : urand(0, args.rows - 1);
}
// string_view length as int, for arithmetic against int-typed key lengths
force_inline int intSize(std::string_view sv) {
	return static_cast<int>(sv.size());
}
/* random string */
// Fill str[0..len) with random printable characters in ['!', 'z'].
// No null terminator is written.
template <typename Char>
force_inline void randomString(Char* str, int len) {
	assert(len >= 0);
	auto remaining = len;
	while (remaining-- > 0)
		*str++ = ('!' + urand(0, 'z' - '!')); /* generate a char from '!' to 'z' */
}
/* given the total number of rows to be inserted,
 * the worker process index p_idx and the thread index t_idx (both 0-based),
 * and the total number of processes, total_p, and threads, total_t,
 * returns the first row number assigned to this partition.
 */
force_inline int insertBegin(int rows, int p_idx, int t_idx, int total_p, int total_t) {
	const auto share = (double)rows / total_p / total_t;
	const auto slot = (p_idx * total_t) + t_idx;
	return (int)(round(share * slot));
}

/* similar to insertBegin, insertEnd returns the last row number */
force_inline int insertEnd(int rows, int p_idx, int t_idx, int total_p, int total_t) {
	const auto share = (double)rows / total_p / total_t;
	const auto slot = (p_idx * total_t) + t_idx;
	return (int)(round(share * (slot + 1) - 1));
}
/* divide a value equally among threads */
int computeThreadPortion(int val, int p_idx, int t_idx, int total_p, int total_t);
/* similar to insertBegin/end, computeThreadTps computes
* the per-thread target TPS for given configuration.
*/
#define computeThreadTps(val, p_idx, t_idx, total_p, total_t) computeThreadPortion(val, p_idx, t_idx, total_p, total_t)
/* similar to computeThreadTps,
 * computeThreadIters computes the number of iterations.
 */
#define computeThreadIters(val, p_idx, t_idx, total_p, total_t) \
computeThreadPortion(val, p_idx, t_idx, total_p, total_t)
/* get the number of digits */
int digits(int num);
/* fill memory slice [str, str + len) as stringified, zero-padded num */
// NOTE(review): digits of num beyond len are silently dropped; callers size
// len via digits(), so this shouldn't occur -- confirm before reusing.
template <typename Char>
force_inline void numericWithFill(Char* str, int len, int num) {
	static_assert(sizeof(Char) == 1);
	assert(num >= 0);
	memset(str, '0', len);
	auto pos = len;
	while (num > 0 && pos > 0) {
		str[--pos] = '0' + (num % 10);
		num /= 10;
	}
}
/* generate a key for a given key number */
/* prefix is "mako" by default; args.prefixpadding places the 'x' padding in
 * front of the prefix instead of after the row number */
template <typename Char>
void genKey(Char* str, std::string_view prefix, Arguments const& args, int num) {
	static_assert(sizeof(Char) == 1);
	const auto prefix_len = static_cast<int>(prefix.size());
	const auto offset = args.prefixpadding ? args.key_length - (prefix_len + args.row_digits) : 0;
	memset(str, 'x', args.key_length);
	memcpy(&str[offset], prefix.data(), prefix_len);
	numericWithFill(&str[offset + prefix_len], args.row_digits, num);
}
// Materialize key1 (and key2, when the op is configured with a range) for
// one execution of op. key2's row is clamped to the last row.
template <typename Char>
force_inline void prepareKeys(int op,
                              std::basic_string<Char>& key1,
                              std::basic_string<Char>& key2,
                              Arguments const& args) {
	const auto num1 = nextKey(args);
	genKey(key1.data(), KEY_PREFIX, args, num1);
	const auto range = args.txnspec.ops[op][OP_RANGE];
	if (range > 0) {
		const auto num2 = std::min(num1 + range - 1, args.rows - 1);
		genKey(key2.data(), KEY_PREFIX, args, num2);
	}
}
// invoke user-provided callable when object goes out of scope.
template <typename Func>
class ExitGuard {
	std::decay_t<Func> fn;

public:
	ExitGuard(Func&& fn) : fn(std::forward<Func>(fn)) {}

	~ExitGuard() { fn(); }
};

// invoke user-provided callable when stack unwinds by exception.
// (fires only while std::uncaught_exceptions() is nonzero, i.e. during
// unwinding; does nothing on normal scope exit)
template <typename Func>
class FailGuard {
	std::decay_t<Func> fn;

public:
	FailGuard(Func&& fn) : fn(std::forward<Func>(fn)) {}

	~FailGuard() {
		if (std::uncaught_exceptions()) {
			fn();
		}
	}
};
// trace helpers
// Column widths for the aligned stats tables printed during the run.
constexpr const int STATS_TITLE_WIDTH = 12;
constexpr const int STATS_FIELD_WIDTH = 12;

// left-aligned row title, padded to STATS_TITLE_WIDTH
template <typename Value>
void putTitle(Value&& value) {
	fmt::print("{0: <{1}} ", std::forward<Value>(value), STATS_TITLE_WIDTH);
}

// right-aligned row title, padded to STATS_TITLE_WIDTH
template <typename Value>
void putTitleRight(Value&& value) {
	fmt::print("{0: >{1}} ", std::forward<Value>(value), STATS_TITLE_WIDTH);
}

// '=' separator the width of a title cell
inline void putTitleBar() {
	fmt::print("{0:=<{1}} ", "", STATS_TITLE_WIDTH);
}

// right-aligned data cell, padded to STATS_FIELD_WIDTH
template <typename Value>
void putField(Value&& value) {
	fmt::print("{0: >{1}} ", std::forward<Value>(value), STATS_FIELD_WIDTH);
}

// '=' separator the width of a data cell
inline void putFieldBar() {
	fmt::print("{0:=>{1}} ", "", STATS_FIELD_WIDTH);
}

// right-aligned fixed-precision floating-point data cell
template <typename Value>
void putFieldFloat(Value&& value, int precision) {
	fmt::print("{0: >{1}.{2}f} ", std::forward<Value>(value), STATS_FIELD_WIDTH, precision);
}
} // namespace mako
#endif /* UTILS_HPP */

View File

@ -21,9 +21,25 @@ endif()
include(CheckSymbolExists)
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support")
set(USE_WOLFSSL OFF CACHE BOOL "Build against WolfSSL instead of OpenSSL")
set(USE_OPENSSL ON CACHE BOOL "Build against OpenSSL")
if(DISABLE_TLS)
set(WITH_TLS OFF)
else()
if(USE_WOLFSSL)
set(WOLFSSL_USE_STATIC_LIBS TRUE)
find_package(WolfSSL)
if(WOLFSSL_FOUND)
set(CMAKE_REQUIRED_INCLUDES ${WOLFSSL_INCLUDE_DIR})
set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL)
add_compile_options(-DHAVE_WOLFSSL)
else()
message(STATUS "WolfSSL was not found - Will compile without TLS Support")
message(STATUS "You can set WOLFSSL_ROOT_DIR to help cmake find it")
set(WITH_TLS OFF)
endif()
elseif(USE_OPENSSL)
set(OPENSSL_USE_STATIC_LIBS TRUE)
if(WIN32)
set(OPENSSL_MSVC_STATIC_RT ON)
@ -39,6 +55,7 @@ else()
set(WITH_TLS OFF)
endif()
endif()
endif()
################################################################################
# Python Bindings

View File

@ -198,7 +198,7 @@ function(fdb_configure_and_install)
string(TOLOWER "${pkg}" package)
string(TOUPPER "${IN_DESTINATION}" destination)
get_install_dest(${pkg} INCLUDE INCLUDE_DIR)
get_install_dest(${pkg} INCLUDE LIB_DIR)
get_install_dest(${pkg} LIB LIB_DIR)
get_install_dest(${pkg} ${destination} install_path)
string(REGEX REPLACE "\.in$" "" name "${IN_FILE}")
get_filename_component(name "${name}" NAME)

63
cmake/FindWolfSSL.cmake Normal file
View File

@ -0,0 +1,63 @@
# FindWolfSSL
# -----------
# Locate the wolfSSL library and headers.
#
# Hints:
#   WOLFSSL_ROOT_DIR        - install prefix to search first
#   WOLFSSL_USE_STATIC_LIBS - prefer static libraries when set
#
# Results:
#   WOLFSSL_FOUND, WOLFSSL_INCLUDE_DIRS, WOLFSSL_LIBRARIES
#   Imported target WolfSSL, additionally aliased as OpenSSL::SSL and
#   OpenSSL::CRYPTO so the rest of the build can link wolfSSL through the
#   OpenSSL target names it already uses.

# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES.
if(WOLFSSL_USE_STATIC_LIBS)
  # CMAKE_FIND_LIBRARY_SUFFIXES is scoped to the includer; save it so our
  # override does not leak into later find_library() calls.
  set(_wolfssl_saved_find_suffixes "${CMAKE_FIND_LIBRARY_SUFFIXES}")
  if(WIN32)
    set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
  else()
    set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
  endif()
endif()

find_path(WOLFSSL_ROOT_DIR
  NAMES include/wolfssl/options.h
)
find_path(WOLFSSL_INCLUDE_DIR
  NAMES wolfssl/ssl.h
  PATHS ${WOLFSSL_ROOT_DIR}/include
)
find_library(WOLFSSL_LIBRARY
  NAMES wolfssl
  PATHS ${WOLFSSL_ROOT_DIR}/lib
)

if(WOLFSSL_USE_STATIC_LIBS)
  # Restore the caller's suffix list now that the wolfSSL lookups are done.
  set(CMAKE_FIND_LIBRARY_SUFFIXES "${_wolfssl_saved_find_suffixes}")
  unset(_wolfssl_saved_find_suffixes)
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(WolfSSL
  REQUIRED_VARS
    WOLFSSL_LIBRARY
    WOLFSSL_INCLUDE_DIR
  FAIL_MESSAGE
    "Could NOT find WolfSSL"
)

mark_as_advanced(
  WOLFSSL_ROOT_DIR
  WOLFSSL_LIBRARY
  WOLFSSL_INCLUDE_DIR
)

if(WOLFSSL_FOUND)
  message(STATUS "Found wolfssl library: ${WOLFSSL_LIBRARY}")
  message(STATUS "Found wolfssl includes: ${WOLFSSL_INCLUDE_DIR}")
  set(WOLFSSL_INCLUDE_DIRS ${WOLFSSL_INCLUDE_DIR})
  set(WOLFSSL_LIBRARIES ${WOLFSSL_LIBRARY})
  # wolfSSL ships a single library covering both the SSL and crypto pieces, so
  # one imported target backs both OpenSSL:: aliases. GLOBAL is required for
  # the aliases to be visible outside this directory.
  add_library(WolfSSL UNKNOWN IMPORTED GLOBAL)
  add_library(OpenSSL::SSL ALIAS WolfSSL)
  add_library(OpenSSL::CRYPTO ALIAS WolfSSL)
  # Fix: the previous version called target_link_libraries() with
  # ${WOLFSSL_TLS_LIBRARY}/${WOLFSSL_SSL_LIBRARY}/${WOLFSSL_CRYPTO_LIBRARY},
  # none of which this module ever sets, and set the interface include
  # directories twice. The single library is carried by IMPORTED_LOCATION and
  # the includes by INTERFACE_INCLUDE_DIRECTORIES below.
  set_target_properties(WolfSSL PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${WOLFSSL_INCLUDE_DIR}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${WOLFSSL_LIBRARY}")
endif()

View File

@ -48,29 +48,25 @@
---
# name: test_execstack_permissions_libfdb_c[centos-versioned]
'
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 RW 0x0
- /lib64/libfdb_c.so
'
---
# name: test_execstack_permissions_libfdb_c[centos]
'
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 RW 0x0
- /lib64/libfdb_c.so
'
---
# name: test_execstack_permissions_libfdb_c[ubuntu-versioned]
'
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 RW 0x0
- /lib/libfdb_c.so
'
---
# name: test_execstack_permissions_libfdb_c[ubuntu]
'
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 RW 0x0
- /lib/libfdb_c.so
'
---

View File

@ -22,6 +22,7 @@ import pathlib
import pytest
import shlex
import subprocess
import sys
import uuid
from typing import Iterator, List, Optional, Union
@ -29,9 +30,14 @@ from typing import Iterator, List, Optional, Union
def run(args: List[str]) -> str:
print("$ {}".format(" ".join(map(shlex.quote, args))))
result = subprocess.check_output(args).decode("utf-8")
print(result, end="")
return result
result = []
proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
while proc.poll() is None:
text = proc.stdout.readline().decode("utf-8")
result.append(text)
sys.stdout.write(text)
assert proc.returncode == 0
return "".join(result)
class Image:
@ -106,7 +112,16 @@ def ubuntu_image_with_fdb_helper(versioned: bool) -> Iterator[Optional[Image]]:
for deb in debs:
container.copy_to(deb, "/opt")
container.run(["bash", "-c", "apt-get update"])
container.run(["bash", "-c", "apt-get install --yes binutils"]) # this is for testing libfdb_c execstack permissions
container.run(
["bash", "-c", "apt-get install --yes execstack"]
) # this is for testing libfdb_c execstack permissions
container.run(
[
"bash",
"-c",
"DEBIAN_FRONTEND=noninteractive DEBCONF_NONINTERACTIVE_SEEN=true apt-get install --yes gcc pkg-config cmake",
]
) # this is for testing building client apps
container.run(["bash", "-c", "dpkg -i /opt/*.deb"])
container.run(["bash", "-c", "rm /opt/*.deb"])
image = container.commit()
@ -151,7 +166,12 @@ def centos_image_with_fdb_helper(versioned: bool) -> Iterator[Optional[Image]]:
for rpm in rpms:
container.copy_to(rpm, "/opt")
container.run(["bash", "-c", "yum update -y"])
container.run(["bash", "-c", "yum install -y binutils"]) # this is for testing libfdb_c execstack permissions
container.run(
["bash", "-c", "yum install -y prelink"]
) # this is for testing libfdb_c execstack permissions
container.run(
["bash", "-c", "yum install -y gcc pkg-config cmake make"]
) # this is for testing building client apps
container.run(["bash", "-c", "yum install -y /opt/*.rpm"])
container.run(["bash", "-c", "rm /opt/*.rpm"])
image = container.commit()
@ -232,6 +252,70 @@ def test_db_available(linux_container: Container):
linux_container.run(["fdbcli", "--exec", "get x"])
def test_client_app(linux_container: Container):
test_client_app_script = r"""#!/bin/bash
set -euxo pipefail
cat > app.c << EOF
// FDB_API_VERSION doesn't necessarily need to be kept up to date here
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
static void check(fdb_error_t e) {
if (e) {
fprintf(stderr, "%s\n", fdb_get_error(e));
fflush(NULL);
abort();
}
}
int result = 0;
static void callback(FDBFuture* f, void* _ignored) {
check(fdb_stop_network());
}
int main() {
check(fdb_select_api_version(700));
check(fdb_setup_network());
FDBDatabase* db;
check(fdb_create_database(NULL, &db));
FDBTransaction* tr;
check(fdb_database_create_transaction(db, &tr));
FDBFuture* f = fdb_transaction_get_read_version(tr);
check(fdb_future_set_callback(f, callback, NULL));
check(fdb_run_network());
fdb_future_destroy(f);
fdb_transaction_destroy(tr);
fdb_database_destroy(db);
return 0;
}
EOF
cc app.c `pkg-config foundationdb-client --cflags --libs`
./a.out
cat > CMakeLists.txt << EOF
project(app C)
find_package(FoundationDB-Client REQUIRED)
add_executable(app app.c)
target_link_libraries(app PRIVATE fdb_c)
EOF
mkdir build
cd build
cmake ..
make
./app
"""
linux_container.run(["bash", "-c", test_client_app_script])
def test_write(linux_container: Container, snapshot):
linux_container.run(["fdbcli", "--exec", "writemode on; set x y"])
assert snapshot == linux_container.run(["fdbcli", "--exec", "get x"])
@ -243,7 +327,7 @@ def test_execstack_permissions_libfdb_c(linux_container: Container, snapshot):
[
"bash",
"-c",
"readelf -l $(ldconfig -p | grep libfdb_c | awk '{print $(NF)}') | grep -A1 GNU_STACK",
"execstack -q $(ldconfig -p | grep libfdb_c | awk '{print $(NF)}')",
]
)

View File

@ -148,7 +148,7 @@ is equivalent to something like:
tr.set(Tuple.from("class", "class1").pack(), encodeInt(100));
t.commit().join();
} catch (RuntimeException e) {
t = t.onError(e).get();
t = t.onError(e).join();
}
}
@ -290,10 +290,10 @@ This is easy -- we simply add a condition to check that the value is non-zero. L
private static void signup(TransactionContext db, final String s, final String c) {
db.run((Transaction tr) -> {
byte[] rec = Tuple.from("attends", s, c).pack();
if (tr.get(rec).get() != null)
if (tr.get(rec).join() != null)
return null; // already signed up
int seatsLeft = decodeInt(tr.get(Tuple.from("class", c).pack()).get());
int seatsLeft = decodeInt(tr.get(Tuple.from("class", c).pack()).join());
if (seatsLeft == 0)
throw new IllegalStateException("No remaining seats");

View File

@ -1189,7 +1189,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
try {
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
wait(ccf->resolveHostnames());
} catch (Error& e) {
if (e.code() == error_code_operation_cancelled) {
throw;

View File

@ -28,16 +28,30 @@
#include "fdbclient/CoordinationInterface.h"
// Determine public IP address by calling the first coordinator.
// Determine public IP address by calling the first available coordinator.
// If fail connecting all coordinators, throw bind_failed().
IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
int size = ccs.coordinators().size() + ccs.hostnames.size();
int index = 0;
loop {
try {
using namespace boost::asio;
io_service ioService;
ip::udp::socket socket(ioService);
ccs.resolveHostnamesBlocking();
const auto& coordAddr = ccs.coordinators()[0];
NetworkAddress coordAddr;
// Try coords first, because they don't need to be resolved.
if (index < ccs.coordinators().size()) {
coordAddr = ccs.coordinators()[index];
} else {
Hostname& h = ccs.hostnames[index - ccs.coordinators().size()];
Optional<NetworkAddress> resolvedAddr = h.resolveBlocking();
if (!resolvedAddr.present()) {
throw lookup_failed();
}
coordAddr = resolvedAddr.get();
}
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
: ip::address(ip::address_v4(coordAddr.ip.toV4()));
@ -48,8 +62,12 @@ IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
socket.close();
return ip;
} catch (boost::system::system_error e) {
fprintf(stderr, "Error determining public address: %s\n", e.what());
} catch (...) {
++index;
if (index == size) {
fprintf(stderr, "Error determining public address.\n");
throw bind_failed();
}
}
}
}

View File

@ -65,7 +65,6 @@ set(FDBCLIENT_SRCS
GlobalConfig.actor.cpp
GrvProxyInterface.h
HighContentionPrefixAllocator.actor.h
HTTP.actor.cpp
IClientApi.h
IConfigTransaction.cpp
IConfigTransaction.h

View File

@ -21,6 +21,7 @@
#include "fdbclient/Knobs.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/Tenant.h"
#include "flow/UnitTest.h"
#define init(...) KNOB_FN(__VA_ARGS__, INIT_ATOMIC_KNOB, INIT_KNOB)(__VA_ARGS__)
@ -82,6 +83,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( CHANGE_FEED_CACHE_SIZE, 100000 ); if( randomize && BUGGIFY ) CHANGE_FEED_CACHE_SIZE = 1;
init( CHANGE_FEED_POP_TIMEOUT, 5.0 );
init( CHANGE_FEED_STREAM_MIN_BYTES, 1e4 ); if( randomize && BUGGIFY ) CHANGE_FEED_STREAM_MIN_BYTES = 1;
init( TENANT_PREFIX_SIZE_LIMIT, 28 ); ASSERT(TENANT_PREFIX_SIZE_LIMIT >= TenantMapEntry::ROOT_PREFIX_SIZE); // includes 8-byte ID and optional tenant subspace
init( MAX_BATCH_SIZE, 1000 ); if( randomize && BUGGIFY ) MAX_BATCH_SIZE = 1;
init( GRV_BATCH_TIMEOUT, 0.005 ); if( randomize && BUGGIFY ) GRV_BATCH_TIMEOUT = 0.1;

View File

@ -81,6 +81,7 @@ public:
int64_t CHANGE_FEED_CACHE_SIZE;
double CHANGE_FEED_POP_TIMEOUT;
int64_t CHANGE_FEED_STREAM_MIN_BYTES;
int64_t TENANT_PREFIX_SIZE_LIMIT;
int MAX_BATCH_SIZE;
double GRV_BATCH_TIMEOUT;

View File

@ -61,61 +61,31 @@ struct ClientLeaderRegInterface {
// - There is no address present more than once
class ClusterConnectionString {
public:
enum ConnectionStringStatus { RESOLVED, RESOLVING, UNRESOLVED };
ClusterConnectionString() {}
ClusterConnectionString(const std::string& connStr);
ClusterConnectionString(const std::string& connectionString);
ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key);
ClusterConnectionString(const std::vector<Hostname>& hosts, Key key);
ClusterConnectionString(const ClusterConnectionString& rhs) { operator=(rhs); }
ClusterConnectionString& operator=(const ClusterConnectionString& rhs) {
// Copy everything except AsyncTrigger resolveFinish.
status = rhs.status;
coords = rhs.coords;
hostnames = rhs.hostnames;
networkAddressToHostname = rhs.networkAddressToHostname;
key = rhs.key;
keyDesc = rhs.keyDesc;
connectionString = rhs.connectionString;
return *this;
}
std::vector<NetworkAddress> const& coordinators() const { return coords; }
void addResolved(const Hostname& hostname, const NetworkAddress& address) {
coords.push_back(address);
networkAddressToHostname.emplace(address, hostname);
}
Key clusterKey() const { return key; }
Key clusterKeyName() const {
return keyDesc;
} // Returns the "name" or "description" part of the clusterKey (the part before the ':')
std::string toString() const;
static std::string getErrorString(std::string const& source, Error const& e);
Future<Void> resolveHostnames();
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
// should be preferred.
void resolveHostnamesBlocking();
// This function derives the member connectionString from the current key, coordinators and hostnames.
void resetConnectionString();
void resetToUnresolved();
void parseKey(const std::string& key);
ConnectionStringStatus status = RESOLVED;
AsyncTrigger resolveFinish;
// This function tries to resolve all hostnames once, and return them with coords.
// Best effort, does not guarantee that the resolves succeed.
Future<std::vector<NetworkAddress>> tryResolveHostnames();
std::vector<NetworkAddress> coords;
std::vector<Hostname> hostnames;
std::unordered_map<NetworkAddress, Hostname> networkAddressToHostname;
private:
void parseConnString();
Key key, keyDesc;
std::string connectionString;
};
FDB_DECLARE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted);
@ -165,12 +135,6 @@ public:
// Signals to the connection record that it was successfully used to connect to a cluster.
void notifyConnected();
ClusterConnectionString::ConnectionStringStatus connectionStringStatus() const;
Future<Void> resolveHostnames();
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
// should be preferred.
void resolveHostnamesBlocking();
virtual void addref() = 0;
virtual void delref() = 0;
@ -275,12 +239,21 @@ struct OpenDatabaseCoordRequest {
Standalone<VectorRef<ClientVersionRef>> supportedVersions;
UID knownClientInfoID;
Key clusterKey;
std::vector<Hostname> hostnames;
std::vector<NetworkAddress> coordinators;
ReplyPromise<CachedSerialization<struct ClientDBInfo>> reply;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, issues, supportedVersions, traceLogGroup, knownClientInfoID, clusterKey, coordinators, reply);
serializer(ar,
issues,
supportedVersions,
traceLogGroup,
knownClientInfoID,
clusterKey,
hostnames,
coordinators,
reply);
}
};

View File

@ -20,6 +20,7 @@
#include "fdbclient/FDBTypes.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/NativeAPI.actor.h"
KeyRef keyBetween(const KeyRangeRef& keys) {
int pos = 0; // will be the position of the first difference between keys.begin and keys.end
@ -40,17 +41,15 @@ KeyRef keyBetween(const KeyRangeRef& keys) {
}
void KeySelectorRef::setKey(KeyRef const& key) {
// There are no keys in the database with size greater than KEY_SIZE_LIMIT, so if this key selector has a key
// There are no keys in the database with size greater than the max key size, so if this key selector has a key
// which is large, then we can translate it to an equivalent key selector with a smaller key
if (key.size() >
(key.startsWith(LiteralStringRef("\xff")) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
this->key = key.substr(0,
(key.startsWith(LiteralStringRef("\xff")) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
else
int64_t maxKeySize = getMaxKeySize(key);
if (key.size() > maxKeySize) {
this->key = key.substr(0, maxKeySize + 1);
} else {
this->key = key;
}
}
void KeySelectorRef::setKeyUnlimited(KeyRef const& key) {
this->key = key;

View File

@ -746,6 +746,17 @@ Future<Optional<TenantMapEntry>> createTenantTransaction(Transaction tr, TenantN
state Optional<Value> lastIdVal = wait(safeThreadFutureToFuture(lastIdFuture));
Optional<Value> tenantDataPrefix = wait(safeThreadFutureToFuture(tenantDataPrefixFuture));
if (tenantDataPrefix.present() &&
tenantDataPrefix.get().size() + TenantMapEntry::ROOT_PREFIX_SIZE > CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT) {
TraceEvent(SevWarnAlways, "TenantPrefixTooLarge")
.detail("TenantSubspace", tenantDataPrefix.get())
.detail("TenantSubspaceLength", tenantDataPrefix.get().size())
.detail("RootPrefixLength", TenantMapEntry::ROOT_PREFIX_SIZE)
.detail("MaxTenantPrefixSize", CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT);
throw client_invalid_operation();
}
state TenantMapEntry newTenant(lastIdVal.present() ? TenantMapEntry::prefixToId(lastIdVal.get()) + 1 : 0,
tenantDataPrefix.present() ? (KeyRef)tenantDataPrefix.get() : ""_sr);

View File

@ -782,7 +782,7 @@ ACTOR Future<std::vector<ProcessData>> getWorkers(Database cx) {
}
}
ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
ACTOR Future<Optional<ClusterConnectionString>> getConnectionString(Database cx) {
state Transaction tr(cx);
loop {
try {
@ -790,9 +790,8 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
Optional<Value> currentKey = wait(tr.get(coordinatorsKey));
if (!currentKey.present())
return std::vector<NetworkAddress>();
return ClusterConnectionString(currentKey.get().toString()).coordinators();
return Optional<ClusterConnectionString>();
return ClusterConnectionString(currentKey.get().toString());
} catch (Error& e) {
wait(tr.onError(e));
}
@ -801,7 +800,7 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
Reference<IQuorumChange> change,
ClusterConnectionString* conn) {
std::vector<NetworkAddress> desiredCoordinators) {
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
@ -812,47 +811,45 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely?
state ClusterConnectionString old(currentKey.get().toString());
wait(old.resolveHostnames());
if (tr->getDatabase()->getConnectionRecord() &&
old.clusterKeyName().toString() !=
tr->getDatabase()->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!conn->coords.size()) {
std::vector<NetworkAddress> desiredCoordinatorAddresses = wait(change->getDesiredCoordinators(
if (!desiredCoordinators.size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
tr,
old.coordinators(),
oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));
conn->coords = desiredCoordinatorAddresses;
desiredCoordinators = _desiredCoordinators;
}
if (result != CoordinatorsResult::SUCCESS)
return result;
if (!conn->coordinators().size())
if (!desiredCoordinators.size())
return CoordinatorsResult::INVALID_NETWORK_ADDRESSES;
std::sort(conn->coords.begin(), conn->coords.end());
std::sort(conn->hostnames.begin(), conn->hostnames.end());
std::sort(desiredCoordinators.begin(), desiredCoordinators.end());
std::string newName = change->getDesiredClusterKeyName();
if (newName.empty())
newName = old.clusterKeyName().toString();
if (old.coordinators() == conn->coordinators() && old.clusterKeyName() == newName)
if (oldCoordinators == desiredCoordinators && old.clusterKeyName() == newName)
return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
std::string key(newName + ':' + deterministicRandom()->randomAlphaNumeric(32));
conn->parseKey(key);
conn->resetConnectionString();
state ClusterConnectionString conn(desiredCoordinators,
StringRef(newName + ':' + deterministicRandom()->randomAlphaNumeric(32)));
if (g_network->isSimulated()) {
int i = 0;
int protectedCount = 0;
while ((protectedCount < ((conn->coordinators().size() / 2) + 1)) && (i < conn->coordinators().size())) {
auto process = g_simulator.getProcessByAddress(conn->coordinators()[i]);
while ((protectedCount < ((desiredCoordinators.size() / 2) + 1)) && (i < desiredCoordinators.size())) {
auto process = g_simulator.getProcessByAddress(desiredCoordinators[i]);
auto addresses = process->addresses;
if (!process->isReliable()) {
@ -864,14 +861,14 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
if (addresses.secondaryAddress.present()) {
g_simulator.protectedAddresses.insert(process->addresses.secondaryAddress.get());
}
TraceEvent("ProtectCoordinator").detail("Address", conn->coordinators()[i]).backtrace();
TraceEvent("ProtectCoordinator").detail("Address", desiredCoordinators[i]).backtrace();
protectedCount++;
i++;
}
}
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(*conn)));
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(conn)));
leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
@ -883,7 +880,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
when(wait(waitForAll(leaderServers))) {}
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
}
tr->set(coordinatorsKey, conn->toString());
tr->set(coordinatorsKey, conn.toString());
return Optional<CoordinatorsResult>();
}
@ -909,11 +906,12 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
old.clusterKeyName().toString() != cx->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!desiredCoordinators.size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
&tr,
old.coordinators(),
oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));
desiredCoordinators = _desiredCoordinators;
@ -937,7 +935,7 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
if (newName.empty())
newName = old.clusterKeyName().toString();
if (old.coordinators() == desiredCoordinators && old.clusterKeyName() == newName)
if (oldCoordinators == desiredCoordinators && old.clusterKeyName() == newName)
return retries ? CoordinatorsResult::SUCCESS : CoordinatorsResult::SAME_NETWORK_ADDRESSES;
state ClusterConnectionString conn(
@ -1075,10 +1073,17 @@ struct AutoQuorumChange final : IQuorumChange {
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
if (coord.clientLeaderServers[i].hostname.present()) {
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coord.clusterKey, UID()),
coord.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
}
Optional<std::vector<Optional<LeaderInfo>>> results =
wait(timeout(getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY));
if (!results.present()) {

View File

@ -56,7 +56,7 @@ struct IQuorumChange : ReferenceCounted<IQuorumChange> {
// Change to use the given set of coordination servers
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
Reference<IQuorumChange> change,
ClusterConnectionString* conn);
std::vector<NetworkAddress> desiredCoordinators);
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
Reference<IQuorumChange> autoQuorumChange(int desired = -1);
Reference<IQuorumChange> noQuorumChange();
@ -146,7 +146,7 @@ ACTOR Future<bool> setHealthyZone(Database cx, StringRef zoneId, double seconds,
ACTOR Future<Void> waitForPrimaryDC(Database cx, StringRef dcId);
// Gets the cluster connection string
ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx);
ACTOR Future<Optional<ClusterConnectionString>> getConnectionString(Database cx);
void schemaCoverage(std::string const& spath, bool covered = true);
bool schemaMatch(json_spirit::mValue const& schema,

View File

@ -77,18 +77,6 @@ void IClusterConnectionRecord::setPersisted() {
connectionStringNeedsPersisted = false;
}
ClusterConnectionString::ConnectionStringStatus IClusterConnectionRecord::connectionStringStatus() const {
return cs.status;
}
Future<Void> IClusterConnectionRecord::resolveHostnames() {
return cs.resolveHostnames();
}
void IClusterConnectionRecord::resolveHostnamesBlocking() {
cs.resolveHostnamesBlocking();
}
std::string ClusterConnectionString::getErrorString(std::string const& source, Error const& e) {
if (e.code() == error_code_connection_string_invalid) {
return format("Invalid connection string `%s: %d %s", source.c_str(), e.code(), e.what());
@ -97,101 +85,19 @@ std::string ClusterConnectionString::getErrorString(std::string const& source, E
}
}
ACTOR Future<Void> resolveHostnamesImpl(ClusterConnectionString* self) {
loop {
if (self->status == ClusterConnectionString::UNRESOLVED) {
self->status = ClusterConnectionString::RESOLVING;
std::vector<Future<Void>> fs;
for (auto const& hostname : self->hostnames) {
fs.push_back(map(INetworkConnections::net()->resolveTCPEndpoint(hostname.host, hostname.service),
[=](std::vector<NetworkAddress> const& addresses) -> Void {
NetworkAddress address =
addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (hostname.isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
self->addResolved(hostname, address);
return Void();
}));
}
wait(waitForAll(fs));
std::sort(self->coords.begin(), self->coords.end());
if (std::unique(self->coords.begin(), self->coords.end()) != self->coords.end()) {
self->status = ClusterConnectionString::UNRESOLVED;
self->resolveFinish.trigger();
throw connection_string_invalid();
}
self->status = ClusterConnectionString::RESOLVED;
self->resolveFinish.trigger();
break;
} else if (self->status == ClusterConnectionString::RESOLVING) {
wait(self->resolveFinish.onTrigger());
if (self->status == ClusterConnectionString::RESOLVED) {
break;
}
// Otherwise, this means other threads failed on resolve, so here we go back to the loop and try to resolve
// again.
} else {
// status is RESOLVED, nothing to do.
break;
}
}
return Void();
}
Future<Void> ClusterConnectionString::resolveHostnames() {
return resolveHostnamesImpl(this);
}
void ClusterConnectionString::resolveHostnamesBlocking() {
if (status != RESOLVED) {
status = RESOLVING;
for (auto const& hostname : hostnames) {
std::vector<NetworkAddress> addresses =
INetworkConnections::net()->resolveTCPEndpointBlocking(hostname.host, hostname.service);
NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (hostname.isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
addResolved(hostname, address);
}
std::sort(coords.begin(), coords.end());
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
status = UNRESOLVED;
throw connection_string_invalid();
}
status = RESOLVED;
}
}
void ClusterConnectionString::resetToUnresolved() {
if (status == RESOLVED && hostnames.size() > 0) {
coords.clear();
hostnames.clear();
networkAddressToHostname.clear();
status = UNRESOLVED;
parseConnString();
}
}
void ClusterConnectionString::resetConnectionString() {
connectionString = toString();
}
void ClusterConnectionString::parseConnString() {
ClusterConnectionString::ClusterConnectionString(const std::string& connectionString) {
auto trimmed = trim(connectionString);
// Split on '@' into key@addrs
int pAt = connectionString.find_first_of('@');
if (pAt == connectionString.npos) {
int pAt = trimmed.find_first_of('@');
if (pAt == trimmed.npos) {
throw connection_string_invalid();
}
std::string key = connectionString.substr(0, pAt);
std::string addrs = connectionString.substr(pAt + 1);
std::string key = trimmed.substr(0, pAt);
std::string addrs = trimmed.substr(pAt + 1);
parseKey(key);
std::set<Hostname> hostnameSet;
std::set<NetworkAddress> addressSet;
std::string curAddr;
for (int p = 0; p <= addrs.size();) {
int pComma = addrs.find_first_of(',', p);
@ -199,31 +105,29 @@ void ClusterConnectionString::parseConnString() {
pComma = addrs.size();
curAddr = addrs.substr(p, pComma - p);
if (Hostname::isHostname(curAddr)) {
Hostname h = Hostname::parse(curAddr);
// Check that there are no duplicate hostnames
if (hostnameSet.find(h) != hostnameSet.end()) {
throw connection_string_invalid();
}
hostnames.push_back(Hostname::parse(curAddr));
hostnameSet.insert(h);
} else {
coords.push_back(NetworkAddress::parse(curAddr));
NetworkAddress n = NetworkAddress::parse(curAddr);
// Check that there are no duplicate addresses
if (addressSet.find(n) != addressSet.end()) {
throw connection_string_invalid();
}
coords.push_back(n);
addressSet.insert(n);
}
p = pComma + 1;
}
if (hostnames.size() > 0) {
status = UNRESOLVED;
}
ASSERT((coords.size() + hostnames.size()) > 0);
std::sort(coords.begin(), coords.end());
// Check that there are no duplicate addresses
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
throw connection_string_invalid();
}
}
ClusterConnectionString::ClusterConnectionString(const std::string& connStr) {
connectionString = trim(connStr);
parseConnString();
}
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
std::string input;
state std::string input;
{
input = "asdf:2345@1.1.1.1:345";
@ -231,6 +135,15 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
ASSERT(input == cs.toString());
}
{
input = "asdf:2345@1.1.1.1:345,1.1.1.1:345";
try {
ClusterConnectionString cs(input);
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
}
}
{
input = "0xxdeadbeef:100100100@1.1.1.1:34534,5.1.5.3:23443";
ClusterConnectionString cs(input);
@ -274,20 +187,27 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
}
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
std::string input;
state std::string input;
{
input = "asdf:2345@localhost:1234";
ClusterConnectionString cs(input);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 1);
ASSERT(input == cs.toString());
}
{
input = "asdf:2345@localhost:1234,localhost:1234";
try {
ClusterConnectionString cs(input);
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
}
}
{
input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443";
ClusterConnectionString cs(input);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@ -300,7 +220,6 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
commented += "# asdfasdf ##";
ClusterConnectionString cs(commented);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@ -313,7 +232,6 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
commented += "# asdfasdf ##";
ClusterConnectionString cs(commented);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString());
}
@ -321,45 +239,31 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
return Void();
}
TEST_CASE("/fdbclient/MonitorLeader/ConnectionString") {
state std::string connectionString = "TestCluster:0@localhost:1234,host-name:5678";
std::string hn1 = "localhost", port1 = "1234";
state std::string hn2 = "host-name";
state std::string port2 = "5678";
state std::vector<Hostname> hostnames;
TEST_CASE("/fdbclient/MonitorLeader/ConnectionString/hostname") {
std::string connectionString = "TestCluster:0@localhost:1234,host-name:5678";
std::string hn1 = "localhost", port1 = "1234", hn2 = "host-name", port2 = "5678";
std::vector<Hostname> hostnames;
{
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
hostnames.push_back(Hostname::parse(hn2 + ":" + port2));
NetworkAddress address1 = NetworkAddress::parse("127.0.0.0:1234");
NetworkAddress address2 = NetworkAddress::parse("127.0.0.1:5678");
INetworkConnections::net()->addMockTCPEndpoint(hn1, port1, { address1 });
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address2 });
state ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
wait(cs.resolveHostnames());
ASSERT(cs.status == ClusterConnectionString::RESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 2);
ASSERT(cs.toString() == connectionString);
cs.resetToUnresolved();
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
ASSERT(cs.toString() == connectionString);
}
INetworkConnections::net()->removeMockTCPEndpoint(hn2, port2);
NetworkAddress address3 = NetworkAddress::parse("127.0.0.0:5678");
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address3 });
{
hostnames.clear();
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
try {
wait(cs.resolveHostnames());
ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
}
}
return Void();
}
@ -380,6 +284,7 @@ ACTOR Future<std::vector<NetworkAddress>> tryResolveHostnamesImpl(ClusterConnect
allCoordinatorsSet.insert(coord);
}
std::vector<NetworkAddress> allCoordinators(allCoordinatorsSet.begin(), allCoordinatorsSet.end());
std::sort(allCoordinators.begin(), allCoordinators.end());
return allCoordinators;
}
@ -484,17 +389,22 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/fuzz") {
}
ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key)
: status(RESOLVED), coords(servers) {
: coords(servers) {
std::set<NetworkAddress> s(servers.begin(), servers.end());
if (s.size() != servers.size()) {
throw connection_string_invalid();
}
std::string keyString = key.toString();
parseKey(keyString);
resetConnectionString();
}
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key)
: status(UNRESOLVED), hostnames(hosts) {
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key) : hostnames(hosts) {
std::set<Hostname> h(hosts.begin(), hosts.end());
if (h.size() != hosts.size()) {
throw connection_string_invalid();
}
std::string keyString = key.toString();
parseKey(keyString);
resetConnectionString();
}
void ClusterConnectionString::parseKey(const std::string& key) {
@ -529,13 +439,11 @@ void ClusterConnectionString::parseKey(const std::string& key) {
std::string ClusterConnectionString::toString() const {
std::string s = key.toString();
s += '@';
for (int i = 0; i < coords.size(); i++) {
if (networkAddressToHostname.find(coords[i]) == networkAddressToHostname.end()) {
for (auto const& coord : coords) {
if (s.find('@') != s.length() - 1) {
s += ',';
}
s += coords[i].toString();
}
s += coord.toString();
}
for (auto const& host : hostnames) {
if (s.find('@') != s.length() - 1) {
@ -547,11 +455,14 @@ std::string ClusterConnectionString::toString() const {
}
ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s)
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
clusterKey = cs.clusterKey();
for (auto h : cs.hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (auto s : cs.coordinators()) {
clientLeaderServers.push_back(ClientLeaderRegInterface(s));
}
}
ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddress> coordinators)
@ -576,49 +487,32 @@ ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {
// Nominee is the worker among all workers that are considered as leader by one coordinator
// This function contacts a coordinator coord to ask who is its nominee.
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
// to throw `coordinators_changed()` error
ACTOR Future<Void> monitorNominee(Key key,
ClientLeaderRegInterface coord,
AsyncTrigger* nomineeChange,
Optional<LeaderInfo>* info,
Optional<Hostname> hostname = Optional<Hostname>()) {
Optional<LeaderInfo>* info) {
loop {
state Optional<LeaderInfo> li;
if (coord.getLeader.getEndpoint().getPrimaryAddress().fromHostname) {
state ErrorOr<Optional<LeaderInfo>> rep =
wait(coord.getLeader.tryGetReply(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply));
if (rep.isError()) {
// Connecting to nominee failed, most likely due to connection failed.
TraceEvent("MonitorNomineeError")
.error(rep.getError())
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
.detail("OldAddr", coord.getLeader.getEndpoint().getPrimaryAddress().toString());
if (rep.getError().code() == error_code_request_maybe_delivered) {
// Delay to prevent tight resolving loop due to outdated DNS cache
wait(delay(FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL));
throw coordinators_changed();
if (coord.hostname.present()) {
wait(store(li,
retryGetReplyFromHostname(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
coord.hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply)));
} else {
throw rep.getError();
}
} else if (rep.present()) {
li = rep.get();
}
} else {
Optional<LeaderInfo> tmp =
wait(retryBrokenPromise(coord.getLeader,
wait(store(li,
retryBrokenPromise(coord.getLeader,
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply));
li = tmp;
TaskPriority::CoordinationReply)));
}
wait(Future<Void>(Void())); // Make sure we weren't cancelled
TraceEvent("GetLeaderReply")
.suppressFor(1.0)
.detail("Coordinator", coord.getLeader.getEndpoint().getPrimaryAddress())
.detail("Coordinator",
coord.hostname.present() ? coord.hostname.get().toString()
: coord.getLeader.getEndpoint().getPrimaryAddress().toString())
.detail("Nominee", li.present() ? li.get().changeID : UID())
.detail("ClusterKey", key.printable());
@ -687,9 +581,6 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<Leade
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo,
MonitorLeaderInfo info) {
loop {
wait(connRecord->resolveHostnames());
wait(info.intermediateConnRecord->resolveHostnames());
state ClientCoordinators coordinators(info.intermediateConnRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
@ -701,14 +592,8 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterCon
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
actors.reserve(coordinators.clientLeaderServers.size());
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
Optional<Hostname> hostname;
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
coordinators.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress());
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
hostname = r->second;
}
actors.push_back(monitorNominee(
coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i], hostname));
actors.push_back(
monitorNominee(coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i]));
}
allActors = waitForAll(actors);
@ -743,18 +628,7 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterCon
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
}
try {
wait(nomineeChange.onTrigger() || allActors);
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorLeaderCoordinatorsChanged").suppressFor(1.0);
connRecord->getConnectionString().resetToUnresolved();
break;
} else {
throw e;
}
}
}
}
}
@ -885,10 +759,10 @@ ACTOR Future<Void> getClientInfoFromLeader(Reference<AsyncVar<Optional<ClusterCo
}
ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
std::vector<Hostname> hostnames,
std::vector<NetworkAddress> coordinators,
ClientData* clientData,
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo,
Reference<AsyncVar<Void>> coordinatorsChanged) {
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo) {
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
@ -896,8 +770,12 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
state Reference<AsyncVar<Optional<ClusterControllerClientInterface>>> knownLeader(
new AsyncVar<Optional<ClusterControllerClientInterface>>{});
for (auto s = coordinators.begin(); s != coordinators.end(); ++s) {
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
clientLeaderServers.reserve(hostnames.size() + coordinators.size());
for (auto h : hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (auto s : coordinators) {
clientLeaderServers.push_back(ClientLeaderRegInterface(s));
}
nominees.resize(clientLeaderServers.size());
@ -936,14 +814,7 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
leaderInfo->set(leader.get().first);
}
}
try {
wait(nomineeChange.onTrigger() || allActors);
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
coordinatorsChanged->trigger();
}
throw e;
}
}
}
@ -995,7 +866,7 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) {
state ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
state std::vector<NetworkAddress> addrs = cs.coordinators();
state int coordinatorsSize = cs.hostnames.size() + cs.coordinators().size();
state int index = 0;
state int successIndex = 0;
state Optional<double> incorrectTime;
@ -1003,15 +874,26 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
state std::vector<CommitProxyInterface> lastCommitProxies;
state std::vector<UID> lastGrvProxyUIDs;
state std::vector<GrvProxyInterface> lastGrvProxies;
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
clientLeaderServers.reserve(coordinatorsSize);
for (const auto& h : cs.hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (const auto& c : cs.coordinators()) {
clientLeaderServers.push_back(ClientLeaderRegInterface(c));
}
deterministicRandom()->randomShuffle(clientLeaderServers);
deterministicRandom()->randomShuffle(addrs);
loop {
state ClientLeaderRegInterface clientLeaderServer(addrs[index]);
state ClientLeaderRegInterface clientLeaderServer = clientLeaderServers[index];
state OpenDatabaseCoordRequest req;
coordinator->set(clientLeaderServer);
req.clusterKey = cs.clusterKey();
req.hostnames = cs.hostnames;
req.coordinators = cs.coordinators();
req.knownClientInfoID = clientInfo->get().id;
req.supportedVersions = supportedVersions->get();
@ -1040,8 +922,16 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
incorrectTime = Optional<double>();
}
state ErrorOr<CachedSerialization<ClientDBInfo>> rep =
wait(clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply));
state ErrorOr<CachedSerialization<ClientDBInfo>> rep;
if (clientLeaderServer.hostname.present()) {
wait(store(rep,
tryGetReplyFromHostname(req,
clientLeaderServer.hostname.get(),
WLTOKEN_CLIENTLEADERREG_OPENDATABASE,
TaskPriority::CoordinationReply)));
} else {
wait(store(rep, clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply)));
}
if (rep.present()) {
if (rep.get().read().forward.present()) {
TraceEvent("MonitorProxiesForwarding")
@ -1072,15 +962,10 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
successIndex = index;
} else {
TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller
if (rep.getError().code() == error_code_coordinators_changed) {
throw coordinators_changed();
}
index = (index + 1) % addrs.size();
TEST(rep.getError().code() == error_code_lookup_failed); // Coordinator hostname resolving failure
index = (index + 1) % coordinatorsSize;
if (index == successIndex) {
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
// When the client fails talking to all coordinators, we throw coordinators_changed() and let the caller
// re-resolve the connection string and retry.
throw coordinators_changed();
}
}
}
@ -1092,11 +977,8 @@ ACTOR Future<Void> monitorProxies(
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) {
wait(connRecord->get()->resolveHostnames());
state MonitorLeaderInfo info(connRecord->get());
loop {
try {
wait(info.intermediateConnRecord->resolveHostnames());
choose {
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
@ -1107,13 +989,5 @@ ACTOR Future<Void> monitorProxies(
info.intermediateConnRecord = connRecord->get();
}
}
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorProxiesCoordinatorsChanged").suppressFor(1.0);
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
} else {
throw e;
}
}
}
}

View File

@ -75,10 +75,10 @@ Future<Void> monitorLeader(Reference<IClusterConnectionRecord> const& connFile,
// nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function
// also monitors the change of the leader.
Future<Void> monitorLeaderAndGetClientInfo(Key const& clusterKey,
std::vector<Hostname> const& hostnames,
std::vector<NetworkAddress> const& coordinators,
ClientData* const& clientData,
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo,
Reference<AsyncVar<Void>> const& coordinatorsChanged);
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo);
Future<Void> monitorProxies(
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,

View File

@ -18,6 +18,10 @@
* limitations under the License.
*/
#ifdef ADDRESS_SANITIZER
#include <sanitizer/lsan_interface.h>
#endif
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h"
#include "fdbclient/GenericManagementAPI.actor.h"
@ -2763,6 +2767,11 @@ template <class T>
THREAD_FUNC runSingleAssignmentVarTest(void* arg) {
noUnseed = true;
// This test intentionally leaks memory
#ifdef ADDRESS_SANITIZER
__lsan::ScopedDisabler disableLeakChecks;
#endif
volatile bool* done = (volatile bool*)arg;
try {
for (int i = 0; i < 25; ++i) {

View File

@ -265,11 +265,11 @@ void DatabaseContext::getLatestCommitVersions(const Reference<LocationInfo>& loc
void updateCachedReadVersionShared(double t, Version v, DatabaseSharedState* p) {
MutexHolder mutex(p->mutexLock);
if (v >= p->grvCacheSpace.cachedReadVersion) {
TraceEvent(SevDebug, "CacheReadVersionUpdate")
.detail("Version", v)
.detail("CurTime", t)
.detail("LastVersion", p->grvCacheSpace.cachedReadVersion)
.detail("LastTime", p->grvCacheSpace.lastGrvTime);
//TraceEvent(SevDebug, "CacheReadVersionUpdate")
// .detail("Version", v)
// .detail("CurTime", t)
// .detail("LastVersion", p->grvCacheSpace.cachedReadVersion)
// .detail("LastTime", p->grvCacheSpace.lastGrvTime);
p->grvCacheSpace.cachedReadVersion = v;
if (t > p->grvCacheSpace.lastGrvTime) {
p->grvCacheSpace.lastGrvTime = t;
@ -282,11 +282,11 @@ void DatabaseContext::updateCachedReadVersion(double t, Version v) {
return updateCachedReadVersionShared(t, v, sharedStatePtr);
}
if (v >= cachedReadVersion) {
TraceEvent(SevDebug, "CachedReadVersionUpdate")
.detail("Version", v)
.detail("GrvStartTime", t)
.detail("LastVersion", cachedReadVersion)
.detail("LastTime", lastGrvTime);
//TraceEvent(SevDebug, "CachedReadVersionUpdate")
// .detail("Version", v)
// .detail("GrvStartTime", t)
// .detail("LastVersion", cachedReadVersion)
// .detail("LastTime", lastGrvTime);
cachedReadVersion = v;
// Since the time is based on the start of the request, it's possible that we
// get a newer version with an older time.
@ -5100,10 +5100,10 @@ Future<Optional<Value>> Transaction::get(const Key& key, Snapshot snapshot) {
++trState->cx->transactionGetValueRequests;
// ASSERT (key < allKeys.end);
// There are no keys in the database with size greater than KEY_SIZE_LIMIT
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
// There are no keys in the database with size greater than the max key size
if (key.size() > getMaxReadKeySize(key)) {
return Optional<Value>();
}
auto ver = getReadVersion();
@ -5484,23 +5484,19 @@ Future<Void> Transaction::getRangeStream(const PromiseStream<RangeResult>& resul
void Transaction::addReadConflictRange(KeyRangeRef const& keys) {
ASSERT(!keys.empty());
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin;
KeyRef end = keys.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxReadKeySize(begin);
int64_t endMaxSize = getMaxReadKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);
@ -5522,8 +5518,7 @@ void Transaction::makeSelfConflicting() {
void Transaction::set(const KeyRef& key, const ValueRef& value, AddConflictRange addConflictRange) {
++trState->cx->transactionSetMutations;
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, trState->options.rawAccess))
throw key_too_large();
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large();
@ -5544,8 +5539,7 @@ void Transaction::atomicOp(const KeyRef& key,
MutationRef::Type operationType,
AddConflictRange addConflictRange) {
++trState->cx->transactionAtomicMutations;
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, trState->options.rawAccess))
throw key_too_large();
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large();
@ -5578,20 +5572,16 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
KeyRef begin = range.begin;
KeyRef end = range.end;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxClearKeySize(begin);
int64_t endMaxSize = getMaxClearKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
auto r = KeyRangeRef(req.arena, KeyRangeRef(begin, end));
if (r.empty())
@ -5604,10 +5594,10 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
}
void Transaction::clear(const KeyRef& key, AddConflictRange addConflictRange) {
++trState->cx->transactionClearMutations;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
// There aren't any keys in the database with size larger than the max key size
if (key.size() > getMaxClearKeySize(key)) {
return;
}
auto& req = tr;
auto& t = req.transaction;
@ -5626,24 +5616,19 @@ void Transaction::addWriteConflictRange(const KeyRangeRef& keys) {
auto& req = tr;
auto& t = req.transaction;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin;
KeyRef end = keys.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxKeySize(begin);
int64_t endMaxSize = getMaxKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);
if (r.empty()) {
@ -6942,11 +6927,18 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
}
// Gets the protocol version reported by a coordinator via the protocol info interface
ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordinatorAddresses) {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown({ coordinatorAddresses },
WLTOKEN_PROTOCOL_INFO) };
ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
ACTOR Future<ProtocolVersion> getCoordinatorProtocol(
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator) {
state ProtocolInfoReply reply;
if (coordinator->get().get().hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(
ProtocolInfoRequest{}, coordinator->get().get().hostname.get(), WLTOKEN_PROTOCOL_INFO)));
} else {
RequestStream<ProtocolInfoRequest> requestStream(
Endpoint::wellKnown({ coordinator->get().get().getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO));
wait(store(reply, retryBrokenPromise(requestStream, ProtocolInfoRequest{})));
}
return reply.version;
}
@ -6955,8 +6947,16 @@ ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordina
// function will return with an unset result.
// If an expected version is given, this future won't return if the actual protocol version matches the expected version
ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
NetworkAddress coordinatorAddress,
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator,
Optional<ProtocolVersion> expectedVersion) {
state NetworkAddress coordinatorAddress;
if (coordinator->get().get().hostname.present()) {
Hostname h = coordinator->get().get().hostname.get();
wait(store(coordinatorAddress, h.resolveWithRetry()));
} else {
coordinatorAddress = coordinator->get().get().getLeader.getEndpoint().getPrimaryAddress();
}
state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion =
FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);
@ -6991,11 +6991,10 @@ ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
if (!coordinator->get().present()) {
wait(coordinator->onChange());
} else {
Endpoint coordinatorEndpoint = coordinator->get().get().getLeader.getEndpoint();
if (needToConnect) {
// Even though we typically rely on the connect packet to get the protocol version, we need to send some
// request in order to start a connection. This protocol version request serves that purpose.
protocolVersion = getCoordinatorProtocol(coordinatorEndpoint.addresses);
protocolVersion = getCoordinatorProtocol(coordinator);
needToConnect = false;
}
choose {
@ -7011,8 +7010,8 @@ ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
// Older versions of FDB don't have an endpoint to return the protocol version, so we get this info from
// the connect packet
when(Optional<ProtocolVersion> pv = wait(getCoordinatorProtocolFromConnectPacket(
coordinatorEndpoint.getPrimaryAddress(), expectedVersion))) {
when(Optional<ProtocolVersion> pv =
wait(getCoordinatorProtocolFromConnectPacket(coordinator, expectedVersion))) {
if (pv.present()) {
return pv.get();
} else {
@ -8186,15 +8185,21 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion
throw;
}
TraceEvent("ExclusionSafetyCheckCoordinators").log();
wait(cx->getConnectionRecord()->resolveHostnames());
state ClientCoordinators coordinatorList(cx->getConnectionRecord());
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coordinatorList.clientLeaderServers.size());
for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
if (coordinatorList.clientLeaderServers[i].hostname.present()) {
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coordinatorList.clusterKey, UID()),
coordinatorList.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coordinatorList.clientLeaderServers[i].getLeader,
GetLeaderRequest(coordinatorList.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
}
// Wait for quorum so we don't dismiss live coordinators as unreachable by acting too fast
choose {
when(wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.0))) {}
@ -9395,3 +9400,21 @@ ACTOR Future<Void> waitPurgeGranulesCompleteActor(Reference<DatabaseContext> db,
Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) {
return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey);
}
int64_t getMaxKeySize(KeyRef const& key) {
return getMaxWriteKeySize(key, true);
}
int64_t getMaxReadKeySize(KeyRef const& key) {
return getMaxKeySize(key);
}
int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess) {
int64_t tenantSize = hasRawAccess ? CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT : 0;
return key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT + tenantSize;
}
int64_t getMaxClearKeySize(KeyRef const& key) {
return getMaxKeySize(key);
}

View File

@ -539,5 +539,19 @@ ACTOR Future<std::vector<std::pair<UID, StorageWiggleValue>>> readStorageWiggleV
bool primary,
bool use_system_priority);
// Returns the maximum legal size of a key. This size will be determined by the prefix of the passed in key
// (system keys have a larger maximum size). This should be used for generic max key size requests.
int64_t getMaxKeySize(KeyRef const& key);
// Returns the maximum legal size of a key that can be read. Keys larger than this will be assumed not to exist.
int64_t getMaxReadKeySize(KeyRef const& key);
// Returns the maximum legal size of a key that can be written. If using raw access, writes to normal keys will
// be allowed to be slighly larger to accommodate the prefix.
int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess);
// Returns the maximum legal size of a key that can be cleared. Keys larger than this will be assumed not to exist.
int64_t getMaxClearKeySize(KeyRef const& key);
#include "flow/unactorcompiler.h"
#endif

View File

@ -59,8 +59,14 @@ class CommitQuorum {
ConfigGeneration generation,
ConfigTransactionInterface cti) {
try {
if (cti.hostname.present()) {
wait(timeoutError(retryGetReplyFromHostname(
self->getCommitRequest(generation), cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT),
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
} else {
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)),
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
}
++self->successful;
} catch (Error& e) {
// self might be destroyed if this actor is cancelled
@ -122,9 +128,20 @@ class GetGenerationQuorum {
ACTOR static Future<Void> addRequestActor(GetGenerationQuorum* self, ConfigTransactionInterface cti) {
loop {
try {
ConfigTransactionGetGenerationReply reply = wait(timeoutError(
cti.getGeneration.getReply(ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion }),
state ConfigTransactionGetGenerationReply reply;
if (cti.hostname.present()) {
wait(timeoutError(store(reply,
retryGetReplyFromHostname(
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion },
cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETGENERATION)),
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
} else {
wait(timeoutError(store(reply,
cti.getGeneration.getReply(
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion })),
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
}
++self->totalRepliesReceived;
auto gen = reply.generation;
@ -225,9 +242,18 @@ class PaxosConfigTransactionImpl {
state ConfigKey configKey = ConfigKey::decodeKey(key);
loop {
try {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes(
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state std::vector<ConfigTransactionInterface> readReplicas =
self->getGenerationQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.get, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GET));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetReply reply =
wait(timeoutError(basicLoadBalance(configNodes,
&ConfigTransactionInterface::get,
@ -248,9 +274,17 @@ class PaxosConfigTransactionImpl {
}
ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes(
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.getClasses, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETCLASSES));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetConfigClassesReply reply =
wait(basicLoadBalance(configNodes,
&ConfigTransactionInterface::getClasses,
@ -264,9 +298,17 @@ class PaxosConfigTransactionImpl {
}
ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes(
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.getKnobs, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETKNOBS));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetKnobsReply reply =
wait(basicLoadBalance(configNodes,
&ConfigTransactionInterface::getKnobs,
@ -366,10 +408,13 @@ public:
Future<Void> commit() { return commit(this); }
PaxosConfigTransactionImpl(Database const& cx) : cx(cx) {
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators();
ctis.reserve(coordinators.size());
for (const auto& coordinator : coordinators) {
ctis.emplace_back(coordinator);
const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
ctis.reserve(cs.hostnames.size() + cs.coordinators().size());
for (const auto& h : cs.hostnames) {
ctis.emplace_back(h);
}
for (const auto& c : cs.coordinators()) {
ctis.emplace_back(c);
}
getGenerationQuorum = GetGenerationQuorum{ ctis };
commitQuorum = CommitQuorum{ ctis };

View File

@ -19,6 +19,7 @@
*/
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/Atomic.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/SpecialKeySpace.actor.h"
@ -1578,10 +1579,10 @@ Future<Optional<Value>> ReadYourWritesTransaction::get(const Key& key, Snapshot
if (key >= getMaxReadKey() && key != metadataVersionKey)
return key_outside_legal_range();
// There are no keys in the database with size greater than KEY_SIZE_LIMIT
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
// There are no keys in the database with size greater than the max key size
if (key.size() > getMaxReadKeySize(key)) {
return Optional<Value>();
}
Future<Optional<Value>> result = RYWImpl::readWithConflictRange(this, RYWImpl::GetValueReq(key), snapshot);
reading.add(success(result));
@ -1822,23 +1823,19 @@ void ReadYourWritesTransaction::addReadConflictRange(KeyRangeRef const& keys) {
}
}
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin;
KeyRef end = keys.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxReadKeySize(begin);
int64_t endMaxSize = getMaxReadKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);
@ -2111,9 +2108,9 @@ void ReadYourWritesTransaction::atomicOp(const KeyRef& key, const ValueRef& oper
if (!isValidMutationType(operationType) || !isAtomicOp((MutationRef::Type)operationType))
throw invalid_mutation_type();
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
throw key_too_large();
}
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large();
@ -2218,9 +2215,9 @@ void ReadYourWritesTransaction::set(const KeyRef& key, const ValueRef& value) {
}
// TODO: check transaction size here
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
throw key_too_large();
}
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large();
@ -2254,23 +2251,19 @@ void ReadYourWritesTransaction::clear(const KeyRangeRef& range) {
return tr.clear(range, addWriteConflict);
}
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = range.begin;
KeyRef end = range.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxClearKeySize(begin);
int64_t endMaxSize = getMaxClearKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);
@ -2300,9 +2293,9 @@ void ReadYourWritesTransaction::clear(const KeyRef& key) {
if (key >= getMaxWriteKey())
throw key_outside_legal_range();
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxClearKeySize(key)) {
return;
}
if (options.readYourWritesDisabled) {
return tr.clear(key, addWriteConflict);
@ -2332,9 +2325,9 @@ Future<Void> ReadYourWritesTransaction::watch(const Key& key) {
if (key >= allKeys.end || (key >= getMaxReadKey() && key != metadataVersionKey && tr.apiVersionAtLeast(300)))
return key_outside_legal_range();
if (key.size() >
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
return key_too_large();
}
return RYWImpl::watch(this, key);
}
@ -2350,23 +2343,19 @@ void ReadYourWritesTransaction::addWriteConflictRange(KeyRangeRef const& keys) {
}
}
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin;
KeyRef end = keys.end;
if (begin.size() >
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
begin = begin.substr(
0,
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
if (end.size() >
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
int64_t beginMaxSize = getMaxKeySize(begin);
int64_t endMaxSize = getMaxKeySize(end);
if (begin.size() > beginMaxSize) {
begin = begin.substr(0, beginMaxSize + 1);
}
if (end.size() > endMaxSize) {
end = end.substr(0, endMaxSize + 1);
}
KeyRangeRef r = KeyRangeRef(begin, end);

View File

@ -25,9 +25,15 @@
#include "fdbclient/sha1/SHA1.h"
#include <time.h>
#include <iomanip>
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/sha.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>
#if defined(HAVE_WOLFSSL)
#undef SHA1 // wolfSSL will shadow FDB's SHA1.h
#endif
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string.hpp>

View File

@ -26,7 +26,7 @@
#include "flow/Net2Packet.h"
#include "fdbclient/Knobs.h"
#include "fdbrpc/IRateControl.h"
#include "fdbclient/HTTP.h"
#include "fdbrpc/HTTP.h"
#include "fdbclient/JSONDoc.h"
// Representation of all the things you need to connect to a blob store instance with some credentials.

View File

@ -450,6 +450,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
init( MAX_PROXY_COMPUTE, 2.0 );
init( MAX_COMPUTE_PER_OPERATION, 0.1 );
init( MAX_COMPUTE_DURATION_LOG_CUTOFF, 0.05 );
init( PROXY_COMPUTE_BUCKETS, 20000 );
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
init( TXN_STATE_SEND_AMOUNT, 4 );
@ -541,6 +542,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( CC_ENABLE_ENTIRE_SATELLITE_MONITORING, false );
init( CC_SATELLITE_DEGRADATION_MIN_COMPLAINER, 3 );
init( CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER, 3 );
init( CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL, 0.5 );
init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );

View File

@ -374,6 +374,7 @@ public:
int MAX_COMMIT_UPDATES;
double MAX_PROXY_COMPUTE;
double MAX_COMPUTE_PER_OPERATION;
double MAX_COMPUTE_DURATION_LOG_CUTOFF;
int PROXY_COMPUTE_BUCKETS;
double PROXY_COMPUTE_GROWTH_RATE;
int TXN_STATE_SEND_AMOUNT;
@ -480,6 +481,8 @@ public:
// be determined as degraded worker.
int CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER; // The minimum amount of degraded server in satellite DC to be
// determined as degraded satellite.
double CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL; // The interval to prevent re-recruiting the same singleton if a
// recruiting fight between two cluster controllers occurs.
// Knobs used to select the best policy (via monte carlo)
int POLICY_RATING_TESTS; // number of tests per policy (in order to compare)

View File

@ -41,9 +41,15 @@ class SimpleConfigTransactionImpl {
if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGettingReadVersion", self->dID.get());
}
ConfigTransactionGetGenerationRequest req;
ConfigTransactionGetGenerationReply reply =
wait(retryBrokenPromise(self->cti.getGeneration, ConfigTransactionGetGenerationRequest{}));
state ConfigTransactionGetGenerationReply reply;
if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetGenerationRequest{},
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETGENERATION)));
} else {
wait(store(reply, retryBrokenPromise(self->cti.getGeneration, ConfigTransactionGetGenerationRequest{})));
}
if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGotReadVersion", self->dID.get())
.detail("Version", reply.generation.liveVersion);
@ -62,8 +68,15 @@ class SimpleConfigTransactionImpl {
.detail("ConfigClass", configKey.configClass)
.detail("KnobName", configKey.knobName);
}
ConfigTransactionGetReply reply =
wait(retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey }));
state ConfigTransactionGetReply reply;
if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetRequest{ generation, configKey },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GET)));
} else {
wait(store(reply, retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey })));
}
if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGotValue", self->dID.get())
.detail("Value", reply.value.get().toString());
@ -80,8 +93,17 @@ class SimpleConfigTransactionImpl {
self->getGenerationFuture = getGeneration(self);
}
ConfigGeneration generation = wait(self->getGenerationFuture);
ConfigTransactionGetConfigClassesReply reply =
wait(retryBrokenPromise(self->cti.getClasses, ConfigTransactionGetConfigClassesRequest{ generation }));
state ConfigTransactionGetConfigClassesReply reply;
if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetConfigClassesRequest{ generation },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETCLASSES)));
} else {
wait(store(
reply,
retryBrokenPromise(self->cti.getClasses, ConfigTransactionGetConfigClassesRequest{ generation })));
}
RangeResult result;
for (const auto& configClass : reply.configClasses) {
result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr));
@ -94,8 +116,17 @@ class SimpleConfigTransactionImpl {
self->getGenerationFuture = getGeneration(self);
}
ConfigGeneration generation = wait(self->getGenerationFuture);
ConfigTransactionGetKnobsReply reply =
wait(retryBrokenPromise(self->cti.getKnobs, ConfigTransactionGetKnobsRequest{ generation, configClass }));
state ConfigTransactionGetKnobsReply reply;
if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetKnobsRequest{ generation, configClass },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETKNOBS)));
} else {
wait(store(
reply,
retryBrokenPromise(self->cti.getKnobs, ConfigTransactionGetKnobsRequest{ generation, configClass })));
}
RangeResult result;
for (const auto& knobName : reply.knobNames) {
result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr));
@ -109,7 +140,11 @@ class SimpleConfigTransactionImpl {
}
wait(store(self->toCommit.generation, self->getGenerationFuture));
self->toCommit.annotation.timestamp = now();
if (self->cti.hostname.present()) {
wait(retryGetReplyFromHostname(self->toCommit, self->cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT));
} else {
wait(retryBrokenPromise(self->cti.commit, self->toCommit));
}
self->committed = true;
return Void();
}
@ -126,9 +161,14 @@ class SimpleConfigTransactionImpl {
public:
SimpleConfigTransactionImpl(Database const& cx) : cx(cx) {
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators();
const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
if (cs.coordinators().size()) {
std::vector<NetworkAddress> coordinators = cs.coordinators();
std::sort(coordinators.begin(), coordinators.end());
cti = ConfigTransactionInterface(coordinators[0]);
} else {
cti = ConfigTransactionInterface(cs.hostnames[0]);
}
}
SimpleConfigTransactionImpl(ConfigTransactionInterface const& cti) : cti(cti) {}

View File

@ -1644,13 +1644,10 @@ void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key
CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
ACTOR Future<RangeResult> coordinatorsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) {
state ClusterConnectionString cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
state std::vector<NetworkAddress> coordinator_processes = wait(cs.tryResolveHostnames());
RangeResult result;
KeyRef prefix(getKeyRange().begin);
auto cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
auto coordinator_processes = cs.coordinators();
Key cluster_decription_key = prefix.withSuffix(LiteralStringRef("cluster_description"));
if (kr.contains(cluster_decription_key)) {
result.push_back_deep(result.arena(), KeyValueRef(cluster_decription_key, cs.clusterKeyName()));
@ -1673,10 +1670,16 @@ Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
return rywGetRange(ryw, kr, result);
}
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
KeyRef prefix(getKeyRange().begin);
return coordinatorsGetRangeActor(ryw, prefix, kr);
}
ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
state Reference<IQuorumChange> change;
state ClusterConnectionString
conn; // We don't care about the Key here, it will be overrode in changeQuorumChecker().
state ClusterConnectionString conn; // We don't care about the Key here.
state std::vector<std::string> process_address_or_hostname_strs;
state Optional<std::string> msg;
state int index;
@ -1700,7 +1703,6 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
try {
if (Hostname::isHostname(process_address_or_hostname_strs[index])) {
conn.hostnames.push_back(Hostname::parse(process_address_or_hostname_strs[index]));
conn.status = ClusterConnectionString::ConnectionStringStatus::UNRESOLVED;
} else {
NetworkAddress a = NetworkAddress::parse(process_address_or_hostname_strs[index]);
if (!a.isValid()) {
@ -1717,18 +1719,19 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
if (parse_error) {
std::string error = "ERROR: \'" + process_address_or_hostname_strs[index] +
"\' is not a valid network endpoint address\n";
if (process_address_or_hostname_strs[index].find(":tls") != std::string::npos)
error += " Do not include the `:tls' suffix when naming a process\n";
return ManagementAPIError::toJsonString(false, "coordinators", error);
}
}
}
wait(conn.resolveHostnames());
if (conn.coordinators().size())
change = specifiedQuorumChange(conn.coordinators());
else
std::vector<NetworkAddress> addressesVec = wait(conn.tryResolveHostnames());
if (addressesVec.size() != conn.hostnames.size() + conn.coordinators().size()) {
return ManagementAPIError::toJsonString(false, "coordinators", "One or more hostnames are not resolvable.");
} else if (addressesVec.size()) {
change = specifiedQuorumChange(addressesVec);
} else {
change = noQuorumChange();
}
// check update for cluster_description
Key cluster_decription_key = LiteralStringRef("cluster_description").withPrefix(kr.begin);
@ -1740,19 +1743,18 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
change = nameQuorumChange(entry.second.get().toString(), change);
} else {
// throw the error
return Optional<std::string>(ManagementAPIError::toJsonString(
false, "coordinators", "Cluster description must match [A-Za-z0-9_]+"));
return ManagementAPIError::toJsonString(
false, "coordinators", "Cluster description must match [A-Za-z0-9_]+");
}
}
ASSERT(change.isValid());
TraceEvent(SevDebug, "SKSChangeCoordinatorsStart")
.detail("NewHostnames", conn.hostnames.size() ? describe(conn.hostnames) : "N/A")
.detail("NewAddresses", describe(conn.coordinators()))
.detail("NewAddresses", describe(addressesVec))
.detail("Description", entry.first ? entry.second.get().toString() : "");
Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, &conn));
Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, addressesVec));
TraceEvent(SevDebug, "SKSChangeCoordinatorsFinish")
.detail("Result", r.present() ? static_cast<int>(r.get()) : -1); // -1 means success
@ -1804,9 +1806,10 @@ ACTOR static Future<RangeResult> CoordinatorsAutoImplActor(ReadYourWritesTransac
state ClusterConnectionString old(currentKey.get().toString());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
std::vector<NetworkAddress> _desiredCoordinators = wait(autoQuorumChange()->getDesiredCoordinators(
&tr,
old.coordinators(),
oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result));

View File

@ -307,24 +307,36 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
bool* quorum_reachable,
int* coordinatorsFaultTolerance) {
try {
wait(connRecord->resolveHostnames());
state ClientCoordinators coord(connRecord);
state StatusObject statusObj;
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
if (coord.clientLeaderServers[i].hostname.present()) {
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coord.clusterKey, UID()),
coord.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
}
state std::vector<Future<ProtocolInfoReply>> coordProtocols;
coordProtocols.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
if (coord.clientLeaderServers[i].hostname.present()) {
coordProtocols.push_back(retryGetReplyFromHostname(
ProtocolInfoRequest{}, coord.clientLeaderServers[i].hostname.get(), WLTOKEN_PROTOCOL_INFO));
} else {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown(
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO) };
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
}
}
wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) &&
smartQuorum(coordProtocols, coordProtocols.size() / 2 + 1, 1.5) ||
@ -337,8 +349,12 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
int coordinatorsUnavailable = 0;
for (int i = 0; i < leaderServers.size(); i++) {
StatusObject coordStatus;
if (coord.clientLeaderServers[i].hostname.present()) {
coordStatus["address"] = coord.clientLeaderServers[i].hostname.get().toString();
} else {
coordStatus["address"] =
coord.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress().toString();
}
if (leaderServers[i].isReady()) {
coordStatus["reachable"] = true;

View File

@ -48,6 +48,8 @@ struct TenantMapEntry {
int64_t id;
Key prefix;
constexpr static int ROOT_PREFIX_SIZE = sizeof(id);
private:
void initPrefix(KeyRef subspace) {
ASSERT(id >= 0);

View File

@ -24,6 +24,9 @@
*/
#if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED)
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/md5.h>
#elif !defined(_MD5_H)
#define _MD5_H

View File

@ -15,6 +15,7 @@ set(FDBRPC_SRCS
genericactors.actor.h
genericactors.actor.cpp
HealthMonitor.actor.cpp
HTTP.actor.cpp
IAsyncFile.actor.cpp
IPAllowList.cpp
LoadBalance.actor.cpp
@ -28,6 +29,10 @@ set(FDBRPC_SRCS
ReplicationPolicy.cpp
ReplicationTypes.cpp
ReplicationUtils.cpp
RESTClient.h
RESTClient.actor.cpp
RESTUtils.h
RESTUtils.actor.cpp
SimExternalConnection.actor.cpp
SimExternalConnection.h
Stats.actor.cpp

View File

@ -18,10 +18,12 @@
* limitations under the License.
*/
#include "fdbclient/HTTP.h"
#include "fdbrpc/HTTP.h"
#include "fdbclient/md5/md5.h"
#include "fdbclient/libb64/encode.h"
#include <cctype>
#include "flow/actorcompiler.h" // has to be last include
namespace HTTP {

View File

@ -18,6 +18,11 @@
* limitations under the License.
*/
#ifndef FDBRPC_HTTP_H
#define FDBRPC_HTTP_H
#pragma once
#include "flow/flow.h"
#include "flow/Net2Packet.h"
#include "fdbrpc/IRateControl.h"
@ -63,4 +68,27 @@ Future<Reference<Response>> doRequest(Reference<IConnection> const& conn,
int64_t* const& pSent,
Reference<IRateControl> const& recvRate,
const std::string& requestHeader = std::string());
constexpr int HTTP_STATUS_CODE_OK = 200;
constexpr int HTTP_STATUS_CODE_CREATED = 201;
constexpr int HTTP_STATUS_CODE_ACCEPTED = 202;
constexpr int HTTP_STATUS_CODE_NO_CONTENT = 204;
constexpr int HTTP_STATUS_CODE_UNAUTHORIZED = 401;
constexpr int HTTP_STATUS_CODE_NOT_ACCEPTABLE = 406;
constexpr int HTTP_STATUS_CODE_TOO_MANY_REQUESTS = 429;
constexpr int HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR = 500;
constexpr int HTTP_STATUS_CODE_BAD_GATEWAY = 502;
constexpr int HTTP_STATUS_CODE_SERVICE_UNAVAILABLE = 503;
constexpr int HTTP_RETRYAFTER_DELAY_SECS = 300;
const std::string HTTP_VERB_GET = "GET";
const std::string HTTP_VERB_HEAD = "HEAD";
const std::string HTTP_VERB_DELETE = "DELETE";
const std::string HTTP_VERB_TRACE = "TRACE";
const std::string HTTP_VERB_PUT = "PUT";
const std::string HTTP_VERB_POST = "POST";
} // namespace HTTP
#endif

363
fdbrpc/RESTClient.actor.cpp Normal file
View File

@ -0,0 +1,363 @@
/*
* RESTClient.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbrpc/RESTClient.h"
#include "fdbrpc/HTTP.h"
#include "fdbrpc/IRateControl.h"
#include "fdbrpc/RESTUtils.h"
#include "flow/Arena.h"
#include "flow/Error.h"
#include "flow/FastRef.h"
#include "flow/Knobs.h"
#include "flow/Net2Packet.h"
#include "flow/flow.h"
#include "flow/network.h"
#include "flow/serialize.h"
#include "flow/Trace.h"
#include "flow/UnitTest.h"
#include <memory>
#include <unordered_map>
#include "flow/actorcompiler.h" // always the last include
// Render this statistics snapshot as a JSON object for status reporting.
json_spirit::mObject RESTClient::Stats::getJSON() {
	json_spirit::mObject obj;
	obj["host_service"] = host_service;
	obj["requests_failed"] = requests_failed;
	obj["requests_successful"] = requests_successful;
	obj["bytes_sent"] = bytes_sent;
	return obj;
}
// Element-wise difference of two snapshots for the same host/service key;
// the result carries this snapshot's host_service label.
RESTClient::Stats RESTClient::Stats::operator-(const Stats& rhs) {
	Stats delta(host_service);
	delta.requests_failed = requests_failed - rhs.requests_failed;
	delta.requests_successful = requests_successful - rhs.requests_successful;
	delta.bytes_sent = bytes_sent - rhs.bytes_sent;
	return delta;
}
RESTClient::RESTClient() {}
// Construct a client, overriding default knobs with the supplied name -> value map.
// NOTE(review): takes a non-const reference while setKnobs takes const& — confirm
// the asymmetry is intentional (signature is fixed by the header).
RESTClient::RESTClient(std::unordered_map<std::string, int>& knobSettings) {
	knobs.set(knobSettings);
}
// Replace the client's knob settings with the supplied name -> value map.
void RESTClient::setKnobs(const std::unordered_map<std::string, int>& knobSettings) {
	knobs.set(knobSettings);
}
// Return the current knob settings as a name -> value map.
std::unordered_map<std::string, int> RESTClient::getKnobs() const {
	return knobs.get();
}
// Issue one logical HTTP request (with internal retries) against the endpoint in
// 'url', using a pooled connection from 'client'. Returns the response when its
// status code is in 'successCodes'. Retryable failures (connection errors, 500,
// 502, 503, 429) are retried up to min(request_tries, connect_tries) attempts
// with exponential backoff capped at 60s, honoring a numeric Retry-After header.
// Non-retryable outcomes throw: http_not_accepted (406), http_auth_failed (401),
// connection_failed (timeout before connect completed), the original error for
// timed_out/connection_failed/lookup_failed, or http_request_failed otherwise.
ACTOR Future<Reference<HTTP::Response>> doRequest_impl(Reference<RESTClient> client,
                                                       std::string verb,
                                                       HTTP::Headers headers,
                                                       RESTUrl* url,
                                                       std::set<unsigned int> successCodes) {
	state UnsentPacketQueue content;
	state int contentLen = url->body.size();

	// Serialize the request body (if any) into the unsent-packet queue.
	if (url->body.size() > 0) {
		PacketWriter pw(content.getWriteBuffer(url->body.size()), nullptr, Unversioned());
		pw.serializeBytes(url->body);
	}

	// Lazily create the stats entry for this key.
	// NOTE(review): passes url->service twice; the connection-pool key below is
	// built from (url->host, url->service) — confirm the first argument here
	// shouldn't be url->host.
	std::string statsKey = RESTClient::getStatsKey(url->service, url->service);
	auto sItr = client->statsMap.find(statsKey);
	if (sItr == client->statsMap.end()) {
		client->statsMap.emplace(statsKey, std::make_unique<RESTClient::Stats>(statsKey));
	}

	headers["Content-Length"] = format("%d", contentLen);
	headers["Host"] = url->host;

	state int maxTries = std::min(client->knobs.request_tries, client->knobs.connect_tries);
	state int thisTry = 1;
	state double nextRetryDelay = 2.0;
	state Reference<IRateControl> sendReceiveRate = makeReference<Unlimited>();
	// NOTE(review): divides the timeout knob by 60 but the value is passed
	// directly to timeoutError() below — confirm the intended units.
	state double reqTimeout = (client->knobs.request_timeout_secs * 1.0) / 60;
	state RESTConnectionPoolKey connectPoolKey = RESTConnectionPool::getConnectionPoolKey(url->host, url->service);
	state RESTClient::Stats* statsPtr = client->statsMap[statsKey].get();

	loop {
		state Optional<Error> err;
		state Optional<NetworkAddress> remoteAddress;
		state bool connectionEstablished = false;
		state Reference<HTTP::Response> r;

		try {
			// Start connecting (member name 'conectionPool' [sic] comes from the header).
			Future<RESTConnectionPool::ReusableConnection> frconn = client->conectionPool->connect(
			    connectPoolKey, client->knobs.secure_connection, client->knobs.max_connection_life);

			// Finish connecting, do request
			state RESTConnectionPool::ReusableConnection rconn =
			    wait(timeoutError(frconn, client->knobs.connect_timeout));
			connectionEstablished = true;

			remoteAddress = rconn.conn->getPeerAddress();
			Reference<HTTP::Response> _r = wait(timeoutError(HTTP::doRequest(rconn.conn,
			                                                                 verb,
			                                                                 url->resource,
			                                                                 headers,
			                                                                 contentLen > 0 ? &content : nullptr,
			                                                                 contentLen,
			                                                                 sendReceiveRate,
			                                                                 &statsPtr->bytes_sent,
			                                                                 sendReceiveRate),
			                                                 reqTimeout));
			r = _r;

			// Since the response was parsed successfully (which is why we are here) reuse the connection unless we
			// received the "Connection: close" header.
			if (r->headers["Connection"] != "close") {
				client->conectionPool->returnConnection(connectPoolKey, rconn, client->knobs.connection_pool_size);
			}
			rconn.conn.clear();
		} catch (Error& e) {
			// Cancellation must propagate; everything else is recorded for the retry logic.
			if (e.code() == error_code_actor_cancelled) {
				throw;
			}
			err = e;
		}

		// If err is not present then r is valid.
		// If r->code is in successCodes then record the successful request and return r.
		if (!err.present() && successCodes.count(r->code) != 0) {
			statsPtr->requests_successful++;
			return r;
		}

		// Otherwise, this request is considered failed. Update failure count.
		statsPtr->requests_failed++;

		// All errors in err are potentially retryable as well as certain HTTP response codes...
		bool retryable = err.present() || r->code == HTTP::HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR ||
		                 r->code == HTTP::HTTP_STATUS_CODE_BAD_GATEWAY ||
		                 r->code == HTTP::HTTP_STATUS_CODE_SERVICE_UNAVAILABLE ||
		                 r->code == HTTP::HTTP_STATUS_CODE_TOO_MANY_REQUESTS;

		// But only if our previous attempt was not the last allowable try.
		retryable = retryable && (thisTry < maxTries);

		TraceEvent event(SevWarn, retryable ? "RESTClient_FailedRetryable" : "RESTClient_RequestFailed");

		// Attach err to trace event if present, otherwise extract some stuff from the response
		if (err.present()) {
			event.errorUnsuppressed(err.get());
		}
		event.suppressFor(60);
		if (!err.present()) {
			event.detail("ResponseCode", r->code);
		}

		event.detail("ConnectionEstablished", connectionEstablished);

		if (remoteAddress.present())
			event.detail("RemoteEndpoint", remoteAddress.get());
		else
			event.detail("RemoteHost", url->host);

		event.detail("Verb", verb).detail("Resource", url->resource).detail("ThisTry", thisTry);

		// If r is not valid or not code TOO_MANY_REQUESTS then increment the try count.
		// TOO_MANY_REQUEST's will not count against the attempt limit.
		if (!r || r->code != HTTP::HTTP_STATUS_CODE_TOO_MANY_REQUESTS) {
			++thisTry;
		}

		// We will wait delay seconds before the next retry, start with nextRetryDelay.
		double delay = nextRetryDelay;
		// Double but limit the *next* nextRetryDelay.
		nextRetryDelay = std::min(nextRetryDelay * 2, 60.0);

		if (retryable) {
			// If r is valid then obey the Retry-After response header if present.
			if (r) {
				auto iRetryAfter = r->headers.find("Retry-After");
				if (iRetryAfter != r->headers.end()) {
					event.detail("RetryAfterHeader", iRetryAfter->second);
					char* pEnd;
					double retryAfter = strtod(iRetryAfter->second.c_str(), &pEnd);
					if (*pEnd) {
						// If there were other characters then don't trust the parsed value
						retryAfter = HTTP::HTTP_RETRYAFTER_DELAY_SECS;
					}
					// Update delay
					delay = std::max(delay, retryAfter);
				}
			}

			// Log the delay then wait.
			event.detail("RetryDelay", delay);
			wait(::delay(delay));
		} else {
			// We can't retry, so throw something.

			// This error code means the authentication header was not accepted, likely the account or key is wrong.
			if (r && r->code == HTTP::HTTP_STATUS_CODE_NOT_ACCEPTABLE) {
				throw http_not_accepted();
			}

			if (r && r->code == HTTP::HTTP_STATUS_CODE_UNAUTHORIZED) {
				throw http_auth_failed();
			}

			// Recognize and throw specific errors
			if (err.present()) {
				int code = err.get().code();

				// If we get a timed_out error during the connect() phase, we'll call that connection_failed despite
				// the fact that there was technically never a 'connection' to begin with. It differentiates between an
				// active connection timing out vs a connection timing out, though not between an active connection
				// failing vs connection attempt failing.
				// TODO: Add more error types?
				if (code == error_code_timed_out && !connectionEstablished) {
					throw connection_failed();
				}

				if (code == error_code_timed_out || code == error_code_connection_failed ||
				    code == error_code_lookup_failed) {
					throw err.get();
				}
			}

			throw http_request_failed();
		}
	}
}
// Shared helper for verbs that carry a request body (PUT/POST). Normalizes the
// optional caller-supplied headers into a concrete HTTP::Headers value and
// forwards the request to doRequest_impl with the caller's verb and the set of
// HTTP status codes considered successful.
Future<Reference<HTTP::Response>> RESTClient::doPutOrPost(const std::string& verb,
                                                          Optional<HTTP::Headers> optHeaders,
                                                          RESTUrl* url,
                                                          std::set<unsigned int> successCodes) {
	// Start from an empty header set; overlay the caller's headers when provided.
	HTTP::Headers reqHeaders = optHeaders.present() ? optHeaders.get() : HTTP::Headers();
	return doRequest_impl(Reference<RESTClient>::addRef(this), verb, reqHeaders, url, successCodes);
}
// Issues an HTTP POST to 'fullUrl' carrying 'requestBody'.
// Only a 200 (OK) response is treated as success.
Future<Reference<HTTP::Response>> RESTClient::doPost(const std::string& fullUrl,
                                                     const std::string& requestBody,
                                                     Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, requestBody, knobs.secure_connection);
	const std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK };
	return doPutOrPost(HTTP::HTTP_VERB_POST, optHeaders, &url, okCodes);
}
// Issues an HTTP PUT to 'fullUrl' carrying 'requestBody'.
// Success codes:
//   201 - on successful resource create
//   200 / 204 - if target resource representation was successfully modified
//               with the desired state
Future<Reference<HTTP::Response>> RESTClient::doPut(const std::string& fullUrl,
                                                    const std::string& requestBody,
                                                    Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, requestBody, knobs.secure_connection);
	const std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK,
		                                     HTTP::HTTP_STATUS_CODE_CREATED,
		                                     HTTP::HTTP_STATUS_CODE_NO_CONTENT };
	return doPutOrPost(HTTP::HTTP_VERB_PUT, optHeaders, &url, okCodes);
}
// Shared helper for body-less verbs (GET/HEAD/DELETE/TRACE). Normalizes the
// optional caller headers and forwards the request to doRequest_impl.
//
// Bug fix: the original implementation ignored 'verb' and always passed
// HTTP::HTTP_VERB_GET to doRequest_impl, so doHead/doDelete/doTrace silently
// performed GET requests. The caller's verb is now forwarded.
Future<Reference<HTTP::Response>> RESTClient::doGetHeadDeleteOrTrace(const std::string& verb,
                                                                     Optional<HTTP::Headers> optHeaders,
                                                                     RESTUrl* url,
                                                                     std::set<unsigned int> successCodes) {
	HTTP::Headers headers;
	if (optHeaders.present()) {
		headers = optHeaders.get();
	}
	// Forward the caller-requested verb (previously hardcoded to HTTP_VERB_GET).
	return doRequest_impl(Reference<RESTClient>::addRef(this), verb, headers, url, successCodes);
}
// Issues an HTTP GET for 'fullUrl'; only a 200 (OK) response counts as success.
Future<Reference<HTTP::Response>> RESTClient::doGet(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, knobs.secure_connection);
	const std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK };
	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_GET, optHeaders, &url, okCodes);
}
// Issues an HTTP HEAD for 'fullUrl'; only a 200 (OK) response counts as success.
Future<Reference<HTTP::Response>> RESTClient::doHead(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, knobs.secure_connection);
	const std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK };
	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_HEAD, optHeaders, &url, okCodes);
}
// Issues an HTTP DELETE for 'fullUrl'.
// Success codes:
//   200 - action has been enacted.
//   202 - action will likely succeed, but has not yet been enacted.
//   204 - action has been enacted, no further information is to be supplied.
Future<Reference<HTTP::Response>> RESTClient::doDelete(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, knobs.secure_connection);
	const std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK,
		                                     HTTP::HTTP_STATUS_CODE_NO_CONTENT,
		                                     HTTP::HTTP_STATUS_CODE_ACCEPTED };
	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_DELETE, optHeaders, &url, okCodes);
}
// Issues an HTTP TRACE for 'fullUrl'; only a 200 (OK) response counts as success.
Future<Reference<HTTP::Response>> RESTClient::doTrace(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, knobs.secure_connection);
	const std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK };
	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_TRACE, optHeaders, &url, okCodes);
}
// No-op referenced from the unit-test driver solely to force the linker to
// retain this translation unit so the TEST_CASEs below get registered.
void forceLinkRESTClientTests() {}
// Validates RESTClient knob handling:
//   1. a default-constructed client reports the flow-knob defaults,
//   2. setKnobs() round-trips updated values through getKnobs(),
//   3. an unrecognized knob name is rejected with rest_invalid_rest_client_knob.
TEST_CASE("fdbrpc/RESTClient") {
	RESTClient r;
	std::unordered_map<std::string, int> knobs = r.getKnobs();
	// Defaults must mirror the corresponding FLOW_KNOBS values.
	ASSERT_EQ(knobs["secure_connection"], RESTClientKnobs::SECURE_CONNECTION);
	ASSERT_EQ(knobs["connection_pool_size"], FLOW_KNOBS->RESTCLIENT_MAX_CONNECTIONPOOL_SIZE);
	ASSERT_EQ(knobs["connect_tries"], FLOW_KNOBS->RESTCLIENT_CONNECT_TRIES);
	ASSERT_EQ(knobs["connect_timeout"], FLOW_KNOBS->RESTCLIENT_CONNECT_TIMEOUT);
	ASSERT_EQ(knobs["max_connection_life"], FLOW_KNOBS->RESTCLIENT_MAX_CONNECTION_LIFE);
	ASSERT_EQ(knobs["request_tries"], FLOW_KNOBS->RESTCLIENT_REQUEST_TRIES);
	ASSERT_EQ(knobs["request_timeout_secs"], FLOW_KNOBS->RESTCLIENT_REQUEST_TIMEOUT_SEC);
	// Bump every knob and verify the updates are observable via getKnobs().
	for (auto& itr : knobs) {
		itr.second++;
	}
	r.setKnobs(knobs);
	std::unordered_map<std::string, int> updated = r.getKnobs();
	for (auto& itr : updated) {
		ASSERT_EQ(knobs[itr.first], itr.second);
	}
	// invalid client knob
	knobs["foo"] = 100;
	try {
		r.setKnobs(knobs);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_rest_client_knob) {
			throw e;
		}
	}
	return Void();
}

97
fdbrpc/RESTClient.h Normal file
View File

@ -0,0 +1,97 @@
/*
* RESTClient.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBRPC_RESTCLIENT_H
#define FDBRPC_RESTCLIENT_H
#include <memory>
#pragma once
#include "fdbclient/JSONDoc.h"
#include "fdbrpc/HTTP.h"
#include "fdbrpc/RESTUtils.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/flow.h"
#include "flow/Net2Packet.h"
// This interface enables sending REST HTTP requests and receiving REST HTTP responses from a resource identified by a
// URI.
// This interface enables sending REST HTTP requests and receiving REST HTTP
// responses from a resource identified by a URI.
class RESTClient : public ReferenceCounted<RESTClient> {
public:
	// Per-"host:service" request counters; mutated by the request actor and
	// reported via getJSON().
	struct Stats {
		explicit Stats(const std::string& hService)
		  : host_service(hService), requests_successful(0), requests_failed(0), bytes_sent(0) {}
		// Element-wise difference of counters (host_service taken from *this).
		Stats operator-(const Stats& rhs);
		// Resets all counters to zero; host_service is preserved.
		void clear() { requests_failed = requests_successful = bytes_sent = 0; }
		json_spirit::mObject getJSON();

		std::string host_service; // "host:service" identity, see getStatsKey()
		int64_t requests_successful;
		int64_t requests_failed;
		int64_t bytes_sent;
	};

	RESTClientKnobs knobs;
	// NOTE(review): "conectionPool" is misspelled but the name is referenced by
	// the .cpp implementation; renaming would need a coordinated change.
	Reference<RESTConnectionPool> conectionPool;
	// Connection stats maintained per "host:service"
	std::unordered_map<std::string, std::unique_ptr<Stats>> statsMap;

	RESTClient();
	// Construct with knob overrides; throws rest_invalid_rest_client_knob on an
	// unrecognized knob name (see RESTClientKnobs::set).
	explicit RESTClient(std::unordered_map<std::string, int>& params);

	void setKnobs(const std::unordered_map<std::string, int>& knobSettings);
	std::unordered_map<std::string, int> getKnobs() const;

	// Supports common REST APIs.
	// On invocation of below methods, input 'fullUrl' is parsed using RESTUrl interface,
	// RESTConnectionPool is used to leverage cached connection if any for 'host:service' pair. API then leverage
	// HTTP::doRequest to accomplish the specified operation
	Future<Reference<HTTP::Response>> doGet(const std::string& fullUrl,
	                                        Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doHead(const std::string& fullUrl,
	                                         Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doDelete(const std::string& fullUrl,
	                                           Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doTrace(const std::string& fullUrl,
	                                          Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doPut(const std::string& fullUrl,
	                                        const std::string& requestBody,
	                                        Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doPost(const std::string& fullUrl,
	                                         const std::string& requestBody,
	                                         Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());

	// Key used for statsMap lookups: "host:service".
	static std::string getStatsKey(const std::string& host, const std::string& service) { return host + ":" + service; }

private:
	// Common implementation for the body-less verbs (GET/HEAD/DELETE/TRACE).
	Future<Reference<HTTP::Response>> doGetHeadDeleteOrTrace(const std::string& verb,
	                                                         Optional<HTTP::Headers> optHeaders,
	                                                         RESTUrl* url,
	                                                         std::set<unsigned int> successCodes);
	// Common implementation for the body-carrying verbs (PUT/POST).
	Future<Reference<HTTP::Response>> doPutOrPost(const std::string& verb,
	                                              Optional<HTTP::Headers> headers,
	                                              RESTUrl* url,
	                                              std::set<unsigned int> successCodes);
};
#endif

276
fdbrpc/RESTUtils.actor.cpp Normal file
View File

@ -0,0 +1,276 @@
/*
* RESTUtils.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbrpc/RESTUtils.h"
#include "flow/flat_buffers.h"
#include "flow/UnitTest.h"
#include <boost/algorithm/string.hpp>
#include "flow/actorcompiler.h" // always the last include
namespace {

// Whitelist of URI schemes understood by RESTClient.
std::unordered_set<std::string> protocols = { "http", "https" };

// True iff 'protocol' is one of the supported schemes.
bool isProtocolSupported(const std::string& protocol) {
	return protocols.count(protocol) > 0;
}

// True iff 'protocol' requires TLS ("https").
// (Name spelling retained as-is; it is referenced later in this file.)
bool isSecurePrototol(const std::string& protocol) {
	return protocol == "https";
}

} // namespace
// Seeds every knob from its flow-knob (or compile-time) default, then registers
// each knob in knobMap under both its long name and its short alias so that
// RESTClientKnobs::set() can update it by either spelling.
RESTClientKnobs::RESTClientKnobs() {
	secure_connection = RESTClientKnobs::SECURE_CONNECTION;
	connection_pool_size = FLOW_KNOBS->RESTCLIENT_MAX_CONNECTIONPOOL_SIZE;
	connect_tries = FLOW_KNOBS->RESTCLIENT_CONNECT_TRIES;
	connect_timeout = FLOW_KNOBS->RESTCLIENT_CONNECT_TIMEOUT;
	max_connection_life = FLOW_KNOBS->RESTCLIENT_MAX_CONNECTION_LIFE;
	request_tries = FLOW_KNOBS->RESTCLIENT_REQUEST_TRIES;
	request_timeout_secs = FLOW_KNOBS->RESTCLIENT_REQUEST_TIMEOUT_SEC;

	// { knob-name-or-alias, pointer-to-member } registration table.
	const std::vector<std::pair<const char*, int*>> registrations = {
		{ "connection_pool_size", &connection_pool_size }, { "pz", &connection_pool_size },
		{ "secure_connection", &secure_connection },       { "sc", &secure_connection },
		{ "connect_tries", &connect_tries },               { "ct", &connect_tries },
		{ "connect_timeout", &connect_timeout },           { "cto", &connect_timeout },
		{ "max_connection_life", &max_connection_life },   { "mcl", &max_connection_life },
		{ "request_tries", &request_tries },               { "rt", &request_tries },
		{ "request_timeout_secs", &request_timeout_secs }, { "rtom", &request_timeout_secs },
	};
	for (const auto& [name, valuePtr] : registrations) {
		knobMap[name] = valuePtr;
	}
}
void RESTClientKnobs::set(const std::unordered_map<std::string, int>& knobSettings) {
TraceEvent trace = TraceEvent("RESTClient_SetKnobs");
for (const auto& itr : knobSettings) {
const auto& kItr = RESTClientKnobs::knobMap.find(itr.first);
if (kItr == RESTClientKnobs::knobMap.end()) {
trace.detail("RESTClient_InvalidKnobName", itr.first);
throw rest_invalid_rest_client_knob();
}
*(kItr->second) = itr.second;
trace.detail(itr.first.c_str(), itr.second);
}
}
// Returns a snapshot of the current knob values keyed by their long names
// (aliases are not included).
std::unordered_map<std::string, int> RESTClientKnobs::get() const {
	return {
		{ "connection_pool_size", connection_pool_size },
		{ "secure_connection", secure_connection },
		{ "connect_tries", connect_tries },
		{ "connect_timeout", connect_timeout },
		{ "max_connection_life", max_connection_life },
		{ "request_tries", request_tries },
		{ "request_timeout_secs", request_timeout_secs },
	};
}
// Produces a usable connection for 'connectKey' ("host","service"): reuses the
// oldest unexpired pooled connection if one exists, otherwise establishes (and
// handshakes) a fresh one with lifetime 'maxConnLife' seconds.
// Throws rest_connectpool_key_not_found if no pool entry exists for the key.
ACTOR Future<RESTConnectionPool::ReusableConnection> connect_impl(Reference<RESTConnectionPool> connectionPool,
                                                                  RESTConnectionPoolKey connectKey,
                                                                  bool isSecure,
                                                                  int maxConnLife) {
	auto poolItr = connectionPool->connectionPoolMap.find(connectKey);
	if (poolItr == connectionPool->connectionPoolMap.end()) {
		throw rest_connectpool_key_not_found();
	}
	// Drain the queue until an unexpired connection is found; expired
	// connections are simply dropped (their references are released).
	while (!poolItr->second.empty()) {
		RESTConnectionPool::ReusableConnection rconn = poolItr->second.front();
		poolItr->second.pop();

		if (rconn.expirationTime > now()) {
			TraceEvent("RESTClient_ReusableConnection")
			    .suppressFor(60)
			    .detail("RemoteEndpoint", rconn.conn->getPeerAddress())
			    .detail("ExpireIn", rconn.expirationTime - now());
			return rconn;
		}
	}

	// No reusable connection: establish a new one and complete the TLS/TCP
	// handshake before handing it out.
	state Reference<IConnection> conn =
	    wait(INetworkConnections::net()->connect(connectKey.first, connectKey.second, isSecure));
	wait(conn->connectHandshake());

	return RESTConnectionPool::ReusableConnection({ conn, now() + maxConnLife });
}
// Public wrapper over connect_impl; see its comment for the reuse semantics.
Future<RESTConnectionPool::ReusableConnection> RESTConnectionPool::connect(RESTConnectionPoolKey connectKey,
                                                                           const bool isSecure,
                                                                           const int maxConnLife) {
	return connect_impl(Reference<RESTConnectionPool>::addRef(this), connectKey, isSecure, maxConnLife);
}
// Returns 'rconn' to the pool for 'connectKey'. The caller's reference is
// always released; ownership either moves into the pool or the connection is
// dropped. Throws rest_connectpool_key_not_found if no pool entry exists.
void RESTConnectionPool::returnConnection(RESTConnectionPoolKey connectKey,
                                          ReusableConnection& rconn,
                                          const int maxConnections) {
	auto poolItr = connectionPoolMap.find(connectKey);
	if (poolItr == connectionPoolMap.end()) {
		throw rest_connectpool_key_not_found();
	}
	// Re-cache only if the connection has not expired and the per-key pool is
	// below its cap; push() appends to the back of the queue (connect_impl
	// consumes from the front, oldest first).
	if (rconn.expirationTime > now() && poolItr->second.size() < maxConnections) {
		poolItr->second.push(rconn);
	}
	// Release the caller's reference regardless of whether it was re-pooled.
	rconn.conn = Reference<IConnection>();
}
// Parses 'fUrl' into host/service/resource/parameter components; the request
// body is left empty. Throws rest_invalid_uri on a malformed URI.
RESTUrl::RESTUrl(const std::string& fUrl, const bool isSecure) {
	parseUrl(fUrl, isSecure);
}

// As above, additionally carrying 'b' as the request body payload.
RESTUrl::RESTUrl(const std::string& fullUrl, const std::string& b, const bool isSecure) : body(b) {
	parseUrl(fullUrl, isSecure);
}
void RESTUrl::parseUrl(const std::string& fullUrl, const bool isSecure) {
// Sample valid URIs
// 1. With 'host' & 'resource' := '<protocol>://<host>/<resource>'
// 2. With 'host', 'service' & 'resource' := '<protocol>://<host>:port/<resource>'
// 3. With 'host', 'service', 'resource' & 'reqParameters' := '<protocol>://<host>:port/<resource>?<parameter-list>'
try {
StringRef t(fullUrl);
StringRef p = t.eat("://");
std::string protocol = p.toString();
boost::algorithm::to_lower(protocol);
if (!isProtocolSupported(protocol)) {
throw format("Invalid REST URI protocol '%s'", protocol.c_str());
}
// Ensure connection secure knob setting matches with the input URI
if ((isSecurePrototol(protocol) && !isSecure) || (!isSecurePrototol(protocol) && isSecure)) {
throw format("Invalid REST URI protocol secure knob '%s'", fullUrl.c_str());
}
// extract 'resource' and optional 'parameter list' if supplied in the URL
uint8_t foundSeparator = 0;
StringRef hostPort = t.eatAny("/?", &foundSeparator);
if (foundSeparator == '/') {
resource = t.eat("?").toString();
reqParameters = t.eat().toString();
}
// hostPort is at least a host or IP address, optionally followed by :portNumber or :serviceName
StringRef hRef(hostPort);
StringRef h = hRef.eat(":");
if (h.size() == 0) {
throw std::string("host cannot be empty");
}
host = h.toString();
service = hRef.eat().toString();
TraceEvent("RESTClient_ParseURI")
.detail("URI", fullUrl)
.detail("Host", host)
.detail("Service", service)
.detail("Resource", resource)
.detail("ReqParameters", reqParameters);
} catch (std::string& err) {
TraceEvent("RESTClient_ParseError").detail("URI", fullUrl).detail("Error", err);
throw rest_invalid_uri();
}
}
// No-op referenced from the unit-test driver solely to force the linker to
// retain this translation unit so the TEST_CASE below gets registered.
void forceLinkRESTUtilsTests() {}
// Exercises RESTUrl parsing: rejection of unsupported protocols, of a
// protocol/secure-knob mismatch, and of an empty host; acceptance of URIs
// with and without a service (port) and with request parameters.
TEST_CASE("fdbrpc/RESTUtils") {
	// invalid protocol
	try {
		std::string uri("httpx://foo/bar");
		RESTUrl r(uri, false);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_uri) {
			throw e;
		}
	}
	// mismatch protocol and knob values
	try {
		std::string uri("http://foo/bar");
		RESTUrl r(uri, true);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_uri) {
			throw e;
		}
	}
	// missing host
	try {
		std::string uri("https://:/bar");
		RESTUrl r(uri, true);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_uri) {
			throw e;
		}
	}
	// valid URI with service
	try {
		std::string uri("https://host:80/foo/bar");
		RESTUrl r(uri, true);
		ASSERT_EQ(r.host.compare("host"), 0);
		ASSERT_EQ(r.service.compare("80"), 0);
		ASSERT_EQ(r.resource.compare("foo/bar"), 0);
	} catch (Error& e) {
		throw e;
	}
	// valid URI with-out service
	try {
		std::string uri("https://host/foo/bar");
		RESTUrl r(uri, true);
		ASSERT_EQ(r.host.compare("host"), 0);
		ASSERT(r.service.empty());
		ASSERT_EQ(r.resource.compare("foo/bar"), 0);
	} catch (Error& e) {
		throw e;
	}
	// valid URI with parameters
	try {
		std::string uri("https://host/foo/bar?param1,param2");
		RESTUrl r(uri, true);
		ASSERT_EQ(r.host.compare("host"), 0);
		ASSERT(r.service.empty());
		ASSERT_EQ(r.resource.compare("foo/bar"), 0);
		ASSERT_EQ(r.reqParameters.compare("param1,param2"), 0);
	} catch (Error& e) {
		throw e;
	}
	// ensure RESTClient::Knob default values and updates
	return Void();
}

113
fdbrpc/RESTUtils.h Normal file
View File

@ -0,0 +1,113 @@
/*
* RESTUtils.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDRPC_REST_UTILS_H
#define FDRPC_REST_UTILS_H
#pragma once
#include "flow/flow.h"
#include "flow/FastRef.h"
#include "flow/Net2Packet.h"
#include <unordered_map>
#include <utility>
// Util interface managing REST active connection pool.
// The interface internally constructs and maintains map {"host:service" -> activeConnection}; any new connection
// request would first access cached connection if possible (not expired), if none exists, it would establish a new
// connection and return to the caller. Caller on accomplishing the task at-hand, should return the connection back to
// the pool.
using RESTConnectionPoolKey = std::pair<std::string, std::string>;
class RESTConnectionPool : public ReferenceCounted<RESTConnectionPool> {
public:
	// A pooled TCP connection plus the absolute time after which it may no
	// longer be reused.
	struct ReusableConnection {
		Reference<IConnection> conn;
		double expirationTime;
	};

	// Maximum number of connections cached in the connection-pool.
	int maxConnPerConnectKey;
	// Per-"host:service" FIFO of cached connections.
	std::map<RESTConnectionPoolKey, std::queue<ReusableConnection>> connectionPoolMap;

	RESTConnectionPool(const int maxConnsPerKey) : maxConnPerConnectKey(maxConnsPerKey) {}

	// Routine is responsible to provide an usable TCP connection object; it reuses an active connection from
	// connection-pool if available, otherwise, establish a new TCP connection
	Future<ReusableConnection> connect(RESTConnectionPoolKey connectKey, const bool isSecure, const int maxConnLife);
	// Returns a connection to the pool (if unexpired and below 'maxConnections')
	// and releases the caller's reference.
	void returnConnection(RESTConnectionPoolKey connectKey, ReusableConnection& conn, const int maxConnections);

	static RESTConnectionPoolKey getConnectionPoolKey(const std::string& host, const std::string& service) {
		return std::make_pair(host, service);
	}
};
// Util interface facilitating management and update for RESTClient knob parameters
// Util interface facilitating management and update for RESTClient knob parameters
struct RESTClientKnobs {
	int connection_pool_size, secure_connection, connect_timeout, connect_tries, max_connection_life, request_tries,
	    request_timeout_secs;

	// Values for the 'secure_connection' knob.
	constexpr static int SECURE_CONNECTION = 1;
	constexpr static int NOT_SECURE_CONNECTION = 0;

	RESTClientKnobs();

	// Applies overrides by long name or alias; throws
	// rest_invalid_rest_client_knob on an unrecognized name.
	void set(const std::unordered_map<std::string, int>& knobSettings);
	// Snapshot of current values keyed by long names.
	std::unordered_map<std::string, int> get() const;

	// Maps each knob name (and its short alias) to the member it updates;
	// populated by the constructor.
	std::unordered_map<std::string, int*> knobMap;

	// Human-readable knob help strings (long name, alias, meaning).
	static std::vector<std::string> getKnobDescriptions() {
		return {
			"connection_pool_size (pz)           Maximum numbers of active connections in the connection-pool",
			"secure_connection (or sc)           Set 1 for secure connection and 0 for insecure connection.",
			"connect_tries (or ct)               Number of times to try to connect for each request.",
			"connect_timeout (or cto)            Number of seconds to wait for a connect request to succeed.",
			"max_connection_life (or mcl)        Maximum number of seconds to use a single TCP connection.",
			"request_tries (or rt)               Number of times to try each request until a parsable HTTP "
			"response other than 429 is received.",
			"request_timeout_secs (or rtom)      Number of seconds to wait for a request to succeed after a "
			"connection is established.",
		};
	}
};
// Util interface facilitating parsing of an input REST 'full_url'
// Util interface facilitating parsing of an input REST 'full_url'
struct RESTUrl {
public:
	// Connection resources - host and port details
	std::string host;
	std::string service; // port number or service name; may be empty
	// resource identified by URI
	std::string resource;
	// optional REST request parameters (text after '?'); may be empty
	std::string reqParameters;
	// Request 'body' payload
	std::string body;

	// Both constructors parse 'fullUrl' immediately and throw rest_invalid_uri
	// on a malformed URI or a protocol/'isSecure' mismatch.
	explicit RESTUrl(const std::string& fullUrl, const bool isSecure);
	explicit RESTUrl(const std::string& fullUrl, const std::string& body, const bool isSecure);

private:
	void parseUrl(const std::string& fullUrl, bool isSecure);
};
#endif

View File

@ -72,6 +72,20 @@ Future<REPLY_TYPE(Req)> retryBrokenPromise(RequestStream<Req, P> to, Req request
}
}
// Best-effort (re)initialization of '*stream' to a well-known endpoint at the
// address resolved from 'hostname'. Returns Void without modifying anything
// when resolution fails.
ACTOR template <class Req>
Future<Void> tryInitializeRequestStream(RequestStream<Req>* stream, Hostname hostname, WellKnownEndpoints token) {
	Optional<NetworkAddress> address = wait(hostname.resolve());
	if (!address.present()) {
		return Void();
	}
	if (stream == nullptr) {
		// NOTE(review): this assigns the allocation to the by-value pointer
		// parameter only — the caller never observes the new stream and the
		// allocation is leaked. Confirm whether any caller passes nullptr here.
		stream = new RequestStream<Req>(Endpoint::wellKnown({ address.get() }, token));
	} else {
		*stream = RequestStream<Req>(Endpoint::wellKnown({ address.get() }, token));
	}
	return Void();
}
ACTOR template <class Req>
Future<ErrorOr<REPLY_TYPE(Req)>> tryGetReplyFromHostname(Req request, Hostname hostname, WellKnownEndpoints token) {
// A wrapper of tryGetReply(request), except that the request is sent to an address resolved from a hostname.

View File

@ -1110,10 +1110,10 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
newPriorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController);
bool isCoordinator =
(std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), req.wi.address()) !=
(std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), w.address()) !=
coordinatorAddresses.end()) ||
(req.wi.secondaryAddress().present() &&
std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), req.wi.secondaryAddress().get()) !=
(w.secondaryAddress().present() &&
std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), w.secondaryAddress().get()) !=
coordinatorAddresses.end());
for (auto it : req.incompatiblePeers) {
@ -1933,8 +1933,24 @@ ACTOR Future<Void> handleForcedRecoveries(ClusterControllerData* self, ClusterCo
}
}
ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
// Spaces out consecutive recruitments of a singleton role: each call to
// newRecruitment() records a recruitment start and returns the delay (>= 0)
// the caller should wait so starts are at least
// CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL seconds apart.
struct SingletonRecruitThrottler {
	double lastRecruitStart; // time of the most recent recruitment start; -1 = never

	SingletonRecruitThrottler() : lastRecruitStart(-1) {}

	// Records a new recruitment and returns how long to wait before starting it.
	double newRecruitment() {
		double n = now();
		double waitTime =
		    std::max(0.0, (lastRecruitStart + SERVER_KNOBS->CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL - n));
		lastRecruitStart = n;
		return waitTime;
	}
};
ACTOR Future<Void> startDataDistributor(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartDataDistributor", self->id).log();
loop {
@ -2003,6 +2019,7 @@ ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
}
ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange());
}
@ -2019,13 +2036,15 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
when(wait(self->recruitDistributor.onChange())) {}
}
} else {
wait(startDataDistributor(self));
wait(startDataDistributor(self, recruitThrottler.newRecruitment()));
}
}
}
ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
ACTOR Future<Void> startRatekeeper(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartRatekeeper", self->id).log();
loop {
@ -2091,6 +2110,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
}
ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange());
}
@ -2107,34 +2127,15 @@ ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
when(wait(self->recruitRatekeeper.onChange())) {}
}
} else {
wait(startRatekeeper(self));
wait(startRatekeeper(self, recruitThrottler.newRecruitment()));
}
}
}
// Acquires the BM lock by getting the next epoch no.
ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
loop {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
try {
Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
wait(tr->commit());
TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
return newEpoch;
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCEKP_Start", self->id).log();
loop {
@ -2208,6 +2209,7 @@ ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
}
ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
loop {
if (self->db.serverInfo->get().encryptKeyProxy.present() && !self->recruitEncryptKeyProxy.get()) {
choose {
@ -2219,13 +2221,36 @@ ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
when(wait(self->recruitEncryptKeyProxy.onChange())) {}
}
} else {
wait(startEncryptKeyProxy(self));
wait(startEncryptKeyProxy(self, recruitThrottler.newRecruitment()));
}
}
}
ACTOR Future<Void> startBlobManager(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
// Acquires the BM lock by getting the next epoch no.
ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
loop {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
try {
Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
wait(tr->commit());
TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
return newEpoch;
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
ACTOR Future<Void> startBlobManager(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartBlobManager", self->id).log();
loop {
@ -2322,6 +2347,7 @@ ACTOR Future<Void> watchBlobGranulesConfigKey(ClusterControllerData* self) {
}
ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange());
}
@ -2352,7 +2378,7 @@ ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
}
} else if (self->db.blobGranulesEnabled.get()) {
// if there is no blob manager present but blob granules are now enabled, recruit a BM
wait(startBlobManager(self));
wait(startBlobManager(self, recruitThrottler.newRecruitment()));
} else {
// if there is no blob manager present and blob granules are disabled, wait for a config change
wait(self->db.blobGranulesEnabled.onChange());
@ -2481,12 +2507,11 @@ ACTOR Future<Void> workerHealthMonitor(ClusterControllerData* self) {
}
}
ACTOR Future<Void> clusterControllerCore(Reference<IClusterConnectionRecord> connRecord,
ClusterControllerFullInterface interf,
ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
Future<Void> leaderFail,
ServerCoordinators coordinators,
LocalityData locality,
ConfigDBType configDBType) {
state ServerCoordinators coordinators(connRecord);
state ClusterControllerData self(interf, locality, coordinators);
state ConfigBroadcaster configBroadcaster(coordinators, configDBType);
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
@ -2621,7 +2646,7 @@ ACTOR Future<Void> replaceInterface(ClusterControllerFullInterface interf) {
}
}
ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRecord,
ACTOR Future<Void> clusterController(ServerCoordinators coordinators,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
@ -2632,10 +2657,9 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
state bool inRole = false;
cci.initEndpoints();
try {
wait(connRecord->resolveHostnames());
// Register as a possible leader; wait to be elected
state Future<Void> leaderFail =
tryBecomeLeader(connRecord, cci, currentCC, hasConnected, asyncPriorityInfo);
tryBecomeLeader(coordinators, cci, currentCC, hasConnected, asyncPriorityInfo);
state Future<Void> shouldReplace = replaceInterface(cci);
while (!currentCC->get().present() || currentCC->get().get() != cci) {
@ -2654,7 +2678,7 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
startRole(Role::CLUSTER_CONTROLLER, cci.id(), UID());
inRole = true;
wait(clusterControllerCore(connRecord, cci, leaderFail, locality, configDBType));
wait(clusterControllerCore(cci, leaderFail, coordinators, locality, configDBType));
}
} catch (Error& e) {
if (inRole)
@ -2683,7 +2707,8 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
state bool hasConnected = false;
loop {
try {
wait(clusterController(connRecord, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
ServerCoordinators coordinators(connRecord);
wait(clusterController(coordinators, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
hasConnected = true;
} catch (Error& e) {
if (e.code() != error_code_coordinators_changed)

View File

@ -537,8 +537,7 @@ ACTOR Future<Void> changeCoordinators(Reference<ClusterRecoveryData> self) {
}
try {
state ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString());
wait(conn.resolveHostnames());
ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString());
wait(self->cstate.move(conn));
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)

View File

@ -236,6 +236,105 @@ struct ResolutionRequestBuilder {
}
};
ErrorOr<Optional<TenantMapEntry>> getTenantEntry(ProxyCommitData* commitData,
Optional<TenantNameRef> tenant,
Optional<int64_t> tenantId,
bool logOnFailure) {
if (tenant.present()) {
auto itr = commitData->tenantMap.find(tenant.get());
if (itr == commitData->tenantMap.end()) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyUnknownTenant", commitData->dbgid).detail("Tenant", tenant.get());
}
return unknown_tenant();
} else if (tenantId.present() && tenantId.get() != itr->second.id) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyTenantIdMismatch", commitData->dbgid)
.detail("Tenant", tenant.get())
.detail("TenantId", tenantId)
.detail("ExistingId", itr->second.id);
}
return unknown_tenant();
}
return ErrorOr<Optional<TenantMapEntry>>(Optional<TenantMapEntry>(itr->second));
}
return Optional<TenantMapEntry>();
}
bool verifyTenantPrefix(ProxyCommitData* const commitData, const CommitTransactionRequest& req) {
ErrorOr<Optional<TenantMapEntry>> tenantEntry =
getTenantEntry(commitData, req.tenantInfo.name.castTo<TenantNameRef>(), req.tenantInfo.tenantId, true);
if (tenantEntry.isError()) {
return true;
}
if (tenantEntry.get().present()) {
Key tenantPrefix = tenantEntry.get().get().prefix;
for (auto& m : req.transaction.mutations) {
if (m.param1 != metadataVersionKey) {
if (!m.param1.startsWith(tenantPrefix)) {
TraceEvent(SevWarnAlways, "TenantPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param1.toHexString());
return false;
}
if (m.type == MutationRef::ClearRange && !m.param2.startsWith(tenantPrefix)) {
TraceEvent(SevWarnAlways, "TenantClearRangePrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param2.toHexString());
return false;
} else if (m.type == MutationRef::SetVersionstampedKey) {
ASSERT(m.param1.size() >= 4);
uint8_t* key = const_cast<uint8_t*>(m.param1.begin());
int* offset = reinterpret_cast<int*>(&key[m.param1.size() - 4]);
if (*offset < tenantPrefix.size()) {
TraceEvent(SevWarnAlways, "TenantVersionstampInvalidOffset")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param1.toHexString())
.detail("Offset", *offset);
return false;
}
}
}
}
for (auto& rc : req.transaction.read_conflict_ranges) {
if (rc.begin != metadataVersionKey &&
(!rc.begin.startsWith(tenantPrefix) || !rc.end.startsWith(tenantPrefix))) {
TraceEvent(SevWarnAlways, "TenantReadConflictPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("BeginKey", rc.begin.toHexString())
.detail("EndKey", rc.end.toHexString());
return false;
}
}
for (auto& wc : req.transaction.write_conflict_ranges) {
if (wc.begin != metadataVersionKey &&
(!wc.begin.startsWith(tenantPrefix) || !wc.end.startsWith(tenantPrefix))) {
TraceEvent(SevWarnAlways, "TenantWriteConflictPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("BeginKey", wc.begin.toHexString())
.detail("EndKey", wc.end.toHexString());
return false;
}
}
}
return true;
}
ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int>> out,
FutureStream<CommitTransactionRequest> in,
@ -282,6 +381,13 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
.detail("Size", bytes)
.detail("Client", req.reply.getEndpoint().getPrimaryAddress());
}
if (!verifyTenantPrefix(commitData, req)) {
++commitData->stats.txnCommitErrors;
req.reply.sendError(illegal_tenant_access());
continue;
}
++commitData->stats.txnCommitIn;
if (req.debugID.present()) {
@ -450,35 +556,6 @@ ACTOR static Future<ResolveTransactionBatchReply> trackResolutionMetrics(Referen
return reply;
}
ErrorOr<Optional<TenantMapEntry>> getTenantEntry(ProxyCommitData* commitData,
Optional<TenantNameRef> tenant,
Optional<int64_t> tenantId,
bool logOnFailure) {
if (tenant.present()) {
auto itr = commitData->tenantMap.find(tenant.get());
if (itr == commitData->tenantMap.end()) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyUnknownTenant", commitData->dbgid).detail("Tenant", tenant.get());
}
return unknown_tenant();
} else if (tenantId.present() && tenantId.get() != itr->second.id) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyTenantIdMismatch", commitData->dbgid)
.detail("Tenant", tenant.get())
.detail("TenantId", tenantId)
.detail("ExistingId", itr->second.id);
}
return unknown_tenant();
}
return ErrorOr<Optional<TenantMapEntry>>(Optional<TenantMapEntry>(itr->second));
}
return Optional<TenantMapEntry>();
}
namespace CommitBatch {
struct CommitBatchContext {
@ -685,6 +762,11 @@ bool canReject(const std::vector<CommitTransactionRequest>& trs) {
return true;
}
double computeReleaseDelay(CommitBatchContext* self, double latencyBucket) {
return std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE,
self->batchOperations * self->pProxyCommitData->commitComputePerOperation[latencyBucket]);
}
ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData;
@ -708,6 +790,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
// Pre-resolution the commits
TEST(pProxyCommitData->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); // Wait for local batch
wait(pProxyCommitData->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber - 1));
pProxyCommitData->stats.computeLatency.addMeasurement(now() - timeStart);
double queuingDelay = g_network->now() - timeStart;
pProxyCommitData->stats.commitBatchQueuingDist->sampleSeconds(queuingDelay);
if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND ||
@ -736,10 +819,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
return Void();
}
self->releaseDelay =
delay(std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE,
self->batchOperations * pProxyCommitData->commitComputePerOperation[latencyBucket]),
TaskPriority::ProxyMasterVersionReply);
self->releaseDelay = delay(computeReleaseDelay(self, latencyBucket), TaskPriority::ProxyMasterVersionReply);
if (debugID.present()) {
g_traceBatch.addEvent(
@ -1385,8 +1465,10 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
self->computeDuration += g_network->timer() - self->computeStart;
if (self->batchOperations > 0) {
double estimatedDelay = computeReleaseDelay(self, self->latencyBucket);
double computePerOperation =
std::min(SERVER_KNOBS->MAX_COMPUTE_PER_OPERATION, self->computeDuration / self->batchOperations);
if (computePerOperation <= pProxyCommitData->commitComputePerOperation[self->latencyBucket]) {
pProxyCommitData->commitComputePerOperation[self->latencyBucket] = computePerOperation;
} else {
@ -1401,6 +1483,20 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
pProxyCommitData->stats.minComputeNS =
std::min<int64_t>(pProxyCommitData->stats.minComputeNS,
1e9 * pProxyCommitData->commitComputePerOperation[self->latencyBucket]);
if (estimatedDelay >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF ||
self->computeDuration >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF) {
TraceEvent(SevInfo, "LongComputeDuration", pProxyCommitData->dbgid)
.suppressFor(10.0)
.detail("EstimatedComputeDuration", estimatedDelay)
.detail("ComputeDuration", self->computeDuration)
.detail("ComputePerOperation", computePerOperation)
.detail("LatencyBucket", self->latencyBucket)
.detail("UpdatedComputePerOperationEstimate",
pProxyCommitData->commitComputePerOperation[self->latencyBucket])
.detail("BatchBytes", self->batchBytes)
.detail("BatchOperations", self->batchOperations);
}
}
pProxyCommitData->stats.processingMutationDist->sampleSeconds(now() - postResolutionQueuing);

View File

@ -26,21 +26,29 @@
#include "fdbserver/LeaderElection.h"
#include "flow/actorcompiler.h" // has to be last include
ACTOR Future<GenerationRegReadReply> waitAndSendRead(RequestStream<GenerationRegReadRequest> to,
GenerationRegReadRequest req) {
ACTOR Future<GenerationRegReadReply> waitAndSendRead(GenerationRegInterface stateServer, GenerationRegReadRequest req) {
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
state GenerationRegReadReply reply = wait(retryBrokenPromise(to, req));
state GenerationRegReadReply reply;
if (stateServer.hostname.present()) {
wait(store(reply, retryGetReplyFromHostname(req, stateServer.hostname.get(), WLTOKEN_GENERATIONREG_READ)));
} else {
wait(store(reply, retryBrokenPromise(stateServer.read, req)));
}
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
return reply;
}
ACTOR Future<UniqueGeneration> waitAndSendWrite(RequestStream<GenerationRegWriteRequest> to,
GenerationRegWriteRequest req) {
ACTOR Future<UniqueGeneration> waitAndSendWrite(GenerationRegInterface stateServer, GenerationRegWriteRequest req) {
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
state UniqueGeneration reply = wait(retryBrokenPromise(to, req));
state UniqueGeneration reply;
if (stateServer.hostname.present()) {
wait(store(reply, retryGetReplyFromHostname(req, stateServer.hostname.get(), WLTOKEN_GENERATIONREG_WRITE)));
} else {
wait(store(reply, retryBrokenPromise(stateServer.write, req)));
}
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
return reply;
@ -152,7 +160,7 @@ struct CoordinatedStateImpl {
state std::vector<Future<GenerationRegReadReply>> rep_reply;
for (int i = 0; i < replicas.size(); i++) {
Future<GenerationRegReadReply> reply =
waitAndSendRead(replicas[i].read, GenerationRegReadRequest(req.key, req.gen));
waitAndSendRead(replicas[i], GenerationRegReadRequest(req.key, req.gen));
rep_empty_reply.push_back(nonemptyToNever(reply));
rep_reply.push_back(emptyToNever(reply));
self->ac.add(success(reply));
@ -192,8 +200,7 @@ struct CoordinatedStateImpl {
state std::vector<GenerationRegInterface>& replicas = self->coordinators.stateServers;
state std::vector<Future<UniqueGeneration>> wrep_reply;
for (int i = 0; i < replicas.size(); i++) {
Future<UniqueGeneration> reply =
waitAndSendWrite(replicas[i].write, GenerationRegWriteRequest(req.kv, req.gen));
Future<UniqueGeneration> reply = waitAndSendWrite(replicas[i], GenerationRegWriteRequest(req.kv, req.gen));
wrep_reply.push_back(reply);
self->ac.add(success(reply));
}

View File

@ -98,12 +98,16 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local) : Client
}
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s) {
leaderElectionServers.emplace_back(*s);
stateServers.emplace_back(*s);
configServers.emplace_back(*s);
for (auto h : cs.hostnames) {
leaderElectionServers.emplace_back(h);
stateServers.emplace_back(h);
configServers.emplace_back(h);
}
for (auto s : cs.coordinators()) {
leaderElectionServers.emplace_back(s);
stateServers.emplace_back(s);
configServers.emplace_back(s);
}
}
@ -208,10 +212,8 @@ ACTOR Future<Void> openDatabase(ClientData* db,
int* clientCount,
Reference<AsyncVar<bool>> hasConnectedClients,
OpenDatabaseCoordRequest req,
Future<Void> checkStuck,
Reference<AsyncVar<Void>> coordinatorsChanged) {
Future<Void> checkStuck) {
state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents;
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
state Future<Void> clientInfoOnChange = db->clientInfo->onChange();
++(*clientCount);
@ -233,11 +235,6 @@ ACTOR Future<Void> openDatabase(ClientData* db,
clientInfoOnChange = db->clientInfo->onChange();
replyContents = db->clientInfo->get();
}
when(wait(coordinatorsChangedOnChange)) {
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
replyContents = coordinators_changed();
break;
}
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) {
if (db->clientInfo->get().read().id.isValid()) {
replyContents = db->clientInfo->get();
@ -268,10 +265,7 @@ ACTOR Future<Void> openDatabase(ClientData* db,
ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
Reference<AsyncVar<bool>> hasConnectedClients,
Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader,
ElectionResultRequest req,
Reference<AsyncVar<Void>> coordinatorsChanged) {
state bool coordinatorsChangeDetected = false;
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
ElectionResultRequest req) {
state Future<Void> currentElectedLeaderOnChange = currentElectedLeader->onChange();
++(*clientCount);
hasConnectedClients->set(true);
@ -281,20 +275,11 @@ ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
when(wait(yieldedFuture(currentElectedLeaderOnChange))) {
currentElectedLeaderOnChange = currentElectedLeader->onChange();
}
when(wait(coordinatorsChangedOnChange)) {
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
coordinatorsChangeDetected = true;
break;
}
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; }
}
}
if (coordinatorsChangeDetected) {
req.reply.sendError(coordinators_changed());
} else {
req.reply.send(currentElectedLeader->get());
}
if (--(*clientCount) == 0) {
hasConnectedClients->set(false);
@ -325,8 +310,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader =
makeReference<AsyncVar<Optional<LeaderInfo>>>();
state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT);
state Reference<AsyncVar<Void>> coordinatorsChanged = makeReference<AsyncVar<Void>>();
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
state Future<Void> hasConnectedClientsOnChange = hasConnectedClients->onChange();
loop choose {
@ -338,14 +321,10 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
} else {
if (!leaderMon.isValid()) {
leaderMon = monitorLeaderAndGetClientInfo(
req.clusterKey, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
req.clusterKey, req.hostnames, req.coordinators, &clientData, currentElectedLeader);
}
actors.add(openDatabase(&clientData,
&clientCount,
hasConnectedClients,
req,
canConnectToLeader.checkStuck(),
coordinatorsChanged));
actors.add(
openDatabase(&clientData, &clientCount, hasConnectedClients, req, canConnectToLeader.checkStuck()));
}
}
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
@ -355,10 +334,9 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
} else {
if (!leaderMon.isValid()) {
leaderMon = monitorLeaderAndGetClientInfo(
req.key, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
req.key, req.hostnames, req.coordinators, &clientData, currentElectedLeader);
}
actors.add(remoteMonitorLeader(
&clientCount, hasConnectedClients, currentElectedLeader, req, coordinatorsChanged));
actors.add(remoteMonitorLeader(&clientCount, hasConnectedClients, currentElectedLeader, req));
}
}
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
@ -499,10 +477,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
}
}
when(wait(actors.getResult())) {}
when(wait(coordinatorsChangedOnChange)) {
leaderMon = Future<Void>();
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
}
}
}

View File

@ -153,17 +153,21 @@ struct CandidacyRequest {
struct ElectionResultRequest {
constexpr static FileIdentifier file_identifier = 11815465;
Key key;
std::vector<Hostname> hostnames;
std::vector<NetworkAddress> coordinators;
UID knownLeader;
ReplyPromise<Optional<LeaderInfo>> reply;
ElectionResultRequest() = default;
ElectionResultRequest(Key key, std::vector<NetworkAddress> coordinators, UID knownLeader)
: key(key), coordinators(std::move(coordinators)), knownLeader(knownLeader) {}
ElectionResultRequest(Key key,
std::vector<Hostname> hostnames,
std::vector<NetworkAddress> coordinators,
UID knownLeader)
: key(key), hostnames(std::move(hostnames)), coordinators(std::move(coordinators)), knownLeader(knownLeader) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, key, coordinators, knownLeader, reply);
serializer(ar, key, hostnames, coordinators, knownLeader, reply);
}
};

View File

@ -726,6 +726,7 @@ public:
// .detail("LastAnyUndesired", lastAnyUndesired)
// .detail("AnyWrongConfiguration", anyWrongConfiguration)
// .detail("LastWrongConfiguration", lastWrongConfiguration)
// .detail("ContainsWigglingServer", anyWigglingServer)
// .detail("Recheck", recheck)
// .detail("BadTeam", badTeam)
// .detail("LastZeroHealthy", lastZeroHealthy)
@ -1103,9 +1104,8 @@ public:
if (worstStatus == DDTeamCollection::Status::WIGGLING && invalidWiggleServer(worstAddr, self, server)) {
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
.detail("Address", worstAddr.toString())
.detail("ProcessId", server->getLastKnownInterface().locality.processId())
.detail("WigglingId", self->wigglingId.present());
self->excludedServers.set(worstAddr, DDTeamCollection::Status::NONE);
.detail("ServerId", server->getId())
.detail("WigglingId", self->wigglingId.present() ? self->wigglingId.get().toString() : "");
worstStatus = DDTeamCollection::Status::NONE;
}
otherChanges.push_back(self->excludedServers.onChange(worstAddr));
@ -1127,10 +1127,9 @@ public:
if (testStatus == DDTeamCollection::Status::WIGGLING &&
invalidWiggleServer(testAddr, self, server)) {
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
.detail("Address", testAddr.toString())
.detail("ProcessId", server->getLastKnownInterface().locality.processId())
.detail("ValidWigglingId", self->wigglingId.present());
self->excludedServers.set(testAddr, DDTeamCollection::Status::NONE);
.detail("Address", worstAddr.toString())
.detail("ServerId", server->getId())
.detail("WigglingId", self->wigglingId.present() ? self->wigglingId.get().toString() : "");
testStatus = DDTeamCollection::Status::NONE;
}
@ -2052,7 +2051,7 @@ public:
"PerpetualStorageWigglePause",
self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", id)
.detail("ServerId", id)
.detail("BestTeamKeepStuckCount", self->bestTeamKeepStuckCount)
.detail("ExtraHealthyTeamCount", extraTeamCount)
.detail("HealthyTeamCount", self->healthyTeamCount);
@ -2065,7 +2064,7 @@ public:
moveFinishFuture = fv;
TraceEvent("PerpetualStorageWiggleStart", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", id)
.detail("ServerId", id)
.detail("ExtraHealthyTeamCount", extraTeamCount)
.detail("HealthyTeamCount", self->healthyTeamCount);
}
@ -2091,7 +2090,7 @@ public:
self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWiggleFinish", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", self->wigglingId.get());
.detail("ServerId", self->wigglingId.get());
wait(self->eraseStorageWiggleMap(&metadataMap, self->wigglingId.get()) &&
self->storageWiggler->finishWiggle());
@ -2112,7 +2111,7 @@ public:
self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWiggleExitingPause", self->distributorId)
.detail("Primary", self->primary)
.detail("ProcessId", self->wigglingId.get());
.detail("ServerId", self->wigglingId.get());
self->wigglingId.reset();
}

View File

@ -27,44 +27,29 @@
// Keep trying to become a leader by submitting itself to all coordinators.
// Monitor the health of all coordinators at the same time.
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
// to throw `coordinators_changed()` error
ACTOR Future<Void> submitCandidacy(Key key,
LeaderElectionRegInterface coord,
LeaderInfo myInfo,
UID prevChangeID,
AsyncTrigger* nomineeChange,
Optional<LeaderInfo>* nominee,
Optional<Hostname> hostname = Optional<Hostname>()) {
Optional<LeaderInfo>* nominee) {
loop {
state Optional<LeaderInfo> li;
if (coord.candidacy.getEndpoint().getPrimaryAddress().fromHostname) {
state ErrorOr<Optional<LeaderInfo>> rep = wait(coord.candidacy.tryGetReply(
if (coord.hostname.present()) {
wait(store(
li,
retryGetReplyFromHostname(
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
TaskPriority::CoordinationReply));
if (rep.isError()) {
// Connecting to nominee failed, most likely due to connection failed.
TraceEvent("SubmitCandadicyError")
.error(rep.getError())
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
.detail("OldAddr", coord.candidacy.getEndpoint().getPrimaryAddress().toString());
if (rep.getError().code() == error_code_request_maybe_delivered) {
// Delay to prevent tight resolving loop due to outdated DNS cache
wait(delay(FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL));
throw coordinators_changed();
coord.hostname.get(),
WLTOKEN_LEADERELECTIONREG_CANDIDACY,
TaskPriority::CoordinationReply)));
} else {
throw rep.getError();
}
} else if (rep.present()) {
li = rep.get();
}
} else {
Optional<LeaderInfo> tmp = wait(retryBrokenPromise(
wait(store(
li,
retryBrokenPromise(
coord.candidacy,
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
TaskPriority::CoordinationReply));
li = tmp;
TaskPriority::CoordinationReply)));
}
wait(Future<Void>(Void())); // Make sure we weren't cancelled
@ -104,20 +89,26 @@ Future<Void> buggifyDelayedAsyncVar(Reference<AsyncVar<T>>& var) {
ACTOR Future<Void> changeLeaderCoordinators(ServerCoordinators coordinators, Value forwardingInfo) {
std::vector<Future<Void>> forwardRequests;
forwardRequests.reserve(coordinators.leaderElectionServers.size());
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++)
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
if (coordinators.leaderElectionServers[i].hostname.present()) {
forwardRequests.push_back(retryGetReplyFromHostname(ForwardRequest(coordinators.clusterKey, forwardingInfo),
coordinators.leaderElectionServers[i].hostname.get(),
WLTOKEN_LEADERELECTIONREG_FORWARD));
} else {
forwardRequests.push_back(retryBrokenPromise(coordinators.leaderElectionServers[i].forward,
ForwardRequest(coordinators.clusterKey, forwardingInfo)));
}
}
int quorum_size = forwardRequests.size() / 2 + 1;
wait(quorum(forwardRequests, quorum_size));
return Void();
}
ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> connRecord,
ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators,
Value proposedSerializedInterface,
Reference<AsyncVar<Value>> outSerializedLeader,
bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo) {
state ServerCoordinators coordinators(connRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
state LeaderInfo myInfo;
@ -134,6 +125,8 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
wait(delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
}
nominees.resize(coordinators.leaderElectionServers.size());
myInfo.serializedInfo = proposedSerializedInterface;
outSerializedLeader->set(Value());
@ -141,9 +134,6 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
(SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) ? buggifyDelayedAsyncVar(outSerializedLeader) : Void();
while (!iAmLeader) {
wait(connRecord->resolveHostnames());
coordinators = ServerCoordinators(connRecord);
nominees.resize(coordinators.leaderElectionServers.size());
state Future<Void> badCandidateTimeout;
myInfo.changeID = deterministicRandom()->randomUniqueID();
@ -153,19 +143,12 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
std::vector<Future<Void>> cand;
cand.reserve(coordinators.leaderElectionServers.size());
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
Optional<Hostname> hostname;
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
coordinators.leaderElectionServers[i].candidacy.getEndpoint().getPrimaryAddress());
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
hostname = r->second;
}
cand.push_back(submitCandidacy(coordinators.clusterKey,
coordinators.leaderElectionServers[i],
myInfo,
prevChangeID,
&nomineeChange,
&nominees[i],
hostname));
&nominees[i]));
}
candidacies = waitForAll(cand);
@ -220,7 +203,6 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
} else
badCandidateTimeout = Future<Void>();
try {
choose {
when(wait(nomineeChange.onTrigger())) {}
when(wait(badCandidateTimeout.isValid() ? badCandidateTimeout : Never())) {
@ -231,14 +213,6 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
when(wait(candidacies)) { ASSERT(false); }
when(wait(asyncPriorityInfo->onChange())) { break; }
}
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
connRecord->getConnectionString().resetToUnresolved();
break;
} else {
throw e;
}
}
}
candidacies.cancel();
@ -258,10 +232,17 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
state std::vector<Future<Void>> true_heartbeats;
state std::vector<Future<Void>> false_heartbeats;
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
Future<LeaderHeartbeatReply> hb =
retryBrokenPromise(coordinators.leaderElectionServers[i].leaderHeartbeat,
Future<LeaderHeartbeatReply> hb;
if (coordinators.leaderElectionServers[i].hostname.present()) {
hb = retryGetReplyFromHostname(LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
coordinators.leaderElectionServers[i].hostname.get(),
WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT,
TaskPriority::CoordinationReply);
} else {
hb = retryBrokenPromise(coordinators.leaderElectionServers[i].leaderHeartbeat,
LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
TaskPriority::CoordinationReply);
}
true_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ true }));
false_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ false }));
}

View File

@ -37,7 +37,7 @@ class ServerCoordinators;
// eventually be set. If the return value is cancelled, the candidacy or leadership of the proposedInterface
// will eventually end.
template <class LeaderInterface>
Future<Void> tryBecomeLeader(Reference<IClusterConnectionRecord> const& connRecord,
Future<Void> tryBecomeLeader(ServerCoordinators const& coordinators,
LeaderInterface const& proposedInterface,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
bool hasConnected,
@ -50,20 +50,20 @@ Future<Void> changeLeaderCoordinators(ServerCoordinators const& coordinators, Va
#pragma region Implementation
#endif // __INTEL_COMPILER
Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> const& connRecord,
Future<Void> tryBecomeLeaderInternal(ServerCoordinators const& coordinators,
Value const& proposedSerializedInterface,
Reference<AsyncVar<Value>> const& outSerializedLeader,
bool const& hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo);
template <class LeaderInterface>
Future<Void> tryBecomeLeader(Reference<IClusterConnectionRecord> const& connRecord,
Future<Void> tryBecomeLeader(ServerCoordinators const& coordinators,
LeaderInterface const& proposedInterface,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo) {
auto serializedInfo = makeReference<AsyncVar<Value>>();
Future<Void> m = tryBecomeLeaderInternal(connRecord,
Future<Void> m = tryBecomeLeaderInternal(coordinators,
ObjectWriter::toValue(proposedInterface, IncludeVersion()),
serializedInfo,
hasConnected,

View File

@ -99,8 +99,17 @@ class GetCommittedVersionQuorum {
// Now roll node forward to match the largest committed version of
// the replies.
state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(self->replies[target]));
try {
state std::vector<ConfigFollowerInterface> interfs = self->replies[target];
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& interf : interfs) {
if (interf.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&interf.getChanges, interf.hostname.get(), WLTOKEN_CONFIGFOLLOWER_GETCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(interfs));
state Version lastSeenVersion = std::max(
rollback.present() ? rollback.get() : nodeVersion.lastCommitted, self->largestCompactedResponse);
ConfigFollowerGetChangesReply reply =
@ -108,9 +117,21 @@ class GetCommittedVersionQuorum {
&ConfigFollowerInterface::getChanges,
ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
wait(timeoutError(cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
if (cfi.hostname.present()) {
wait(timeoutError(
retryGetReplyFromHostname(
ConfigFollowerRollforwardRequest{
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations },
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
} else {
wait(timeoutError(
cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
}
} catch (Error& e) {
if (e.code() == error_code_transaction_too_old) {
// Seeing this trace is not necessarily a problem. There
@ -129,9 +150,18 @@ class GetCommittedVersionQuorum {
ACTOR static Future<Void> getCommittedVersionActor(GetCommittedVersionQuorum* self, ConfigFollowerInterface cfi) {
try {
ConfigFollowerGetCommittedVersionReply reply =
wait(timeoutError(cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{}),
state ConfigFollowerGetCommittedVersionReply reply;
if (cfi.hostname.present()) {
wait(timeoutError(store(reply,
retryGetReplyFromHostname(ConfigFollowerGetCommittedVersionRequest{},
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
} else {
wait(timeoutError(
store(reply, cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
}
++self->totalRepliesReceived;
self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted);
@ -279,7 +309,15 @@ class PaxosConfigConsumerImpl {
std::vector<Future<Void>> compactionRequests;
compactionRequests.reserve(compactionRequests.size());
for (const auto& cfi : self->cfis) {
compactionRequests.push_back(cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
if (cfi.hostname.present()) {
compactionRequests.push_back(
retryGetReplyFromHostname(ConfigFollowerCompactRequest{ compactionVersion },
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_COMPACT));
} else {
compactionRequests.push_back(
cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
}
}
try {
wait(timeoutError(waitForAll(compactionRequests), 1.0));
@ -294,8 +332,18 @@ class PaxosConfigConsumerImpl {
self->resetCommittedVersionQuorum(); // TODO: This seems to fix a segfault, investigate more
try {
state Version committedVersion = wait(getCommittedVersion(self));
state Reference<ConfigFollowerInfo> configNodes(
new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas()));
state std::vector<ConfigFollowerInterface> readReplicas =
self->getCommittedVersionQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(&readReplica.getSnapshotAndChanges,
readReplica.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETSNAPSHOTANDCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> configNodes(new ConfigFollowerInfo(readReplicas));
ConfigFollowerGetSnapshotAndChangesReply reply =
wait(timeoutError(basicLoadBalance(configNodes,
&ConfigFollowerInterface::getSnapshotAndChanges,
@ -349,8 +397,18 @@ class PaxosConfigConsumerImpl {
// returned would be 1.
if (committedVersion > self->lastSeenVersion) {
ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1);
state Reference<ConfigFollowerInfo> configNodes(
new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas()));
state std::vector<ConfigFollowerInterface> readReplicas =
self->getCommittedVersionQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(&readReplica.getChanges,
readReplica.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> configNodes(new ConfigFollowerInfo(readReplicas));
ConfigFollowerGetChangesReply reply = wait(timeoutError(
basicLoadBalance(configNodes,
&ConfigFollowerInterface::getChanges,

View File

@ -73,6 +73,8 @@ struct ProxyStats {
LatencySample commitBatchingWindowSize;
LatencySample computeLatency;
Future<Void> logger;
int64_t maxComputeNS;
@ -126,6 +128,10 @@ struct ProxyStats {
id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
computeLatency("ComputeLatency",
id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
maxComputeNS(0), minComputeNS(1e12),
commitBatchQueuingDist(Histogram::getHistogram(LiteralStringRef("CommitProxy"),
LiteralStringRef("CommitBatchQueuing"),

View File

@ -161,9 +161,8 @@ ACTOR Future<std::vector<WorkerInterface>> getCoordWorkers(Database cx,
if (!coordinators.present()) {
throw operation_failed();
}
state ClusterConnectionString ccs(coordinators.get().toString());
wait(ccs.resolveHostnames());
std::vector<NetworkAddress> coordinatorsAddr = ccs.coordinators();
ClusterConnectionString ccs(coordinators.get().toString());
std::vector<NetworkAddress> coordinatorsAddr = wait(ccs.tryResolveHostnames());
std::set<NetworkAddress> coordinatorsAddrSet;
for (const auto& addr : coordinatorsAddr) {
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);

View File

@ -44,15 +44,29 @@ class SimpleConfigConsumerImpl {
loop {
state Version compactionVersion = self->lastSeenVersion;
wait(delayJittered(self->compactionInterval.get()));
if (self->cfi.hostname.present()) {
wait(retryGetReplyFromHostname(ConfigFollowerCompactRequest{ compactionVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_COMPACT));
} else {
wait(self->cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
}
++self->compactRequest;
broadcaster->compact(compactionVersion);
}
}
ACTOR static Future<Version> getCommittedVersion(SimpleConfigConsumerImpl* self) {
ConfigFollowerGetCommittedVersionReply committedVersionReply =
wait(self->cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{}));
state ConfigFollowerGetCommittedVersionReply committedVersionReply;
if (self->cfi.hostname.present()) {
wait(store(committedVersionReply,
retryGetReplyFromHostname(ConfigFollowerGetCommittedVersionRequest{},
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)));
} else {
wait(store(committedVersionReply,
self->cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})));
}
return committedVersionReply.lastCommitted;
}
@ -63,8 +77,18 @@ class SimpleConfigConsumerImpl {
state Version committedVersion = wait(getCommittedVersion(self));
ASSERT_GE(committedVersion, self->lastSeenVersion);
if (committedVersion > self->lastSeenVersion) {
ConfigFollowerGetChangesReply reply = wait(self->cfi.getChanges.getReply(
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion }));
state ConfigFollowerGetChangesReply reply;
if (self->cfi.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCHANGES)));
} else {
wait(store(reply,
self->cfi.getChanges.getReply(
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion })));
}
++self->successfulChangeRequest;
for (const auto& versionedMutation : reply.changes) {
TraceEvent te(SevDebug, "ConsumerFetchedMutation", self->id);
@ -96,8 +120,17 @@ class SimpleConfigConsumerImpl {
ACTOR static Future<Void> getSnapshotAndChanges(SimpleConfigConsumerImpl* self, ConfigBroadcaster* broadcaster) {
state Version committedVersion = wait(getCommittedVersion(self));
ConfigFollowerGetSnapshotAndChangesReply reply = wait(
self->cfi.getSnapshotAndChanges.getReply(ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion }));
state ConfigFollowerGetSnapshotAndChangesReply reply;
if (self->cfi.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETSNAPSHOTANDCHANGES)));
} else {
wait(store(reply,
self->cfi.getSnapshotAndChanges.getReply(
ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion })));
}
++self->snapshotRequest;
TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id)
.detail("SnapshotVersion", reply.snapshotVersion)

View File

@ -1980,8 +1980,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
TEST(useIPv6); // Use IPv6
TEST(!useIPv6); // Use IPv4
// TODO(renxuan): Use hostname 25% of the time, unless it is disabled
bool useHostname = false; // !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
// Use hostname 25% of the time, unless it is disabled
bool useHostname = !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
TEST(useHostname); // Use hostname
TEST(!useHostname); // Use IP address
NetworkAddressFromHostname fromHostname =

View File

@ -831,7 +831,8 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
}
}
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) {
std::vector<NetworkAddress> addressVec = wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
for (const auto& coordinator : addressVec) {
roles.addCoordinatorRole(coordinator);
}
@ -1689,8 +1690,7 @@ static JsonBuilderObject configurationFetcher(Optional<DatabaseConfiguration> co
}
statusObj["excluded_servers"] = excludedServersArr;
}
std::vector<ClientLeaderRegInterface> coordinatorLeaderServers = coordinators.clientLeaderServers;
int count = coordinatorLeaderServers.size();
int count = coordinators.clientLeaderServers.size();
statusObj["coordinators_count"] = count;
} catch (Error&) {
incomplete_reasons->insert("Could not retrieve all configuration status information.");
@ -2505,7 +2505,8 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance,
static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration,
ServerCoordinators coordinators,
std::vector<WorkerDetails>& workers,
const std::vector<NetworkAddress>& coordinatorAddresses,
const std::vector<WorkerDetails>& workers,
int extraTlogEligibleZones,
int minStorageReplicasRemaining,
int oldLogFaultTolerance,
@ -2521,11 +2522,11 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
int maxCoordinatorFailures = (coordinators.clientLeaderServers.size() - 1) / 2;
std::map<NetworkAddress, StringRef> workerZones;
for (auto& worker : workers) {
for (const auto& worker : workers) {
workerZones[worker.interf.address()] = worker.interf.locality.zoneId().orDefault(LiteralStringRef(""));
}
std::map<StringRef, int> coordinatorZoneCounts;
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) {
for (const auto& coordinator : coordinatorAddresses) {
auto zone = workerZones[coordinator];
coordinatorZoneCounts[zone] += 1;
}
@ -3061,6 +3062,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
wait(success(primaryDCFO));
std::vector<NetworkAddress> coordinatorAddresses =
wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
int logFaultTolerance = 100;
if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
statusObj["logs"] = tlogFetcher(&logFaultTolerance, db, address_workers);
@ -3070,6 +3074,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
statusObj["fault_tolerance"] =
faultToleranceStatusFetcher(configuration.get(),
coordinators,
coordinatorAddresses,
workers,
extraTlogEligibleZones,
minStorageReplicasRemaining,

View File

@ -859,9 +859,9 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
NetworkAddressList publicNetworkAddresses;
NetworkAddressList listenNetworkAddresses;
connectionRecord.resolveHostnamesBlocking();
auto& coordinators = connectionRecord.getConnectionString().coordinators();
ASSERT(coordinators.size() > 0);
std::vector<Hostname>& hostnames = connectionRecord.getConnectionString().hostnames;
const std::vector<NetworkAddress>& coords = connectionRecord.getConnectionString().coordinators();
ASSERT(hostnames.size() + coords.size() > 0);
for (int ii = 0; ii < publicAddressStrs.size(); ++ii) {
const std::string& publicAddressStr = publicAddressStrs[ii];
@ -930,13 +930,26 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
listenNetworkAddresses.secondaryAddress = currentListenAddress;
}
bool hasSameCoord = std::all_of(coordinators.begin(), coordinators.end(), [&](const NetworkAddress& address) {
bool matchCoordinatorsTls = std::all_of(coords.begin(), coords.end(), [&](const NetworkAddress& address) {
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
return address.isTLS() == currentPublicAddress.isTLS();
}
return true;
});
if (!hasSameCoord) {
// If true, further check hostnames.
if (matchCoordinatorsTls) {
matchCoordinatorsTls = std::all_of(hostnames.begin(), hostnames.end(), [&](Hostname& hostname) {
Optional<NetworkAddress> resolvedAddress = hostname.resolveBlocking();
if (resolvedAddress.present()) {
NetworkAddress address = resolvedAddress.get();
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
return address.isTLS() == currentPublicAddress.isTLS();
}
}
return true;
});
}
if (!matchCoordinatorsTls) {
fprintf(stderr,
"ERROR: TLS state of public address %s does not match in coordinator list.\n",
publicAddressStr.c_str());

View File

@ -3455,7 +3455,8 @@ ACTOR Future<GetRangeReqAndResultRef> quickGetKeyValues(
tr.setVersion(version);
// TODO: is DefaultPromiseEndpoint the best priority for this?
tr.trState->taskID = TaskPriority::DefaultPromiseEndpoint;
Future<RangeResult> rangeResultFuture = tr.getRange(prefixRange(prefix), Snapshot::True);
Future<RangeResult> rangeResultFuture =
tr.getRange(prefixRange(prefix), GetRangeLimits::ROW_LIMIT_UNLIMITED, Snapshot::True);
// TODO: async in case it needs to read from other servers.
RangeResult rangeResult = wait(rangeResultFuture);
a->dependsOn(rangeResult.arena());

View File

@ -2977,21 +2977,40 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> result,
MonitorLeaderInfo info) {
state ClusterConnectionString ccf = info.intermediateConnRecord->getConnectionString();
state std::vector<NetworkAddress> addrs = ccf.coordinators();
ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
state int coordinatorsSize = cs.hostnames.size() + cs.coordinators().size();
state ElectionResultRequest request;
state int index = 0;
state int successIndex = 0;
request.key = ccf.clusterKey();
request.coordinators = ccf.coordinators();
state std::vector<LeaderElectionRegInterface> leaderElectionServers;
deterministicRandom()->randomShuffle(addrs);
leaderElectionServers.reserve(coordinatorsSize);
for (const auto& h : cs.hostnames) {
leaderElectionServers.push_back(LeaderElectionRegInterface(h));
}
for (const auto& c : cs.coordinators()) {
leaderElectionServers.push_back(LeaderElectionRegInterface(c));
}
deterministicRandom()->randomShuffle(leaderElectionServers);
request.key = cs.clusterKey();
request.hostnames = cs.hostnames;
request.coordinators = cs.coordinators();
loop {
LeaderElectionRegInterface interf(addrs[index]);
LeaderElectionRegInterface interf = leaderElectionServers[index];
bool usingHostname = interf.hostname.present();
request.reply = ReplyPromise<Optional<LeaderInfo>>();
ErrorOr<Optional<LeaderInfo>> leader = wait(interf.electionResult.tryGetReply(request));
state ErrorOr<Optional<LeaderInfo>> leader;
if (usingHostname) {
wait(store(
leader,
tryGetReplyFromHostname(request, interf.hostname.get(), WLTOKEN_LEADERELECTIONREG_ELECTIONRESULT)));
} else {
wait(store(leader, interf.electionResult.tryGetReply(request)));
}
if (leader.present()) {
if (leader.get().present()) {
if (leader.get().get().forward) {
@ -3027,14 +3046,9 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
}
successIndex = index;
} else {
if (leader.isError() && leader.getError().code() == error_code_coordinators_changed) {
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
throw coordinators_changed();
}
index = (index + 1) % addrs.size();
index = (index + 1) % coordinatorsSize;
if (index == successIndex) {
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
throw coordinators_changed();
}
}
}
@ -3042,22 +3056,11 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
wait(connRecord->resolveHostnames());
state MonitorLeaderInfo info(connRecord);
loop {
try {
wait(info.intermediateConnRecord->resolveHostnames());
MonitorLeaderInfo _info =
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
info = _info;
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorLeaderWithDelayedCandidacyCoordinatorsChanged").suppressFor(1.0);
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
} else {
throw e;
}
}
}
}
@ -3191,6 +3194,7 @@ ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
actors.push_back(serveProcess());
try {
ServerCoordinators coordinators(connRecord);
if (g_network->isSimulated()) {
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
}

View File

@ -2096,7 +2096,8 @@ struct ConsistencyCheckWorkload : TestWorkload {
return false;
}
state ClusterConnectionString old(currentKey.get().toString());
ClusterConnectionString old(currentKey.get().toString());
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
std::vector<ProcessData> workers = wait(::getWorkers(&tr));
@ -2106,7 +2107,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
}
std::set<Optional<Standalone<StringRef>>> checkDuplicates;
for (const auto& addr : old.coordinators()) {
for (const auto& addr : oldCoordinators) {
auto findResult = addr_locality.find(addr);
if (findResult != addr_locality.end()) {
if (checkDuplicates.count(findResult->second.zoneId())) {

View File

@ -106,6 +106,7 @@ struct CycleWorkload : TestWorkload {
state Transaction tr(cx);
if (deterministicRandom()->random01() >= self->traceParentProbability) {
state Span span("CycleClient"_loc);
// TraceEvent("CycleTracingTransaction", span.context).log();
TraceEvent("CycleTracingTransaction", span.context).log();
tr.setOption(FDBTransactionOptions::SPAN_PARENT,
BinaryWriter::toValue(span.context, Unversioned()));

View File

@ -132,7 +132,7 @@ struct DataLossRecoveryWorkload : TestWorkload {
} else {
tr.clear(key);
}
wait(timeoutError(tr.commit(), 30.0));
wait(tr.commit());
break;
} catch (Error& e) {
wait(tr.onError(e));

View File

@ -329,9 +329,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
for (int j = i; j < end; j++) {
if (deterministicRandom()->random01() < self->initialKeyDensity) {
Key key = self->getKeyForIndex(tenantNum, j);
if (key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
if (key.size() <= getMaxWriteKeySize(key, false)) {
Value value = self->getRandomValue();
value = value.substr(
0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
@ -1091,24 +1089,22 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
pos = littleEndian32(*(int32_t*)&value.end()[-4]);
}
contract = {
std::make_pair(error_code_key_too_large,
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
contract = { std::make_pair(error_code_key_too_large,
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
: ExceptionContract::Never),
std::make_pair(error_code_value_too_large,
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
std::make_pair(
error_code_invalid_mutation_type,
ExceptionContract::requiredIf(!isValidMutationType(op) || !isAtomicOp((MutationRef::Type)op))),
std::make_pair(error_code_invalid_mutation_type,
ExceptionContract::requiredIf(!isValidMutationType(op) ||
!isAtomicOp((MutationRef::Type)op))),
std::make_pair(error_code_key_outside_legal_range,
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),
std::make_pair(
error_code_client_invalid_operation,
ExceptionContract::requiredIf(
(op == MutationRef::SetVersionstampedKey && (pos < 0 || pos + 10 > key.size() - 4)) ||
(op == MutationRef::SetVersionstampedValue && (pos < 0 || pos + 10 > value.size() - 4))))
};
std::make_pair(error_code_client_invalid_operation,
ExceptionContract::requiredIf((op == MutationRef::SetVersionstampedKey &&
(pos < 0 || pos + 10 > key.size() - 4)) ||
(op == MutationRef::SetVersionstampedValue &&
(pos < 0 || pos + 10 > value.size() - 4)))) };
}
void callback(Reference<ITransaction> tr) override { tr->atomicOp(key, value, (FDBMutationTypes::Option)op); }
@ -1131,11 +1127,10 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
key = makeKey();
}
value = makeValue();
contract = { std::make_pair(
error_code_key_too_large,
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
contract = { std::make_pair(error_code_key_too_large,
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
: ExceptionContract::Never),
std::make_pair(error_code_value_too_large,
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
std::make_pair(error_code_key_outside_legal_range,
@ -1268,11 +1263,11 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
TestWatch(unsigned int id, FuzzApiCorrectnessWorkload* workload, Reference<ITransaction> tr)
: BaseTest(id, workload, "TestWatch") {
key = makeKey();
contract = { std::make_pair(
error_code_key_too_large,
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
printf("Watching: %d %s\n", key.size(), printable(key.substr(0, std::min(key.size(), 20))).c_str());
contract = { std::make_pair(error_code_key_too_large,
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
: ExceptionContract::Never),
std::make_pair(error_code_watches_disabled, ExceptionContract::Possible),
std::make_pair(error_code_key_outside_legal_range,
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),

View File

@ -541,7 +541,12 @@ struct RemoveServersSafelyWorkload : TestWorkload {
state AddressExclusion coordExcl;
// Exclude a coordinator under buggify, but only if fault tolerance is > 0 and kill set is non-empty already
if (BUGGIFY && toKill.size()) {
std::vector<NetworkAddress> coordinators = wait(getCoordinators(cx));
Optional<ClusterConnectionString> csOptional = wait(getConnectionString(cx));
state std::vector<NetworkAddress> coordinators;
if (csOptional.present()) {
ClusterConnectionString cs = csOptional.get();
wait(store(coordinators, cs.tryResolveHostnames()));
}
if (coordinators.size() > 2) {
auto randomCoordinator = deterministicRandom()->randomChoice(coordinators);
coordExcl = AddressExclusion(randomCoordinator.ip, randomCoordinator.port);

View File

@ -957,9 +957,9 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
boost::split(
process_addresses, coordinator_processes_key.get().toString(), [](char c) { return c == ','; });
ASSERT(process_addresses.size() == cs.coordinators().size() + cs.hostnames.size());
wait(cs.resolveHostnames());
// compare the coordinator process network addresses one by one
for (const auto& network_address : cs.coordinators()) {
std::vector<NetworkAddress> coordinators = wait(cs.tryResolveHostnames());
for (const auto& network_address : coordinators) {
ASSERT(std::find(process_addresses.begin(), process_addresses.end(), network_address.toString()) !=
process_addresses.end());
}
@ -1077,19 +1077,20 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
Optional<Value> res = wait(tx->get(coordinatorsKey));
ASSERT(res.present()); // Otherwise, database is in a bad state
state ClusterConnectionString csNew(res.get().toString());
wait(csNew.resolveHostnames());
ASSERT(csNew.coordinators().size() == old_coordinators_processes.size() + 1);
ClusterConnectionString csNew(res.get().toString());
// verify the cluster decription
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
ASSERT(csNew.hostnames.size() + csNew.coordinators().size() ==
old_coordinators_processes.size() + 1);
std::vector<NetworkAddress> newCoordinators = wait(csNew.tryResolveHostnames());
// verify the coordinators' addresses
for (const auto& network_address : csNew.coordinators()) {
for (const auto& network_address : newCoordinators) {
std::string address_str = network_address.toString();
ASSERT(std::find(old_coordinators_processes.begin(),
old_coordinators_processes.end(),
address_str) != old_coordinators_processes.end() ||
new_coordinator_process == address_str);
}
// verify the cluster decription
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
tx->reset();
} catch (Error& e) {
wait(tx->onError(e));

View File

@ -30,7 +30,7 @@ void forceLinkMemcpyTests();
void forceLinkMemcpyPerfTests();
#if (!defined(TLS_DISABLED) && !defined(_WIN32))
void forceLinkStreamCipherTests();
void forceLinkBLockCiherTests();
void forceLinkBlobCipherTests();
#endif
void forceLinkParallelStreamTests();
void forceLinkSimExternalConnectionTests();
@ -39,6 +39,8 @@ void forceLinkSimKmsConnectorTests();
void forceLinkIThreadPoolTests();
void forceLinkTokenSignTests();
void forceLinkVersionVectorTests();
void forceLinkRESTClientTests();
void forceLinkRESTUtilsTests();
struct UnitTestWorkload : TestWorkload {
bool enabled;
@ -88,6 +90,8 @@ struct UnitTestWorkload : TestWorkload {
forceLinkIThreadPoolTests();
forceLinkTokenSignTests();
forceLinkVersionVectorTests();
forceLinkRESTClientTests();
forceLinkRESTUtilsTests();
}
std::string description() const override { return "UnitTests"; }

View File

@ -653,9 +653,7 @@ struct WriteDuringReadWorkload : TestWorkload {
for (int j = i; j < end; j++) {
if (deterministicRandom()->random01() < self->initialKeyDensity) {
Key key = self->getKeyForIndex(j);
if (key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
if (key.size() <= getMaxWriteKeySize(key, false)) {
Value value = self->getRandomValue();
value =
value.substr(0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
@ -898,18 +896,10 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.clear(range);
if (!noConflict) {
KeyRangeRef conflict(
range.begin.substr(0,
std::min<int>(range.begin.size(),
(range.begin.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
range.begin.substr(
0, std::min<int>(range.begin.size(), getMaxClearKeySize(range.begin) + 1)),
range.end.substr(
0, std::min<int>(range.end.size(), getMaxClearKeySize(range.end) + 1)));
self->addedConflicts.insert(conflict, true);
}
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
@ -922,9 +912,7 @@ struct WriteDuringReadWorkload : TestWorkload {
if (noConflict)
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.clear(key);
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
if (!noConflict && key.size() <= getMaxClearKeySize(key)) {
self->addedConflicts.insert(key, true);
}
self->memoryDatabase.erase(key);
@ -936,18 +924,9 @@ struct WriteDuringReadWorkload : TestWorkload {
//TraceEvent("WDRAddWriteConflict").detail("Range", range);
tr.addWriteConflictRange(range);
KeyRangeRef conflict(
range.begin.substr(0,
std::min<int>(range.begin.size(),
(range.begin.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
range.begin.substr(
0, std::min<int>(range.begin.size(), getMaxKeySize(range.begin) + 1)),
range.end.substr(0, std::min<int>(range.end.size(), getMaxKeySize(range.end) + 1)));
self->addedConflicts.insert(conflict, true);
} else if (operationType == 8 && !disableDelay) {
double maxTime = 6.0;
@ -991,18 +970,10 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.atomicOp(versionStampKey, value, MutationRef::SetVersionstampedKey);
tr.clear(range);
KeyRangeRef conflict(
range.begin.substr(0,
std::min<int>(range.begin.size(),
(range.begin.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
range.begin.substr(
0, std::min<int>(range.begin.size(), getMaxClearKeySize(range.begin) + 1)),
range.end.substr(
0, std::min<int>(range.end.size(), getMaxClearKeySize(range.end) + 1)));
self->addedConflicts.insert(conflict, true);
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
self->memoryDatabase.lower_bound(range.end));
@ -1043,10 +1014,9 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.atomicOp(key, value, opType);
//TraceEvent("WDRAtomicOpSuccess").detail("Key", key).detail("Value", value.size());
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (!noConflict && key.size() <= getMaxWriteKeySize(key, false)) {
self->addedConflicts.insert(key, true);
}
Optional<Value> existing = self->memoryGet(&self->memoryDatabase, key);
self->memoryDatabase[key] =
self->applyAtomicOp(existing.present() ? Optional<StringRef>(existing.get())
@ -1063,10 +1033,9 @@ struct WriteDuringReadWorkload : TestWorkload {
if (noConflict)
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.set(key, value);
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))
if (!noConflict && key.size() <= getMaxWriteKeySize(key, false)) {
self->addedConflicts.insert(key, true);
}
//TraceEvent("WDRSetSuccess").detail("Key", key).detail("Value", value.size());
self->memoryDatabase[key] = value;
}

View File

@ -39,6 +39,9 @@
#include "flow/flow.h"
#include "flow/genericactors.actor.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/aes.h>
#include <openssl/engine.h>
#include <openssl/evp.h>

View File

@ -84,6 +84,10 @@ set(FLOW_SRCS
actorcompiler.h
crc32c.h
crc32c.cpp
ppc-asm.h
crc32.S
crc32_wrapper.h
crc32_wrapper.c
error_definitions.h
${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h
flat_buffers.cpp
@ -172,6 +176,10 @@ if(NOT WITH_TLS)
else()
target_link_libraries(flow PUBLIC OpenSSL::SSL)
target_link_libraries(flow_sampling PUBLIC OpenSSL::SSL)
if(USE_WOLFSSL)
target_include_directories(flow SYSTEM BEFORE PUBLIC ${WOLFSSL_INCLUDE_DIR}/wolfssl)
target_include_directories(flow_sampling SYSTEM BEFORE PUBLIC ${WOLFSSL_INCLUDE_DIR}/wolfssl)
endif()
endif()
target_link_libraries(flow PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
target_link_libraries(flow_sampling PUBLIC Threads::Threads ${CMAKE_DL_LIBS})

Some files were not shown because too many files have changed in this diff Show More