Merge branch 'main' of github.com:apple/foundationdb into jfu-list-tenants

This commit is contained in:
Jon Fu 2022-04-29 13:16:54 -04:00
commit d953b961b7
130 changed files with 11900 additions and 5493 deletions

View File

@ -10,3 +10,4 @@ set(SRCS
add_library(FDBLibTLS STATIC ${SRCS}) add_library(FDBLibTLS STATIC ${SRCS})
target_link_libraries(FDBLibTLS PUBLIC OpenSSL::SSL boost_target PRIVATE flow) target_link_libraries(FDBLibTLS PUBLIC OpenSSL::SSL boost_target PRIVATE flow)
target_include_directories(FDBLibTLS INTERFACE OpenSSL::SSL boost_target PRIVATE flow)

View File

@ -22,6 +22,9 @@
#include "FDBLibTLS/FDBLibTLSSession.h" #include "FDBLibTLS/FDBLibTLSSession.h"
#include "flow/Trace.h" #include "flow/Trace.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/bio.h> #include <openssl/bio.h>
#include <openssl/err.h> #include <openssl/err.h>
#include <openssl/evp.h> #include <openssl/evp.h>

View File

@ -23,6 +23,9 @@
#include "flow/flow.h" #include "flow/flow.h"
#include "flow/Trace.h" #include "flow/Trace.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/bio.h> #include <openssl/bio.h>
#include <openssl/err.h> #include <openssl/err.h>
#include <openssl/pem.h> #include <openssl/pem.h>

View File

@ -20,6 +20,9 @@
#include "FDBLibTLS/FDBLibTLSVerify.h" #include "FDBLibTLS/FDBLibTLSVerify.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/objects.h> #include <openssl/objects.h>
#include <algorithm> #include <algorithm>

View File

@ -25,6 +25,9 @@
#include <string.h> #include <string.h>
#include <boost/lexical_cast.hpp> #include <boost/lexical_cast.hpp>
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/objects.h> #include <openssl/objects.h>
#include "fdbrpc/ITLSPlugin.h" #include "fdbrpc/ITLSPlugin.h"

View File

@ -80,10 +80,23 @@ endif()
# The tests don't build on windows # The tests don't build on windows
if(NOT WIN32) if(NOT WIN32)
set(MAKO_SRCS set(MAKO_SRCS
test/mako/mako.c test/mako/async.hpp
test/mako/mako.h test/mako/async.cpp
test/mako/utils.c test/mako/blob_granules.hpp
test/mako/utils.h) test/mako/blob_granules.cpp
test/mako/future.hpp
test/mako/limit.hpp
test/mako/logger.hpp
test/mako/mako.cpp
test/mako/mako.hpp
test/mako/operations.hpp
test/mako/operations.cpp
test/mako/process.hpp
test/mako/shm.hpp
test/mako/stats.hpp
test/mako/time.hpp
test/mako/utils.cpp
test/mako/utils.hpp)
add_subdirectory(test/unit/third_party) add_subdirectory(test/unit/third_party)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
set(UNIT_TEST_SRCS set(UNIT_TEST_SRCS
@ -98,6 +111,11 @@ if(NOT WIN32)
test/unit/fdb_api.cpp test/unit/fdb_api.cpp
test/unit/fdb_api.hpp) test/unit/fdb_api.hpp)
add_library(fdb_cpp INTERFACE)
target_sources(fdb_cpp INTERFACE test/fdb_api.hpp)
target_include_directories(fdb_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/test)
target_link_libraries(fdb_cpp INTERFACE fmt::fmt)
set(API_TESTER_SRCS set(API_TESTER_SRCS
test/apitester/fdb_c_api_tester.cpp test/apitester/fdb_c_api_tester.cpp
test/apitester/TesterApiWorkload.cpp test/apitester/TesterApiWorkload.cpp
@ -179,7 +197,11 @@ endif()
# do not set RPATH for mako # do not set RPATH for mako
set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE) set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
target_link_libraries(mako PRIVATE fdb_c fdbclient) if (USE_SANITIZER)
target_link_libraries(mako PRIVATE fdb_c fdbclient fmt::fmt Threads::Threads fdb_cpp boost_asan)
else()
target_link_libraries(mako PRIVATE fdb_c fdbclient fmt::fmt Threads::Threads fdb_cpp boost_target)
endif()
if(NOT OPEN_FOR_IDE) if(NOT OPEN_FOR_IDE)
# Make sure that fdb_c.h is compatible with c90 # Make sure that fdb_c.h is compatible with c90
@ -254,6 +276,8 @@ endif()
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests
--tmp-dir --tmp-dir
@TMP_DIR@ @TMP_DIR@
--log-dir
@LOG_DIR@
) )
add_fdbclient_test( add_fdbclient_test(
@ -271,6 +295,10 @@ endif()
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/blobgranuletests ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/blobgranuletests
--blob-granule-local-file-path --blob-granule-local-file-path
@DATA_DIR@/fdbblob/ @DATA_DIR@/fdbblob/
--tmp-dir
@TMP_DIR@
--log-dir
@LOG_DIR@
) )
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT USE_SANITIZER) if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT USE_SANITIZER)

View File

@ -37,55 +37,71 @@ private:
enum OpType { OP_INSERT, OP_CLEAR, OP_CLEAR_RANGE, OP_READ, OP_GET_RANGES, OP_LAST = OP_GET_RANGES }; enum OpType { OP_INSERT, OP_CLEAR, OP_CLEAR_RANGE, OP_READ, OP_GET_RANGES, OP_LAST = OP_GET_RANGES };
std::vector<OpType> excludedOpTypes; std::vector<OpType> excludedOpTypes;
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
// FIXME: should still guarantee a read succeeds eventually somehow
bool seenReadSuccess = false;
void randomReadOp(TTaskFct cont) { void randomReadOp(TTaskFct cont) {
std::string begin = randomKeyName(); std::string begin = randomKeyName();
std::string end = randomKeyName(); std::string end = randomKeyName();
auto results = std::make_shared<std::vector<KeyValue>>(); auto results = std::make_shared<std::vector<KeyValue>>();
auto tooOld = std::make_shared<bool>(false);
if (begin > end) { if (begin > end) {
std::swap(begin, end); std::swap(begin, end);
} }
execTransaction( execTransaction(
[begin, end, results](auto ctx) { [this, begin, end, results, tooOld](auto ctx) {
ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE); ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
KeyValuesResult res = ctx->tx()->readBlobGranules(begin, end, ctx->getBGBasePath()); KeyValuesResult res = ctx->tx()->readBlobGranules(begin, end, ctx->getBGBasePath());
bool more; bool more;
(*results) = res.getKeyValues(&more); (*results) = res.getKeyValues(&more);
ASSERT(!more); ASSERT(!more);
if (res.getError() != error_code_success) { if (res.getError() == error_code_blob_granule_transaction_too_old) {
info("BlobGranuleCorrectness::randomReadOp bg too old\n");
ASSERT(!seenReadSuccess);
*tooOld = true;
ctx->done();
} else if (res.getError() != error_code_success) {
ctx->onError(res.getError()); ctx->onError(res.getError());
} else { } else {
if (!seenReadSuccess) {
info("BlobGranuleCorrectness::randomReadOp first success\n");
}
seenReadSuccess = true;
ctx->done(); ctx->done();
} }
}, },
[this, begin, end, results, cont]() { [this, begin, end, results, tooOld, cont]() {
std::vector<KeyValue> expected = store.getRange(begin, end, store.size(), false); if (!*tooOld) {
if (results->size() != expected.size()) { std::vector<KeyValue> expected = store.getRange(begin, end, store.size(), false);
error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}", if (results->size() != expected.size()) {
expected.size(), error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}",
results->size())); expected.size(),
} results->size()));
ASSERT(results->size() == expected.size());
for (int i = 0; i < results->size(); i++) {
if ((*results)[i].key != expected[i].key) {
error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}",
i,
results->size(),
expected[i].key,
(*results)[i].key));
} }
ASSERT((*results)[i].key == expected[i].key); ASSERT(results->size() == expected.size());
if ((*results)[i].value != expected[i].value) { for (int i = 0; i < results->size(); i++) {
error( if ((*results)[i].key != expected[i].key) {
fmt::format("randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}", error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}",
i, i,
results->size(), results->size(),
expected[i].key, expected[i].key,
expected[i].value, (*results)[i].key));
(*results)[i].value)); }
ASSERT((*results)[i].key == expected[i].key);
if ((*results)[i].value != expected[i].value) {
error(fmt::format(
"randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}",
i,
results->size(),
expected[i].key,
expected[i].value,
(*results)[i].value));
}
ASSERT((*results)[i].value == expected[i].value);
} }
ASSERT((*results)[i].value == expected[i].value);
} }
schedule(cont); schedule(cont);
}); });
@ -110,9 +126,11 @@ private:
true); true);
}, },
[this, begin, end, results, cont]() { [this, begin, end, results, cont]() {
ASSERT(results->size() > 0); if (seenReadSuccess) {
ASSERT(results->front().key <= begin); ASSERT(results->size() > 0);
ASSERT(results->back().value >= end); ASSERT(results->front().key <= begin);
ASSERT(results->back().value >= end);
}
for (int i = 0; i < results->size(); i++) { for (int i = 0; i < results->size(); i++) {
// no empty or inverted ranges // no empty or inverted ranges

View File

@ -20,12 +20,19 @@
# #
import sys import sys
import subprocess
import argparse import argparse
import os import os
from subprocess import Popen, TimeoutExpired from subprocess import Popen, TimeoutExpired
import logging import logging
import signal import signal
from pathlib import Path
import glob
import random
import string
def random_string(len):
return ''.join(random.choice(string.ascii_letters + string.digits) for i in range(len))
def get_logger(): def get_logger():
@ -48,6 +55,14 @@ def initialize_logger_level(logging_level):
logger.setLevel(logging.ERROR) logger.setLevel(logging.ERROR)
def dump_client_logs(log_dir):
for log_file in glob.glob(os.path.join(log_dir, "*")):
print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(log_file))
with open(log_file, "r") as f:
print(f.read())
print(">>>>>>>>>>>>>>>>>>>> End of {}:".format(log_file))
def run_tester(args, test_file): def run_tester(args, test_file):
cmd = [args.tester_binary, cmd = [args.tester_binary,
"--cluster-file", args.cluster_file, "--cluster-file", args.cluster_file,
@ -56,6 +71,12 @@ def run_tester(args, test_file):
cmd += ["--external-client-library", args.external_client_library] cmd += ["--external-client-library", args.external_client_library]
if args.tmp_dir is not None: if args.tmp_dir is not None:
cmd += ["--tmp-dir", args.tmp_dir] cmd += ["--tmp-dir", args.tmp_dir]
log_dir = None
if args.log_dir is not None:
log_dir = Path(args.log_dir).joinpath(random_string(8))
log_dir.mkdir(exist_ok=True)
cmd += ['--log', "--log-dir", str(log_dir)]
if args.blob_granule_local_file_path is not None: if args.blob_granule_local_file_path is not None:
cmd += ["--blob-granule-local-file-path", cmd += ["--blob-granule-local-file-path",
args.blob_granule_local_file_path] args.blob_granule_local_file_path]
@ -63,6 +84,7 @@ def run_tester(args, test_file):
get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd)) get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd))
proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr) proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
timed_out = False timed_out = False
ret_code = 1
try: try:
ret_code = proc.wait(args.timeout) ret_code = proc.wait(args.timeout)
except TimeoutExpired: except TimeoutExpired:
@ -72,15 +94,16 @@ def run_tester(args, test_file):
raise Exception('Unable to run tester (%s)' % e) raise Exception('Unable to run tester (%s)' % e)
if ret_code != 0: if ret_code != 0:
if ret_code < 0: if timed_out:
reason = 'timed out after %d seconds' % args.timeout
elif ret_code < 0:
reason = signal.Signals(-ret_code).name reason = signal.Signals(-ret_code).name
else: else:
reason = 'exit code: %d' % ret_code reason = 'exit code: %d' % ret_code
if timed_out:
reason = 'timed out after %d seconds' % args.timeout
ret_code = 1
get_logger().error('\n\'%s\' did not complete succesfully (%s)' % get_logger().error('\n\'%s\' did not complete succesfully (%s)' %
(cmd[0], reason)) (cmd[0], reason))
if (log_dir is not None):
dump_client_logs(log_dir)
get_logger().info('') get_logger().info('')
return ret_code return ret_code
@ -115,6 +138,8 @@ def parse_args(argv):
help='Path to a directory with test definitions. (default: ./)') help='Path to a directory with test definitions. (default: ./)')
parser.add_argument('--timeout', type=int, default=300, parser.add_argument('--timeout', type=int, default=300,
help='The timeout in seconds for running each individual test. (default 300)') help='The timeout in seconds for running each individual test. (default 300)')
parser.add_argument('--log-dir', type=str, default=None,
help='The directory for storing logs (default: None)')
parser.add_argument('--logging-level', type=str, default='INFO', parser.add_argument('--logging-level', type=str, default='INFO',
choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'], help='Specifies the level of detail in the tester output (default=\'INFO\').') choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'], help='Specifies the level of detail in the tester output (default=\'INFO\').')
parser.add_argument('--tmp-dir', type=str, default=None, parser.add_argument('--tmp-dir', type=str, default=None,

561
bindings/c/test/fdb_api.hpp Normal file
View File

@ -0,0 +1,561 @@
/*
* fdb_api.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDB_API_HPP
#define FDB_API_HPP
#pragma once
#ifndef FDB_API_VERSION
#define FDB_API_VERSION 720
#endif
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>

#include <fmt/format.h>
// introduce the option enums
#include <fdb_c_options.g.h>
namespace fdb {
// hide C API to discourage mixing C/C++ API
namespace native {
#include <foundationdb/fdb_c.h>
}
// Byte-oriented string aliases used throughout the fdb C++ wrapper.
using ByteString = std::basic_string<uint8_t>;
using BytesRef = std::basic_string_view<uint8_t>;
using CharsRef = std::string_view;
using KeyRef = BytesRef;
using ValueRef = BytesRef;

// Reinterpret a char pointer as a byte pointer (no copy, no ownership change).
inline uint8_t const* toBytePtr(char const* ptr) noexcept {
	auto const* bytes = reinterpret_cast<uint8_t const*>(ptr);
	return bytes;
}

// View any 1-byte-character string-like object (std::basic_string{_view}<Char>)
// as a BytesRef without copying.
template <template <class...> class StringLike, class Char>
BytesRef toBytesRef(const StringLike<Char>& s) noexcept {
	static_assert(sizeof(Char) == 1);
	auto const* head = reinterpret_cast<uint8_t const*>(s.data());
	return BytesRef(head, s.size());
}

// View any 1-byte-character string-like object as a CharsRef without copying.
template <template <class...> class StringLike, class Char>
CharsRef toCharsRef(const StringLike<Char>& s) noexcept {
	static_assert(sizeof(Char) == 1);
	auto const* head = reinterpret_cast<char const*>(s.data());
	return CharsRef(head, s.size());
}

// Compile-time switch: when true, intSize() throws if a length cannot fit in int.
[[maybe_unused]] constexpr const bool OverflowCheck = false;

// Narrow a byte-string length to the int that the fdb C API expects.
inline int intSize(BytesRef b) {
	const auto len = b.size();
	if constexpr (OverflowCheck) {
		constexpr auto limit = static_cast<size_t>(std::numeric_limits<int>::max());
		if (len > limit)
			throw std::overflow_error("byte strlen goes beyond int bounds");
	}
	return static_cast<int>(len);
}
class Error {
public:
using CodeType = native::fdb_error_t;
Error() noexcept : err(0) {}
explicit Error(CodeType err) noexcept : err(err) {}
char const* what() noexcept { return native::fdb_get_error(err); }
explicit operator bool() const noexcept { return err != 0; }
bool is(CodeType other) const noexcept { return err == other; }
CodeType code() const noexcept { return err; }
bool retryable() const noexcept { return native::fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err) != 0; }
private:
CodeType err;
};
/* Traits of value types held by ready futures.
   Each trait names the extracted C++ type (Type) and provides extract(),
   which pulls the value out of an already-ready FDBFuture and returns the
   extraction error (code 0 on success). */
namespace future_var {
// Future that carries no value; extraction always succeeds.
struct None {
struct Type {};
static Error extract(native::FDBFuture*, Type&) noexcept { return Error(0); }
};
// 64-bit integer result (e.g. a read version).
struct Int64 {
using Type = int64_t;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
return Error(native::fdb_future_get_int64(f, &out));
}
};
// Key result as (pointer, length); memory is owned by the future.
struct Key {
using Type = std::pair<uint8_t const*, int>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_key, out_key_length] = out;
return Error(native::fdb_future_get_key(f, &out_key, &out_key_length));
}
};
// Optional value result as (present, pointer, length); pointer is only
// meaningful when `present` is true. Converts the C fdb_bool_t to bool.
struct Value {
using Type = std::tuple<bool, uint8_t const*, int>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_present, out_value, out_value_length] = out;
auto out_present_native = native::fdb_bool_t{};
auto err = native::fdb_future_get_value(f, &out_present_native, &out_value, &out_value_length);
out_present = (out_present_native != 0);
return Error(err);
}
};
// Array of C strings as (array, count); memory is owned by the future.
struct StringArray {
using Type = std::pair<const char**, int>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_strings, out_count] = out;
return Error(native::fdb_future_get_string_array(f, &out_strings, &out_count));
}
};
// Key-value array as (array, count, more); `more` indicates that the range
// read was truncated and another request is needed for the remainder.
struct KeyValueArray {
using Type = std::tuple<native::FDBKeyValue const*, int, bool>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_kv, out_count, out_more] = out;
auto out_more_native = native::fdb_bool_t{};
auto err = native::fdb_future_get_keyvalue_array(f, &out_kv, &out_count, &out_more_native);
out_more = (out_more_native != 0);
return Error(err);
}
};
} // namespace future_var
// Throw a std::runtime_error whose message is `preamble` followed by the
// C API's textual description of `err`. Never returns.
[[noreturn]] inline void throwError(std::string_view preamble, Error err) {
auto msg = std::string(preamble);
msg.append(err.what());
throw std::runtime_error(msg);
}
// Highest API version supported by the loaded client library.
inline int maxApiVersion() {
return native::fdb_get_max_api_version();
}
// Select the API version; returns the error rather than throwing.
inline Error selectApiVersionNothrow(int version) {
return Error(native::fdb_select_api_version(version));
}
// Select the API version; throws std::runtime_error on failure.
inline void selectApiVersion(int version) {
if (auto err = selectApiVersionNothrow(version)) {
throwError(fmt::format("ERROR: fdb_select_api_version({}): ", version), err);
}
}
// Wrappers for the global fdb network: option setting, setup, and the
// run/stop pair. run() blocks the calling thread until stop() is called.
namespace network {
// Set a byte-string-valued network option; returns the error.
inline Error setOptionNothrow(FDBNetworkOption option, BytesRef str) noexcept {
return Error(native::fdb_network_set_option(option, str.data(), intSize(str)));
}
// Set an integer-valued network option; the C API takes the value as a
// pointer-to-bytes of the 64-bit integer.
inline Error setOptionNothrow(FDBNetworkOption option, int64_t value) noexcept {
return Error(native::fdb_network_set_option(
option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
}
// Set a byte-string-valued network option; throws on failure.
inline void setOption(FDBNetworkOption option, BytesRef str) {
if (auto err = setOptionNothrow(option, str)) {
throwError(fmt::format("ERROR: fdb_network_set_option({}): ",
static_cast<std::underlying_type_t<FDBNetworkOption>>(option)),
err);
}
}
// Set an integer-valued network option; throws on failure.
inline void setOption(FDBNetworkOption option, int64_t value) {
if (auto err = setOptionNothrow(option, value)) {
throwError(fmt::format("ERROR: fdb_network_set_option({}, {}): ",
static_cast<std::underlying_type_t<FDBNetworkOption>>(option),
value),
err);
}
}
// One-time network setup; returns the error rather than throwing.
inline Error setupNothrow() noexcept {
return Error(native::fdb_setup_network());
}
// One-time network setup; throws on failure.
inline void setup() {
if (auto err = setupNothrow())
throwError("ERROR: fdb_network_setup(): ", err);
}
// Run the network event loop on the calling thread (blocks until stop()).
inline Error run() {
return Error(native::fdb_run_network());
}
// Signal the network event loop to exit.
inline Error stop() {
return Error(native::fdb_stop_network());
}
} // namespace network
class Transaction;
class Database;
// Shared-ownership handle to a synchronous FDBResult (e.g. from
// readBlobGranules). The underlying C object is destroyed when the last
// copy goes away.
class Result {
friend class Transaction;
std::shared_ptr<native::FDBResult> r;
// Only Transaction creates Results; a null pointer yields an invalid handle.
Result(native::FDBResult* result) {
if (result)
r = std::shared_ptr<native::FDBResult>(result, &native::fdb_result_destroy);
}
public:
using KeyValueArray = future_var::KeyValueArray::Type;
// Extract the (kv array, count, more) triple; returns the error.
// The array memory stays owned by this Result.
Error getKeyValueArrayNothrow(KeyValueArray& out) const noexcept {
auto out_more_native = native::fdb_bool_t{};
auto& [out_kv, out_count, out_more] = out;
auto err_raw = native::fdb_result_get_keyvalue_array(r.get(), &out_kv, &out_count, &out_more_native);
out_more = out_more_native != 0;
return Error(err_raw);
}
// Extract the key-value array; throws on failure.
KeyValueArray getKeyValueArray() const {
auto ret = KeyValueArray{};
if (auto err = getKeyValueArrayNothrow(ret))
throwError("ERROR: result_get_keyvalue_array(): ", err);
return ret;
}
};
// Shared-ownership handle to an FDBFuture. Copyable; the underlying C
// future is destroyed when the last copy goes away. Use then() to attach a
// completion callback, or blockUntilReady() + get<Traits>() synchronously.
class Future {
protected:
	friend class Transaction;
	std::shared_ptr<native::FDBFuture> f;

	// A null pointer yields an invalid (empty) handle.
	Future(native::FDBFuture* future) {
		if (future)
			f = std::shared_ptr<native::FDBFuture>(future, &native::fdb_future_destroy);
	}

	// Wrap any capturing lambda as a callback passable to
	// fdb_future_set_callback(); the heap-allocated lambda is destroyed
	// after invocation. Exceptions are reported to stderr, not propagated,
	// because the callback runs on the fdb network thread.
	template <class Fn>
	static void callback(native::FDBFuture*, void* param) {
		auto fp = static_cast<Fn*>(param);
		try {
			(*fp)();
		} catch (const std::exception& e) {
			fmt::print(stderr, "ERROR: Exception thrown in user callback: {}", e.what());
		}
		delete fp;
	}

	// Register a user completion handler of signature void(FutureType).
	// The handler captures a copy of this future, keeping it alive until
	// the callback has run.
	template <class FutureType, class UserFunc>
	void then(UserFunc&& fn) {
		auto cb = [fut = FutureType(*this), fn = std::forward<UserFunc>(fn)]() { fn(fut); };
		using cb_type = std::decay_t<decltype(cb)>;
		auto fp = new cb_type(std::move(cb));
		if (auto err = Error(native::fdb_future_set_callback(f.get(), &callback<cb_type>, fp))) {
			throwError("ERROR: future_set_callback: ", err);
		}
	}

public:
	Future() noexcept : Future(nullptr) {}
	Future(const Future&) noexcept = default;
	Future& operator=(const Future&) noexcept = default;

	// True when this handle refers to a real C future.
	bool valid() const noexcept { return f != nullptr; }

	explicit operator bool() const noexcept { return valid(); }

	// True when the future has completed (successfully or with an error).
	bool ready() const noexcept {
		assert(valid());
		return native::fdb_future_is_ready(f.get()) != 0;
	}

	// Block the calling thread until the future is ready.
	Error blockUntilReady() const noexcept {
		assert(valid());
		return Error(native::fdb_future_block_until_ready(f.get()));
	}

	// The future's error state; only meaningful once ready.
	Error error() const noexcept {
		assert(valid());
		return Error(native::fdb_future_get_error(f.get()));
	}

	// Cancel the underlying operation. NOTE(review): unlike the accessors
	// above, this does not assert valid(); calling it on an empty handle
	// passes nullptr to the C API.
	void cancel() noexcept { native::fdb_future_cancel(f.get()); }

	// Extract the contained value per VarTraits; throws on extraction error.
	// Precondition: ready and not in an error state.
	template <class VarTraits>
	typename VarTraits::Type get() const {
		assert(valid());
		assert(!error());
		auto out = typename VarTraits::Type{};
		if (auto err = VarTraits::extract(f.get(), out)) {
			throwError("future_get: ", err);
		}
		return out;
	}

	// Extract the contained value per VarTraits into `var`; returns the
	// extraction error. BUG FIX: previously this extracted into a discarded
	// local and never wrote the caller's `var`; now it extracts directly
	// into `var`.
	template <class VarTraits>
	Error getNothrow(typename VarTraits::Type& var) const noexcept {
		assert(valid());
		assert(!error());
		return VarTraits::extract(f.get(), var);
	}

	// Attach a completion handler of signature void(Future).
	template <class UserFunc>
	void then(UserFunc&& fn) {
		then<Future>(std::forward<UserFunc>(fn));
	}
};
// Future statically tagged with its result type via VarTraits, so get()
// and getNothrow() need no explicit template argument and then() hands the
// handler a TypedFuture of the same type.
template <typename VarTraits>
class TypedFuture : public Future {
friend class Future;
friend class Transaction;
using SelfType = TypedFuture<VarTraits>;
using Future::Future;
// hide type-unsafe inherited functions
using Future::get;
using Future::getNothrow;
using Future::then;
// Only Future/Transaction may re-tag an untyped future.
TypedFuture(const Future& f) noexcept : Future(f) {}
public:
using ContainedType = typename VarTraits::Type;
// Drop the static type tag, yielding a plain Future.
Future eraseType() const noexcept { return static_cast<Future const&>(*this); }
// Extract the typed value; throws on extraction error.
ContainedType get() const { return get<VarTraits>(); }
// Extract the typed value into `out`; returns the extraction error.
Error getNothrow(ContainedType& out) const noexcept { return getNothrow<VarTraits>(out); }
// Attach a completion handler of signature void(TypedFuture<VarTraits>).
template <class UserFunc>
void then(UserFunc&& fn) {
Future::then<SelfType>(std::forward<UserFunc>(fn));
}
};
// Mirrors the C API's 4-argument key-selector convention
// (key, keyLength, orEqual, offset); `key` is not owned by this struct.
struct KeySelector {
const uint8_t* key;
int keyLength;
bool orEqual;
int offset;
};
// Factory helpers wrapping the FDB_KEYSEL_* macros, which expand to the
// first three initializer fields plus a base offset that the extra
// `+ offset` adjusts.
namespace key_select {
inline KeySelector firstGreaterThan(KeyRef key, int offset = 0) {
return KeySelector{ FDB_KEYSEL_FIRST_GREATER_THAN(key.data(), intSize(key)) + offset };
}
inline KeySelector firstGreaterOrEqual(KeyRef key, int offset = 0) {
return KeySelector{ FDB_KEYSEL_FIRST_GREATER_OR_EQUAL(key.data(), intSize(key)) + offset };
}
inline KeySelector lastLessThan(KeyRef key, int offset = 0) {
return KeySelector{ FDB_KEYSEL_LAST_LESS_THAN(key.data(), intSize(key)) + offset };
}
inline KeySelector lastLessOrEqual(KeyRef key, int offset = 0) {
return KeySelector{ FDB_KEYSEL_LAST_LESS_OR_EQUAL(key.data(), intSize(key)) + offset };
}
} // namespace key_select
// Shared-ownership handle to an FDBTransaction. Read methods return typed
// futures; mutation methods (set/clear/clearRange) buffer locally until
// commit(). Copyable; copies share the same underlying transaction.
class Transaction {
friend class Database;
std::shared_ptr<native::FDBTransaction> tr;
// Only Database creates transactions; a null pointer yields an invalid handle.
explicit Transaction(native::FDBTransaction* tr_raw) {
if (tr_raw)
tr = std::shared_ptr<native::FDBTransaction>(tr_raw, &native::fdb_transaction_destroy);
}
public:
Transaction() noexcept : Transaction(nullptr) {}
Transaction(const Transaction&) noexcept = default;
Transaction& operator=(const Transaction&) noexcept = default;
// True when this handle refers to a real C transaction.
bool valid() const noexcept { return tr != nullptr; }
explicit operator bool() const noexcept { return valid(); }
// Set an integer-valued transaction option; returns the error. The C API
// takes the value as a pointer-to-bytes of the 64-bit integer.
Error setOptionNothrow(FDBTransactionOption option, int64_t value) noexcept {
return Error(native::fdb_transaction_set_option(
tr.get(), option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
}
// Set a byte-string-valued transaction option; returns the error.
Error setOptionNothrow(FDBTransactionOption option, BytesRef str) noexcept {
return Error(native::fdb_transaction_set_option(tr.get(), option, str.data(), intSize(str)));
}
// Set an integer-valued transaction option; throws on failure.
void setOption(FDBTransactionOption option, int64_t value) {
if (auto err = setOptionNothrow(option, value)) {
throwError(fmt::format("transaction_set_option({}, {}) returned error: ",
static_cast<std::underlying_type_t<FDBTransactionOption>>(option),
value),
err);
}
}
// Set a byte-string-valued transaction option; throws on failure.
void setOption(FDBTransactionOption option, BytesRef str) {
if (auto err = setOptionNothrow(option, str)) {
throwError(fmt::format("transaction_set_option({}) returned error: ",
static_cast<std::underlying_type_t<FDBTransactionOption>>(option)),
err);
}
}
// Future for this transaction's read version.
TypedFuture<future_var::Int64> getReadVersion() { return native::fdb_transaction_get_read_version(tr.get()); }
// Committed version after a successful commit(); returns the error.
Error getCommittedVersionNothrow(int64_t& out) {
return Error(native::fdb_transaction_get_committed_version(tr.get(), &out));
}
// Committed version after a successful commit(); throws on failure.
int64_t getCommittedVersion() {
auto out = int64_t{};
if (auto err = getCommittedVersionNothrow(out)) {
throwError("get_committed_version: ", err);
}
return out;
}
// Resolve a key selector to a key.
TypedFuture<future_var::Key> getKey(KeySelector sel, bool snapshot) {
return native::fdb_transaction_get_key(tr.get(), sel.key, sel.keyLength, sel.orEqual, sel.offset, snapshot);
}
// Read the value for a single key (optional result).
TypedFuture<future_var::Value> get(KeyRef key, bool snapshot) {
return native::fdb_transaction_get(tr.get(), key.data(), intSize(key), snapshot);
}
// Usage: tx.getRange(key_select::firstGreaterOrEqual(firstKey), key_select::lastLessThan(lastKey), ...)
// gets key-value pairs in key range [begin, end)
TypedFuture<future_var::KeyValueArray> getRange(KeySelector first,
KeySelector last,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
bool snapshot,
bool reverse) {
return native::fdb_transaction_get_range(tr.get(),
first.key,
first.keyLength,
first.orEqual,
first.offset,
last.key,
last.keyLength,
last.orEqual,
last.offset,
limit,
target_bytes,
mode,
iteration,
snapshot,
reverse);
}
// Synchronously read blob granules in [begin, end); the returned Result
// owns the key-value memory.
Result readBlobGranules(KeyRef begin,
KeyRef end,
int64_t begin_version,
int64_t read_version,
native::FDBReadBlobGranuleContext context) {
return Result(native::fdb_transaction_read_blob_granules(
tr.get(), begin.data(), intSize(begin), end.data(), intSize(end), begin_version, read_version, context));
}
// Attempt to commit buffered mutations.
TypedFuture<future_var::None> commit() { return native::fdb_transaction_commit(tr.get()); }
// Standard retry hook: ready when the error has been handled and the
// transaction may be retried.
TypedFuture<future_var::None> onError(Error err) { return native::fdb_transaction_on_error(tr.get(), err.code()); }
// Reset to a freshly-created state, discarding buffered mutations.
void reset() { return native::fdb_transaction_reset(tr.get()); }
// Buffer a set of `key` to `value`.
void set(KeyRef key, ValueRef value) {
native::fdb_transaction_set(tr.get(), key.data(), intSize(key), value.data(), intSize(value));
}
// Buffer a clear of a single key.
void clear(KeyRef key) { native::fdb_transaction_clear(tr.get(), key.data(), intSize(key)); }
// Buffer a clear of the key range [begin, end).
void clearRange(KeyRef begin, KeyRef end) {
native::fdb_transaction_clear_range(tr.get(), begin.data(), intSize(begin), end.data(), intSize(end));
}
};
// Shared-ownership handle to an FDBDatabase, created from a cluster file.
// Copyable; the underlying C database is destroyed with the last copy.
class Database {
std::shared_ptr<native::FDBDatabase> db;
public:
Database(const Database&) noexcept = default;
Database& operator=(const Database&) noexcept = default;
// Open a database from a cluster file path; throws on failure.
Database(const std::string& cluster_file_path) : db(nullptr) {
auto db_raw = static_cast<native::FDBDatabase*>(nullptr);
if (auto err = Error(native::fdb_create_database(cluster_file_path.c_str(), &db_raw)))
throwError(fmt::format("Failed to create database with '{}': ", cluster_file_path), err);
db = std::shared_ptr<native::FDBDatabase>(db_raw, &native::fdb_database_destroy);
}
// Empty (invalid) handle.
Database() noexcept : db(nullptr) {}
// Set an integer-valued database option; returns the error. The C API
// takes the value as a pointer-to-bytes of the 64-bit integer.
Error setOptionNothrow(FDBDatabaseOption option, int64_t value) noexcept {
return Error(native::fdb_database_set_option(
db.get(), option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
}
// Set a byte-string-valued database option; returns the error.
Error setOptionNothrow(FDBDatabaseOption option, BytesRef str) noexcept {
return Error(native::fdb_database_set_option(db.get(), option, str.data(), intSize(str)));
}
// Set an integer-valued database option; throws on failure.
void setOption(FDBDatabaseOption option, int64_t value) {
if (auto err = setOptionNothrow(option, value)) {
throwError(fmt::format("database_set_option({}, {}) returned error: ",
static_cast<std::underlying_type_t<FDBDatabaseOption>>(option),
value),
err);
}
}
// Set a byte-string-valued database option; throws on failure.
void setOption(FDBDatabaseOption option, BytesRef str) {
if (auto err = setOptionNothrow(option, str)) {
throwError(fmt::format("database_set_option({}) returned error: ",
static_cast<std::underlying_type_t<FDBDatabaseOption>>(option)),
err);
}
}
// Create a new transaction on this database; throws if the handle is
// empty or the C API fails.
Transaction createTransaction() {
if (!db)
throw std::runtime_error("create_transaction from null database");
auto tx_native = static_cast<native::FDBTransaction*>(nullptr);
auto err = Error(native::fdb_database_create_transaction(db.get(), &tx_native));
if (err)
throwError("Failed to create transaction: ", err);
return Transaction(tx_native);
}
};
} // namespace fdb
#endif /*FDB_API_HPP*/

View File

@ -0,0 +1,288 @@
/*
* async.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <boost/asio.hpp>
#include "async.hpp"
#include "future.hpp"
#include "logger.hpp"
#include "operations.hpp"
#include "stats.hpp"
#include "time.hpp"
#include "utils.hpp"
extern thread_local mako::Logger logr;
using namespace fdb;
namespace mako {
// Schedule the next populate iteration on the io_context; capturing
// shared_from_this() keeps this state object alive until the tick runs.
void ResumableStateForPopulate::postNextTick() {
boost::asio::post(io_context, [this, state = shared_from_this()]() { runOneTick(); });
}
// One asynchronous populate step: buffer inserts from key_checkpoint until
// the next commit boundary (every num_commit_every keys, or key_end), then
// issue an async commit and break out of the loop. The commit continuation
// either retries via tx.onError(), finishes at key_end, or re-posts the
// next tick; shared_from_this() captures keep `this` alive throughout.
void ResumableStateForPopulate::runOneTick() {
const auto num_commit_every = args.txnspec.ops[OP_INSERT][OP_COUNT];
for (auto i = key_checkpoint; i <= key_end; i++) {
genKey(keystr.data(), KEY_PREFIX, args, i);
randomString(valstr.data(), args.value_length);
tx.set(keystr, valstr);
stats.incrOpCount(OP_INSERT);
// Commit at every num_commit_every-th key (counted from key_begin) and
// at the final key.
if (i == key_end || (i - key_begin + 1) % num_commit_every == 0) {
watch_commit.start();
tx.commit().then([this, state = shared_from_this(), i](Future f) {
if (auto err = f.error()) {
// Retryable errors log at VERBOSE_WARN; fatal ones always log.
logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
"ERROR",
"commit for populate returned '{}'",
err.what());
tx.onError(err).then([this, state = shared_from_this()](Future f) {
const auto f_rc = handleForOnError(tx, f, "ON_ERROR_FOR_POPULATE");
if (f_rc == FutureRC::ABORT) {
signalEnd();
return;
} else {
// Retry from key_checkpoint (uncommitted keys are replayed).
postNextTick();
}
});
} else {
// successfully committed
watch_commit.stop();
watch_tx.setStop(watch_commit.getStop());
// Sample latencies every args.sampling-th transaction.
if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
const auto commit_latency = watch_commit.diff();
const auto tx_duration = watch_tx.diff();
stats.addLatency(OP_COMMIT, commit_latency);
stats.addLatency(OP_TRANSACTION, tx_duration);
sample_bins[OP_COMMIT].put(commit_latency);
sample_bins[OP_TRANSACTION].put(tx_duration);
}
stats.incrOpCount(OP_COMMIT);
stats.incrOpCount(OP_TRANSACTION);
tx.reset();
watch_tx.startFromStop();
// Advance the checkpoint past the committed batch.
key_checkpoint = i + 1;
if (i != key_end) {
postNextTick();
} else {
logr.debug("Populated {} rows [{}, {}]: {:6.3f} sec",
key_end - key_begin + 1,
key_begin,
key_end,
toDoubleSeconds(watch_total.stop().diff()));
signalEnd();
return;
}
}
});
// The commit continuation drives the next step; stop looping here.
break;
}
}
}
void ResumableStateForRunWorkload::postNextTick() {
    // Hand the next workload step back to the io_context executor, keeping
    // this state machine alive via the shared_from_this() capture.
    auto resume = [this, state = shared_from_this()]() { runOneTick(); };
    boost::asio::post(io_context, std::move(resume));
}
// Execute workload steps starting at `iter`.  Steps whose stepFunction
// returns an empty Future complete client-side and are repeated inline (the
// goto loop) to avoid executor round-trips; a non-empty Future registers a
// continuation and returns.  On error the transaction is retried from the
// first op via onError(); when the op sequence completes,
// onTransactionSuccess() finishes (and possibly commits) the transaction.
void ResumableStateForRunWorkload::runOneTick() {
    assert(iter != OpEnd);
    if (iter.step == 0 /* first step */)
        prepareKeys(iter.op, key1, key2, args);
    watch_step.start();
    if (iter.step == 0)
        watch_op = Stopwatch(watch_step.getStart());
    auto f = Future{};
    // to minimize context switch overhead, repeat immediately completed ops
    // in a loop, not an async continuation.
repeat_immediate_steps:
    f = opTable[iter.op].stepFunction(iter.step)(tx, args, key1, key2, val);
    if (!f) {
        // immediately completed client-side ops: e.g. set, setrange, clear, clearrange, ...
        updateStepStats();
        iter = getOpNext(args, iter);
        if (iter == OpEnd)
            onTransactionSuccess();
        else
            goto repeat_immediate_steps;
    } else {
        // step is blocking. register a continuation and return
        f.then([this, state = shared_from_this()](Future f) {
            // give the op a chance to inspect the completed future (e.g. read a value)
            if (auto postStepFn = opTable[iter.op].postStepFunction(iter.step))
                postStepFn(f, tx, args, key1, key2, val);
            if (iter.stepKind() != StepKind::ON_ERROR) {
                if (auto err = f.error()) {
                    // step failed: run onError() for implicit backoff, then
                    // either abort or restart the op sequence from the top
                    logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
                                           "ERROR",
                                           "{}:{} returned '{}'",
                                           iter.opName(),
                                           iter.step,
                                           err.what());
                    tx.onError(err).then([this, state = shared_from_this()](Future f) {
                        const auto rc = handleForOnError(tx, f, fmt::format("{}:{}", iter.opName(), iter.step));
                        if (rc == FutureRC::RETRY) {
                            stats.incrErrorCount(iter.op);
                        } else if (rc == FutureRC::CONFLICT) {
                            stats.incrConflictCount();
                        } else if (rc == FutureRC::ABORT) {
                            tx.reset();
                            signalEnd();
                            return;
                        }
                        // restart this iteration from beginning
                        iter = getOpBegin(args);
                        needs_commit = false;
                        postNextTick();
                    });
                } else {
                    // async step succeeded
                    updateStepStats();
                    iter = getOpNext(args, iter);
                    if (iter == OpEnd) {
                        onTransactionSuccess();
                    } else {
                        postNextTick();
                    }
                }
            } else {
                // blob granules op error: the step itself was an ON_ERROR
                // step, so its future already carries the onError() result
                auto rc = handleForOnError(tx, f, "BG_ON_ERROR");
                if (rc == FutureRC::RETRY) {
                    stats.incrErrorCount(iter.op);
                } else if (rc == FutureRC::CONFLICT) {
                    stats.incrConflictCount();
                } else if (rc == FutureRC::ABORT) {
                    tx.reset();
                    stopcount.fetch_add(1);
                    return;
                }
                iter = getOpBegin(args);
                needs_commit = false;
                // restart this iteration from beginning
                postNextTick();
            }
        });
    }
}
// Record timing/counters for a step that just completed successfully:
// commit steps reset the transaction boundary, and the final step of an op
// finalizes that op's latency sample and count.
void ResumableStateForRunWorkload::updateStepStats() {
    logr.debug("Step {}:{} succeeded", iter.opName(), iter.step);
    // step successful
    watch_step.stop();
    // sample latencies every args.sampling-th transaction
    const auto do_sample = stats.getOpCount(OP_TRANSACTION) % args.sampling == 0;
    if (iter.stepKind() == StepKind::COMMIT) {
        // reset transaction boundary
        const auto step_latency = watch_step.diff();
        if (do_sample) {
            stats.addLatency(OP_COMMIT, step_latency);
            sample_bins[OP_COMMIT].put(step_latency);
        }
        tx.reset();
        stats.incrOpCount(OP_COMMIT);
        // the mid-sequence commit satisfied any pending commit obligation
        needs_commit = false;
    }
    // op completed successfully
    if (iter.step + 1 == opTable[iter.op].steps()) {
        if (opTable[iter.op].needsCommit())
            needs_commit = true; // defer commit to end of the op sequence
        watch_op.setStop(watch_step.getStop());
        if (do_sample) {
            const auto op_latency = watch_op.diff();
            stats.addLatency(iter.op, op_latency);
            sample_bins[iter.op].put(op_latency);
        }
        stats.incrOpCount(iter.op);
    }
}
// Called when the op sequence has completed.  Commits first if any op (or
// --commit_get) requires it, records transaction/commit latencies, then
// either starts the next iteration or signals end-of-stream.
void ResumableStateForRunWorkload::onTransactionSuccess() {
    if (needs_commit || args.commit_get) {
        // task completed, need to commit before finish
        watch_commit.start();
        tx.commit().then([this, state = shared_from_this()](Future f) {
            if (auto err = f.error()) {
                // commit had errors: back off through onError(), then abort
                // or restart the op sequence from the beginning
                logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
                                       "ERROR",
                                       "Post-iteration commit returned error: {}",
                                       err.what());
                tx.onError(err).then([this, state = shared_from_this()](Future f) {
                    const auto rc = handleForOnError(tx, f, "ON_ERROR");
                    if (rc == FutureRC::CONFLICT)
                        stats.incrConflictCount();
                    else
                        stats.incrErrorCount(OP_COMMIT);
                    if (rc == FutureRC::ABORT) {
                        signalEnd();
                        return;
                    }
                    if (ended()) {
                        signalEnd();
                    } else {
                        iter = getOpBegin(args);
                        needs_commit = false;
                        postNextTick();
                    }
                });
            } else {
                // commit successful
                watch_commit.stop();
                watch_tx.setStop(watch_commit.getStop());
                if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
                    const auto commit_latency = watch_commit.diff();
                    const auto tx_duration = watch_tx.diff();
                    stats.addLatency(OP_COMMIT, commit_latency);
                    // BUGFIX: was addLatency(OP_TRANSACTION, commit_latency),
                    // which recorded the commit latency as the transaction
                    // duration (cf. the populate path, which uses tx_duration).
                    stats.addLatency(OP_TRANSACTION, tx_duration);
                    sample_bins[OP_COMMIT].put(commit_latency);
                    sample_bins[OP_TRANSACTION].put(tx_duration);
                }
                stats.incrOpCount(OP_COMMIT);
                stats.incrOpCount(OP_TRANSACTION);
                tx.reset();
                watch_tx.startFromStop();
                if (ended()) {
                    signalEnd();
                } else {
                    // start next iteration
                    iter = getOpBegin(args);
                    postNextTick();
                }
            }
        });
    } else {
        // transaction completed but no need to commit
        watch_tx.stop();
        if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
            const auto tx_duration = watch_tx.diff();
            stats.addLatency(OP_TRANSACTION, tx_duration);
            sample_bins[OP_TRANSACTION].put(tx_duration);
        }
        stats.incrOpCount(OP_TRANSACTION);
        watch_tx.startFromStop();
        tx.reset();
        if (ended()) {
            signalEnd();
        } else {
            iter = getOpBegin(args);
            // start next iteration
            postNextTick();
        }
    }
}
} // namespace mako

View File

@ -0,0 +1,127 @@
/*
* async.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_ASYNC_HPP
#define MAKO_ASYNC_HPP
#include <atomic>
#include <memory>
#include <boost/asio.hpp>
#include "logger.hpp"
#include "mako.hpp"
#include "shm.hpp"
#include "stats.hpp"
#include "time.hpp"
namespace mako {
// as we don't have coroutines yet, we need to store in heap the complete state of execution,
// such that we can resume exactly where we were from last database op.
// Heap-allocated state machine that populates rows [key_begin, key_end];
// kept alive across boost::asio continuations by shared_from_this() captures.
struct ResumableStateForPopulate : std::enable_shared_from_this<ResumableStateForPopulate> {
    Logger logr;
    fdb::Database db;
    fdb::Transaction tx;
    boost::asio::io_context& io_context;
    Arguments const& args;
    ThreadStatistics& stats;
    std::atomic<int>& stopcount; // shared finished-stream counter; bumped once by signalEnd()
    LatencySampleBinArray sample_bins;
    int key_begin;      // first key index to insert (inclusive)
    int key_end;        // last key index to insert (inclusive)
    int key_checkpoint; // first key index not yet committed; resume point after retry
    fdb::ByteString keystr; // reusable key buffer, sized to args.key_length
    fdb::ByteString valstr; // reusable value buffer, sized to args.value_length
    Stopwatch watch_tx;     // spans one transaction (reset after each commit)
    Stopwatch watch_commit; // spans one commit round-trip
    Stopwatch watch_total;  // spans the whole populate run
    ResumableStateForPopulate(Logger logr,
                              fdb::Database db,
                              fdb::Transaction tx,
                              boost::asio::io_context& io_context,
                              Arguments const& args,
                              ThreadStatistics& stats,
                              std::atomic<int>& stopcount,
                              int key_begin,
                              int key_end)
      : logr(logr), db(db), tx(tx), io_context(io_context), args(args), stats(stats), stopcount(stopcount),
        key_begin(key_begin), key_end(key_end), key_checkpoint(key_begin) {
        keystr.resize(args.key_length);
        valstr.resize(args.value_length);
    }
    void runOneTick();   // run populate steps until an async commit is issued
    void postNextTick(); // re-post runOneTick() onto io_context
    void signalEnd() { stopcount.fetch_add(1); }
};
using PopulateStateHandle = std::shared_ptr<ResumableStateForPopulate>;
// Heap-allocated state machine driving one asynchronous workload transaction
// stream; kept alive across boost::asio continuations by shared_from_this()
// captures.
struct ResumableStateForRunWorkload : std::enable_shared_from_this<ResumableStateForRunWorkload> {
    Logger logr;
    fdb::Database db;
    fdb::Transaction tx;
    boost::asio::io_context& io_context;
    Arguments const& args;
    ThreadStatistics& stats;
    std::atomic<int>& stopcount;      // shared finished-stream counter; bumped once by signalEnd()
    std::atomic<int> const& signal;   // master's run signal; SIGNAL_RED requests shutdown
    int max_iters;                    // transaction budget for this stream; -1 = unlimited
    OpIterator iter;                  // current (op, step) position in the workload sequence
    LatencySampleBinArray sample_bins;
    fdb::ByteString key1; // reusable key buffers, sized to args.key_length
    fdb::ByteString key2;
    fdb::ByteString val;  // reusable value buffer, sized to args.value_length
    Stopwatch watch_step;   // spans one step
    Stopwatch watch_op;     // spans one op (all its steps)
    Stopwatch watch_commit; // spans the post-iteration commit
    Stopwatch watch_tx;     // spans one whole transaction
    bool needs_commit;      // set when an op requires a commit at sequence end
    ResumableStateForRunWorkload(Logger logr,
                                 fdb::Database db,
                                 fdb::Transaction tx,
                                 boost::asio::io_context& io_context,
                                 Arguments const& args,
                                 ThreadStatistics& stats,
                                 std::atomic<int>& stopcount,
                                 std::atomic<int> const& signal,
                                 int max_iters,
                                 OpIterator iter)
      : logr(logr), db(db), tx(tx), io_context(io_context), args(args), stats(stats), stopcount(stopcount),
        signal(signal), max_iters(max_iters), iter(iter), needs_commit(false) {
        key1.resize(args.key_length);
        key2.resize(args.key_length);
        val.resize(args.value_length);
    }
    void signalEnd() noexcept { stopcount.fetch_add(1); }
    // True when this stream should stop: the transaction budget is exhausted
    // or the master turned the signal red.
    bool ended() noexcept {
        // BUGFIX: was `max_iters >= stats.getOpCount(OP_TRANSACTION)`, which
        // holds from the very first transaction and ended the stream
        // immediately whenever an iteration limit was set.  The stream ends
        // once the completed-transaction count has reached max_iters.
        return (max_iters != -1 && max_iters <= stats.getOpCount(OP_TRANSACTION)) || signal.load() == SIGNAL_RED;
    }
    void postNextTick(); // re-post runOneTick() onto io_context
    void runOneTick();   // execute steps until one blocks or the sequence ends
    void updateStepStats();
    void onTransactionSuccess();
};
using RunWorkloadStateHandle = std::shared_ptr<ResumableStateForRunWorkload>;
} // namespace mako
#endif /*MAKO_ASYNC_HPP*/

View File

@ -0,0 +1,116 @@
/*
* blob_granules.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blob_granules.hpp"
#include "limit.hpp"
#include "logger.hpp"
#include <cstdio>
#include <fdb_api.hpp>
extern thread_local mako::Logger logr;
namespace mako::blob_granules::local_file {
int64_t startLoad(const char* filename,
int filenameLength,
int64_t offset,
int64_t length,
int64_t fullFileLength,
void* userContext) {
FILE* fp;
char full_fname[PATH_MAX]{
0,
};
int loadId;
uint8_t* data;
size_t readSize;
auto context = static_cast<UserContext*>(userContext);
loadId = context->nextId;
if (context->dataById[loadId] != 0) {
logr.error("too many granule file loads at once: {}", MAX_BG_IDS);
return -1;
}
context->nextId = (context->nextId + 1) % MAX_BG_IDS;
int ret = snprintf(full_fname, PATH_MAX, "%s%s", context->bgFilePath, filename);
if (ret < 0 || ret >= PATH_MAX) {
logr.error("BG filename too long: {}{}", context->bgFilePath, filename);
return -1;
}
fp = fopen(full_fname, "r");
if (!fp) {
logr.error("BG could not open file: {}", full_fname);
return -1;
}
// don't seek if offset == 0
if (offset && fseek(fp, offset, SEEK_SET)) {
// if fseek was non-zero, it failed
logr.error("BG could not seek to %{} in file {}", offset, full_fname);
fclose(fp);
return -1;
}
data = new uint8_t[length];
readSize = fread(data, sizeof(uint8_t), length, fp);
fclose(fp);
if (readSize != length) {
logr.error("BG could not read {} bytes from file: {}", length, full_fname);
return -1;
}
context->dataById[loadId] = data;
return loadId;
}
// Look up the buffer previously registered under loadId.
// Returns the buffer, or null (with an error logged) if the id is unknown.
uint8_t* getLoad(int64_t loadId, void* userContext) {
    auto* ctx = static_cast<UserContext*>(userContext);
    auto* data = ctx->dataById[loadId];
    if (data == 0)
        logr.error("BG loadId invalid for get_load: {}", loadId);
    return data;
}
// Release the buffer registered under loadId and clear its slot.
// An unknown id is logged; delete[] on the null slot is then a no-op.
void freeLoad(int64_t loadId, void* userContext) {
    auto* ctx = static_cast<UserContext*>(userContext);
    if (ctx->dataById[loadId] == 0)
        logr.error("BG loadId invalid for free_load: {}", loadId);
    delete[] ctx->dataById[loadId];
    ctx->dataById[loadId] = 0;
}
// Build the FDB read-blob-granule callback context wired to the local-file
// loader callbacks above; ctx must outlive the returned struct.
fdb::native::FDBReadBlobGranuleContext createApiContext(UserContext& ctx, bool materialize_files) {
    auto api_ctx = fdb::native::FDBReadBlobGranuleContext{};
    api_ctx.userContext = &ctx;
    api_ctx.start_load_f = &startLoad;
    api_ctx.get_load_f = &getLoad;
    api_ctx.free_load_f = &freeLoad;
    // materialization can be disabled for debugging the read path alone
    api_ctx.debugNoMaterialize = !materialize_files;
    api_ctx.granuleParallelism = 2; // TODO make knob or setting for changing this?
    return api_ctx;
}
} // namespace mako::blob_granules::local_file

View File

@ -0,0 +1,50 @@
/*
* blob_granules.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_BLOB_GRANULES_HPP
#define MAKO_BLOB_GRANULES_HPP
#include <cstdint>
#include <memory>
#include <fdb_api.hpp>
namespace mako::blob_granules::local_file {
constexpr const int MAX_BG_IDS = 1000;
// TODO: could always abstract this into something more generically usable by something other than mako.
// But outside of testing there are likely few use cases for local granules
// Per-worker context threaded through the blob granule load callbacks:
// owns the table of in-flight load buffers indexed by load id.
struct UserContext {
    char const* bgFilePath; // directory prefix prepended to granule filenames (not owned)
    int nextId;             // next load id to hand out; wraps modulo MAX_BG_IDS
    std::unique_ptr<uint8_t*[]> dataByIdMem; // owning storage for the id table
    uint8_t** dataById;                      // raw view of dataByIdMem; null entry = free slot
    UserContext(char const* filePath)
      : bgFilePath(filePath), nextId(0), dataByIdMem(new uint8_t*[MAX_BG_IDS]()), dataById(dataByIdMem.get()) {}
    // Drop the id table (leaks any buffers still registered; callers are
    // expected to have freed their loads first).
    void clear() { dataByIdMem.reset(); }
};
fdb::native::FDBReadBlobGranuleContext createApiContext(UserContext& ctx, bool materialize_files);
} // namespace mako::blob_granules::local_file
#endif /*MAKO_BLOB_GRANULES_HPP*/

View File

@ -0,0 +1,89 @@
/*
* future.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_FUTURE_HPP
#define MAKO_FUTURE_HPP
#include <fdb_api.hpp>
#include <cassert>
#include <string_view>
#include "logger.hpp"
#include "macro.hpp"
extern thread_local mako::Logger logr;
namespace mako {
// Outcome classification for transaction futures.
enum class FutureRC { OK, RETRY, CONFLICT, ABORT };
// Classify the result of a completed onError() future: a clean result means
// the retry loop may continue; not_committed maps to CONFLICT; any other
// unretryable error resets the transaction and aborts the stream.
template <class FutureType>
force_inline FutureRC handleForOnError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
    const auto err = f.error();
    if (!err)
        return FutureRC::RETRY;
    if (err.is(1020 /*not_committed*/))
        return FutureRC::CONFLICT;
    if (err.retryable()) {
        logr.warn("Retryable error '{}' found at on_error(), step: {}", err.what(), step);
        return FutureRC::RETRY;
    }
    logr.error("Unretryable error '{}' found at on_error(), step: {}", err.what(), step);
    tx.reset();
    return FutureRC::ABORT;
}
// Block until the onError() future is ready, then classify it with
// handleForOnError().  A failure while waiting itself aborts the stream.
template <class FutureType>
force_inline FutureRC waitAndHandleForOnError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
    assert(f);
    const auto wait_err = f.blockUntilReady();
    if (wait_err) {
        logr.error("'{}' found while waiting for on_error() future, step: {}", wait_err.what(), step);
        return FutureRC::ABORT;
    }
    return handleForOnError(tx, f, step);
}
// wait on any non-immediate tx-related step to complete. Follow up with on_error().
// wait on any non-immediate tx-related step to complete. Follow up with on_error().
// Returns OK when the step succeeded, RETRY/CONFLICT when the caller should
// retry the transaction, ABORT when the stream should stop.
template <class FutureType>
force_inline FutureRC waitAndHandleError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
    assert(f);
    auto err = fdb::Error{};
    if ((err = f.blockUntilReady())) {
        const auto retry = err.retryable();
        logr.error("{} error '{}' found during step: {}", (retry ? "Retryable" : "Unretryable"), err.what(), step);
        return retry ? FutureRC::RETRY : FutureRC::ABORT;
    }
    err = f.error();
    if (!err)
        return FutureRC::OK;
    if (err.retryable()) {
        logr.warn("step {} returned '{}'", step, err.what());
    } else {
        logr.error("step {} returned '{}'", step, err.what());
    }
    // implicit backoff
    auto follow_up = tx.onError(err);
    // BUGFIX: previously passed the original (failed) future `f` here instead
    // of the onError() follow-up future, so the backoff result was never
    // inspected and the original error was re-classified.
    return waitAndHandleForOnError(tx, follow_up, step);
}
} // namespace mako
#endif /*MAKO_FUTURE_HPP*/

View File

@ -0,0 +1,32 @@
/*
* limit.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIMIT_HPP
#define LIMIT_HPP
#if defined(__linux__)
#include <linux/limits.h>
#elif defined(__APPLE__)
#include <sys/syslimits.h>
#else
#include <limits.h>
#endif
#endif

View File

@ -0,0 +1,117 @@
/*
* logger.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_LOGGER_HPP
#define MAKO_LOGGER_HPP
#include <fmt/format.h>
#include <cassert>
#include <cstdio>
#include <iterator>
#include <string_view>
#include "process.hpp"
namespace mako {
constexpr const int VERBOSE_NONE = 0; // will still print errors
constexpr const int VERBOSE_DEFAULT = 1; // will print info and work stats
constexpr const int VERBOSE_WARN = 2; // will print expected errors
constexpr const int VERBOSE_DEBUG = 3; // will print everything
template <ProcKind P>
using ProcKindConstant = std::integral_constant<ProcKind, P>;
using MainProcess = ProcKindConstant<ProcKind::MAIN>;
using StatsProcess = ProcKindConstant<ProcKind::STATS>;
using WorkerProcess = ProcKindConstant<ProcKind::WORKER>;
class Logger {
ProcKind proc;
int verbosity{ VERBOSE_DEFAULT };
int process_id{ -1 };
int thread_id{ -1 };
void putHeader(fmt::memory_buffer& buf, std::string_view category) {
if (proc == ProcKind::MAIN) {
fmt::format_to(std::back_inserter(buf), "[MAIN] {}: ", category);
} else if (proc == ProcKind::STATS) {
fmt::format_to(std::back_inserter(buf), "[STATS] {}: ", category);
} else {
if (thread_id == -1) {
fmt::format_to(std::back_inserter(buf), "[WORKER{:3d}] {}: ", process_id + 1, category);
} else {
fmt::format_to(
std::back_inserter(buf), "[WORKER{:3d}:{:3d}] {}: ", process_id + 1, thread_id + 1, category);
}
}
}
public:
Logger(MainProcess, int verbosity) noexcept : proc(MainProcess::value), verbosity(verbosity) {}
Logger(StatsProcess, int verbosity) noexcept : proc(StatsProcess::value), verbosity(verbosity) {}
Logger(WorkerProcess, int verbosity, int process_id, int thread_id = -1) noexcept
: proc(WorkerProcess::value), verbosity(verbosity), process_id(process_id), thread_id(thread_id) {}
Logger(const Logger&) noexcept = default;
Logger& operator=(const Logger&) noexcept = default;
void setVerbosity(int value) noexcept {
assert(value >= VERBOSE_NONE && value <= VERBOSE_DEBUG);
verbosity = value;
}
template <typename... Args>
void printWithLogLevel(int log_level, std::string_view header, Args&&... args) {
assert(log_level >= VERBOSE_NONE && log_level <= VERBOSE_DEBUG);
if (log_level <= verbosity) {
const auto fp = log_level == VERBOSE_NONE ? stderr : stdout;
// 500B inline buffer
auto buf = fmt::memory_buffer{};
putHeader(buf, header);
fmt::format_to(std::back_inserter(buf), std::forward<Args>(args)...);
fmt::print(fp, "{}\n", std::string_view(buf.data(), buf.size()));
}
}
template <typename... Args>
void error(Args&&... args) {
printWithLogLevel(VERBOSE_NONE, "ERROR", std::forward<Args>(args)...);
}
template <typename... Args>
void info(Args&&... args) {
printWithLogLevel(VERBOSE_DEFAULT, "INFO", std::forward<Args>(args)...);
}
template <typename... Args>
void warn(Args&&... args) {
printWithLogLevel(VERBOSE_WARN, "WARNING", std::forward<Args>(args)...);
}
template <typename... Args>
void debug(Args&&... args) {
printWithLogLevel(VERBOSE_DEBUG, "DEBUG", std::forward<Args>(args)...);
}
};
} // namespace mako
#endif /*MAKO_LOGGER_HPP*/

View File

@ -0,0 +1,32 @@
/*
* macro.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_MACRO_HPP
#define MAKO_MACRO_HPP
/* force_inline: strongest available compiler hint that a function must be
 * inlined; used on the small hot-path helpers in mako.  Unsupported
 * compilers fail the build rather than silently losing the hint. */
#if defined(__GNUG__)
#define force_inline inline __attribute__((__always_inline__))
#elif defined(_MSC_VER)
#define force_inline __forceinline
#else
#error Missing force inline
#endif
#endif /*MAKO_MACRO_HPP*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,209 +0,0 @@
#ifndef MAKO_H
#define MAKO_H
#pragma once
#ifndef FDB_API_VERSION
#define FDB_API_VERSION 720
#endif
#include <foundationdb/fdb_c.h>
#include <pthread.h>
#include <sys/types.h>
#include <stdbool.h>
#if defined(__linux__)
#include <linux/limits.h>
#elif defined(__APPLE__)
#include <sys/syslimits.h>
#else
#include <limits.h>
#endif
/* verbosity levels */
#define VERBOSE_NONE 0 /* will still print errors */
#define VERBOSE_DEFAULT 1
#define VERBOSE_ANNOYING 2
#define VERBOSE_DEBUG 3
/* top-level run modes selected on the command line */
#define MODE_INVALID -1
#define MODE_CLEAN 0
#define MODE_BUILD 1
#define MODE_RUN 2
/* internal operation result codes */
#define FDB_SUCCESS 0
#define FDB_ERROR_RETRY -1
#define FDB_ERROR_ABORT -2
#define FDB_ERROR_CONFLICT -3
#define LAT_BLOCK_SIZE 511 /* size of each block to get detailed latency for each operation */
/* transaction specification */
enum Operations {
	OP_GETREADVERSION,
	OP_GET,
	OP_GETRANGE,
	OP_SGET,
	OP_SGETRANGE,
	OP_UPDATE,
	OP_INSERT,
	OP_INSERTRANGE,
	OP_OVERWRITE,
	OP_CLEAR,
	OP_SETCLEAR,
	OP_CLEARRANGE,
	OP_SETCLEARRANGE,
	OP_COMMIT,
	OP_TRANSACTION, /* pseudo-operation - cumulative time for the operation + commit */
	OP_READ_BG,
	MAX_OP /* must be the last item */
};
/* column indices into mako_txnspec_t.ops[op][...] */
#define OP_COUNT 0
#define OP_RANGE 1
#define OP_REVERSE 2
/* for long arguments */
enum Arguments {
	ARG_KEYLEN,
	ARG_VALLEN,
	ARG_TPS,
	ARG_COMMITGET,
	ARG_SAMPLING,
	ARG_VERSION,
	ARG_KNOBS,
	ARG_FLATBUFFERS,
	ARG_LOGGROUP,
	ARG_PREFIXPADDING,
	ARG_TRACE,
	ARG_TRACEPATH,
	ARG_TRACEFORMAT,
	ARG_TPSMAX,
	ARG_TPSMIN,
	ARG_TPSINTERVAL,
	ARG_TPSCHANGE,
	ARG_TXNTRACE,
	ARG_TXNTAGGING,
	ARG_TXNTAGGINGPREFIX,
	ARG_STREAMING_MODE,
	ARG_DISABLE_RYW,
	ARG_CLIENT_THREADS_PER_VERSION,
	ARG_JSON_REPORT,
	ARG_BG_FILE_PATH // if blob granule files are stored locally, mako will read and materialize them if this is set
};
/* waveforms for time-varying TPS throttling */
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
#define KEYPREFIX "mako"
#define KEYPREFIXLEN 4
#define TEMP_DATA_STORE "/tmp/makoTemp"
/* we set mako_txnspec_t and mako_args_t only once in the master process,
 * and won't be touched by child processes.
 */
typedef struct {
	/* for each operation, it stores "count", "range" and "reverse" */
	int ops[MAX_OP][3];
} mako_txnspec_t;
#define LOGGROUP_MAX 256
#define KNOB_MAX 256
#define TAGPREFIXLENGTH_MAX 8
#define NUM_CLUSTERS_MAX 3
#define NUM_DATABASES_MAX 10
#define MAX_BG_IDS 1000
/* benchmark parameters */
typedef struct {
	int api_version;
	int json; /* nonzero: emit stats as JSON */
	int num_processes;
	int num_threads;
	int mode; /* one of MODE_CLEAN / MODE_BUILD / MODE_RUN */
	int rows; /* is 2 billion enough? */
	int seconds;   /* run duration limit */
	int iteration; /* iteration count limit */
	int tpsmax;
	int tpsmin;
	int tpsinterval;
	int tpschange; /* one of TPSChangeTypes */
	int sampling;  /* sample latency every Nth transaction */
	int key_length;
	int value_length;
	int zipf; /* nonzero: zipfian key distribution */
	int commit_get;
	int verbose;
	mako_txnspec_t txnspec;
	char cluster_files[NUM_CLUSTERS_MAX][PATH_MAX];
	int num_fdb_clusters;
	int num_databases;
	char log_group[LOGGROUP_MAX];
	int prefixpadding;
	int trace;
	char tracepath[PATH_MAX];
	int traceformat; /* 0 - XML, 1 - JSON */
	char knobs[KNOB_MAX];
	uint8_t flatbuffers;
	int txntrace;
	int txntagging;
	char txntagging_prefix[TAGPREFIXLENGTH_MAX];
	FDBStreamingMode streaming_mode;
	int client_threads_per_version;
	int disable_ryw;
	char json_output_path[PATH_MAX];
	bool bg_materialize_files; /* read+materialize local blob granule files */
	char bg_file_path[PATH_MAX];
} mako_args_t;
/* shared memory */
/* master <-> worker coordination signal values */
#define SIGNAL_RED 0
#define SIGNAL_GREEN 1
#define SIGNAL_OFF 2
/* header of the shared-memory region used to coordinate all processes */
typedef struct {
	int signal;             /* SIGNAL_* run state set by the master */
	int readycount;         /* workers that finished setup */
	double throttle_factor; /* TPS throttle multiplier */
	int stopcount;          /* workers that finished their run */
} mako_shmhdr_t;
/* memory block allocated to each operation when collecting detailed latency */
typedef struct {
	uint64_t data[LAT_BLOCK_SIZE];
	void* next_block; /* singly-linked chain of blocks */
} lat_block_t;
/* per-thread counters and latency aggregates, one slot per operation */
typedef struct {
	uint64_t xacts;
	uint64_t conflicts;
	uint64_t ops[MAX_OP];
	uint64_t errors[MAX_OP];
	uint64_t latency_samples[MAX_OP];
	uint64_t latency_us_total[MAX_OP];
	uint64_t latency_us_min[MAX_OP];
	uint64_t latency_us_max[MAX_OP];
} mako_stats_t;
/* per-process information */
typedef struct {
	int worker_id;
	pid_t parent_id;
	mako_args_t* args;
	mako_shmhdr_t* shm;
	FDBDatabase* databases[NUM_DATABASES_MAX];
} process_info_t;
/* args for threads */
typedef struct {
	int thread_id;
	int database_index; // index of the database to do work to
	int elem_size[MAX_OP]; /* stores the multiple of LAT_BLOCK_SIZE to check the memory allocation of each operation */
	bool is_memory_allocated[MAX_OP]; /* flag specified for each operation, whether the memory was allocated to that
	                                     specific operation */
	lat_block_t* block[MAX_OP];
	process_info_t* process;
} thread_args_t;
/* process type */
typedef enum { proc_master = 0, proc_worker, proc_stats } proc_type_t;
#endif /* MAKO_H */

View File

@ -0,0 +1,168 @@
/*
* mako.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_HPP
#define MAKO_HPP
#ifndef FDB_API_VERSION
#define FDB_API_VERSION 720
#endif
#include <array>
#include <atomic>
#include <cassert>
#include <chrono>
#include <list>
#include <vector>
#include <string_view>
#include <fdb_api.hpp>
#include <pthread.h>
#include <sys/types.h>
#include <stdbool.h>
#include "limit.hpp"
namespace mako {
// top-level run modes selected on the command line
constexpr const int MODE_INVALID = -1;
constexpr const int MODE_CLEAN = 0;
constexpr const int MODE_BUILD = 1;
constexpr const int MODE_RUN = 2;
/* for long arguments */
enum ArgKind {
	ARG_KEYLEN,
	ARG_VALLEN,
	ARG_TPS,
	ARG_ASYNC, // number of async transactions per process (0 = blocking mode)
	ARG_COMMITGET,
	ARG_SAMPLING,
	ARG_VERSION,
	ARG_KNOBS,
	ARG_FLATBUFFERS,
	ARG_LOGGROUP,
	ARG_PREFIXPADDING,
	ARG_TRACE,
	ARG_TRACEPATH,
	ARG_TRACEFORMAT,
	ARG_TPSMAX,
	ARG_TPSMIN,
	ARG_TPSINTERVAL,
	ARG_TPSCHANGE,
	ARG_TXNTRACE,
	ARG_TXNTAGGING,
	ARG_TXNTAGGINGPREFIX,
	ARG_STREAMING_MODE,
	ARG_DISABLE_RYW,
	ARG_CLIENT_THREADS_PER_VERSION,
	ARG_JSON_REPORT,
	ARG_BG_FILE_PATH // if blob granule files are stored locally, mako will read and materialize them if this is set
};
// column indices into WorkloadSpec::ops[op][...]
constexpr const int OP_COUNT = 0;
constexpr const int OP_RANGE = 1;
constexpr const int OP_REVERSE = 2;
/* transaction specification */
enum OpKind {
	OP_GETREADVERSION,
	OP_GET,
	OP_GETRANGE,
	OP_SGET,
	OP_SGETRANGE,
	OP_UPDATE,
	OP_INSERT,
	OP_INSERTRANGE,
	OP_OVERWRITE,
	OP_CLEAR,
	OP_SETCLEAR,
	OP_CLEARRANGE,
	OP_SETCLEARRANGE,
	OP_COMMIT,
	OP_TRANSACTION, /* pseudo-operation - time it takes to run one iteration of ops sequence */
	OP_READ_BG,
	MAX_OP /* must be the last item */
};
// waveforms for time-varying TPS throttling
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
/* we set WorkloadSpec and Arguments only once in the master process,
 * and won't be touched by child processes.
 */
struct WorkloadSpec {
	/* for each operation, it stores "count", "range" and "reverse" */
	int ops[MAX_OP][3];
};
// fixed-size buffer limits for the Arguments struct below
constexpr const int LOGGROUP_MAX = 256;
constexpr const int KNOB_MAX = 256;
constexpr const int TAGPREFIXLENGTH_MAX = 8;
constexpr const int NUM_CLUSTERS_MAX = 3;
constexpr const int NUM_DATABASES_MAX = 10;
constexpr const std::string_view KEY_PREFIX{ "mako" };
constexpr const std::string_view TEMP_DATA_STORE{ "/tmp/makoTemp" };
/* benchmark parameters */
struct Arguments {
	int api_version;
	int json; // nonzero: emit stats as JSON
	int num_processes;
	int num_threads;
	int async_xacts; // async transactions per process; 0 selects blocking mode
	int mode;        // one of MODE_CLEAN / MODE_BUILD / MODE_RUN
	int rows; /* is 2 billion enough? */
	int row_digits; // decimal digit count of `rows`, precomputed for key generation
	int seconds;    // run duration limit
	int iteration;  // iteration count limit
	int tpsmax;
	int tpsmin;
	int tpsinterval;
	int tpschange; // one of TPSChangeTypes
	int sampling;  // sample latency every Nth transaction
	int key_length;
	int value_length;
	int zipf; // nonzero: zipfian key distribution
	int commit_get;
	int verbose;
	WorkloadSpec txnspec;
	char cluster_files[NUM_CLUSTERS_MAX][PATH_MAX];
	int num_fdb_clusters;
	int num_databases;
	char log_group[LOGGROUP_MAX];
	int prefixpadding;
	int trace;
	char tracepath[PATH_MAX];
	int traceformat; /* 0 - XML, 1 - JSON */
	char knobs[KNOB_MAX];
	uint8_t flatbuffers;
	int txntrace;
	int txntagging;
	char txntagging_prefix[TAGPREFIXLENGTH_MAX];
	FDBStreamingMode streaming_mode;
	int64_t client_threads_per_version;
	int disable_ryw;
	char json_output_path[PATH_MAX];
	bool bg_materialize_files; // read+materialize local blob granule files
	char bg_file_path[PATH_MAX];
};
} // namespace mako
#endif /* MAKO_HPP */

View File

@ -53,6 +53,13 @@ Arguments
- | ``-t | --threads <threads>``
  | Number of threads per worker process (Default: 1)
| With ``--async_xacts <xacts>`` == 0 (Default), each of the ``<threads>`` operates on a transaction object with blocking API calls
| Otherwise, all of the ``<threads>`` run an asynchronous job scheduler, serving ``<xacts>`` transactions
- | ``--async_xacts <xacts>``
| Number of transactions per worker process to run asynchronously (Default: 0)
| ``<xacts>`` > 0 switches the execution mode to non-blocking (See ``-t | --threads``), with the exception of blob granules API
| Note: throttling options, e.g. ``--tpsmax``, ``--tpsmin``, ``--tpschange``, ``--tpsinterval``, are ignored in asynchronous mode
- | ``-r | --rows <rows>``
  | Number of rows initially populated (Default: 100000)

View File

@ -0,0 +1,275 @@
/*
* operations.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blob_granules.hpp"
#include "operations.hpp"
#include "mako.hpp"
#include "logger.hpp"
#include "utils.hpp"
#include <array>
extern thread_local mako::Logger logr;
namespace mako {
using namespace fdb;
// Operation table, indexed by op code: the entry order must stay in sync with
// the op enum in mako.hpp.  Each entry holds the op name, up to two execution
// steps (a step issues work on the transaction and returns a future; the
// optional post-step consumes that future once ready), the step count, and
// whether the op needs a commit after its final step.
const std::array<Operation, MAX_OP> opTable{
	{ { "GRV",
	    { { StepKind::READ,
	        [](Transaction& tx, Arguments const&, ByteString&, ByteString&, ByteString&) {
	            return tx.getReadVersion().eraseType();
	        },
	        [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString&) {
	            if (f && !f.error()) {
	                f.get<future_var::Int64>();
	            }
	        } } },
	    1,
	    false },
	  { "GET",
	    { { StepKind::READ,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
	            return tx.get(key, false /*snapshot*/).eraseType();
	        },
	        [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
	            if (f && !f.error()) {
	                f.get<future_var::Value>();
	            }
	        } } },
	    1,
	    false },
	  { "GETRANGE",
	    { { StepKind::READ,
	        [](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
	            return tx
	                .getRange(key_select::firstGreaterOrEqual(begin),
	                          key_select::lastLessOrEqual(end, 1),
	                          0 /*limit*/,
	                          0 /*target_bytes*/,
	                          args.streaming_mode,
	                          0 /*iteration*/,
	                          false /*snapshot*/,
	                          args.txnspec.ops[OP_GETRANGE][OP_REVERSE])
	                .eraseType();
	        },
	        [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
	            if (f && !f.error()) {
	                f.get<future_var::KeyValueArray>();
	            }
	        } } },
	    1,
	    false },
	  { "SGET",
	    { { StepKind::READ,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
	            return tx.get(key, true /*snapshot*/).eraseType();
	        },
	        [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
	            if (f && !f.error()) {
	                f.get<future_var::Value>();
	            }
	        } } },
	    1,
	    false },
	  { "SGETRANGE",
	    { { StepKind::READ,
	        [](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
	            return tx
	                .getRange(key_select::firstGreaterOrEqual(begin),
	                          key_select::lastLessOrEqual(end, 1),
	                          0 /*limit*/,
	                          0 /*target_bytes*/,
	                          args.streaming_mode,
	                          0 /*iteration*/,
	                          true /*snapshot*/,
	                          // BUGFIX: read the reverse flag from this op's own spec;
	                          // previously OP_GETRANGE, a copy-paste from the non-snapshot entry
	                          args.txnspec.ops[OP_SGETRANGE][OP_REVERSE])
	                .eraseType();
	        },
	        [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
	            if (f && !f.error()) {
	                f.get<future_var::KeyValueArray>();
	            }
	        } } },
	    1,
	    false },
	  { "UPDATE",
	    // read-modify-write: step 0 reads the key, step 1 writes a fresh random value
	    { { StepKind::READ,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
	            return tx.get(key, false /*snapshot*/).eraseType();
	        },
	        [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
	            if (f && !f.error()) {
	                f.get<future_var::Value>();
	            }
	        } },
	      { StepKind::IMM,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
	            randomString(value.data(), args.value_length);
	            tx.set(key, value);
	            return Future();
	        } } },
	    2,
	    true },
	  { "INSERT",
	    { { StepKind::IMM,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
	            // key[0..args.key_length] := concat(key_prefix, random_string)
	            randomString(key.data() + intSize(KEY_PREFIX), args.key_length - intSize(KEY_PREFIX));
	            randomString(value.data(), args.value_length);
	            tx.set(key, value);
	            return Future();
	        } } },
	    1,
	    true },
	  { "INSERTRANGE",
	    { { StepKind::IMM,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
	            randomString(value.data(), args.value_length);
	            // key[0..args.key_length] := concat(prefix, random_string, num[0..range_digits])
	            const auto range = args.txnspec.ops[OP_INSERTRANGE][OP_RANGE];
	            assert(range > 0);
	            const auto range_digits = digits(range);
	            const auto random_len = args.key_length - intSize(KEY_PREFIX) - range_digits;
	            randomString(&key[intSize(KEY_PREFIX)], random_len);
	            for (auto i = 0; i < range; i++) {
	                numericWithFill(&key[args.key_length - range_digits], range_digits, i);
	                tx.set(key, value);
	            }
	            return Future();
	        } } },
	    1,
	    true },
	  { "OVERWRITE",
	    { { StepKind::IMM,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
	            randomString(value.data(), args.value_length);
	            tx.set(key, value);
	            return Future();
	        } } },
	    1,
	    true },
	  { "CLEAR",
	    { { StepKind::IMM,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
	            tx.clear(key);
	            return Future();
	        } } },
	    1,
	    true },
	  { "SETCLEAR",
	    // step 0 commits a random set, step 1 clears the same key on a fresh txn
	    { { StepKind::COMMIT,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
	            randomString(&key[KEY_PREFIX.size()], args.key_length - intSize(KEY_PREFIX));
	            randomString(value.data(), args.value_length);
	            tx.set(key, value);
	            return tx.commit().eraseType();
	        } },
	      { StepKind::IMM,
	        [](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
	            tx.reset(); // assuming commit from step 0 worked.
	            tx.clear(key); // key should forward unchanged from step 0
	            return Future();
	        } } },
	    2,
	    true },
	  { "CLEARRANGE",
	    { { StepKind::IMM,
	        [](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
	            tx.clearRange(begin, end);
	            return Future();
	        } } },
	    1,
	    true },
	  { "SETCLEARRANGE",
	    // step 0 commits a range of sets (recording the first key), step 1 clears that range
	    { { StepKind::COMMIT,
	        [](Transaction& tx, Arguments const& args, ByteString& key_begin, ByteString& key, ByteString& value) {
	            randomString(value.data(), args.value_length);
	            // key[0..args.key_length] := concat(prefix, random_string, num[0..range_digits])
	            const auto range = args.txnspec.ops[OP_SETCLEARRANGE][OP_RANGE];
	            assert(range > 0);
	            const auto range_digits = digits(range);
	            const auto random_len = args.key_length - intSize(KEY_PREFIX) - range_digits;
	            randomString(&key[KEY_PREFIX.size()], random_len);
	            for (auto i = 0; i < range; i++) {
	                numericWithFill(&key[args.key_length - range_digits], range_digits, i);
	                tx.set(key, value);
	                if (i == 0)
	                    key_begin.assign(key);
	            }
	            return tx.commit().eraseType();
	        } },
	      { StepKind::IMM,
	        [](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
	            tx.reset();
	            tx.clearRange(begin, end);
	            return Future();
	        } } },
	    2,
	    true },
	  // bookkeeping-only entries; no concrete steps (see isAbstractOp)
	  { "COMMIT", { { StepKind::NONE, nullptr } }, 0, false },
	  { "TRANSACTION", { { StepKind::NONE, nullptr } }, 0, false },
	  { "READBLOBGRANULE",
	    { { StepKind::ON_ERROR,
	        [](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
	            auto err = Error{};
	            err = tx.setOptionNothrow(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, BytesRef());
	            if (err) {
	                // Issuing read/writes before disabling RYW results in error.
	                // Possible malformed workload?
	                // As workloads execute in sequence, retrying would likely repeat this error.
	                fmt::print(stderr, "ERROR: TR_OPTION_READ_YOUR_WRITES_DISABLE: {}", err.what());
	                return Future();
	            }
	            // Allocate a separate context per call to avoid multiple threads accessing
	            auto user_context = blob_granules::local_file::UserContext(args.bg_file_path);
	            auto api_context = blob_granules::local_file::createApiContext(user_context, args.bg_materialize_files);
	            auto r = tx.readBlobGranules(begin,
	                                         end,
	                                         0 /* beginVersion*/,
	                                         -2, /* endVersion. -2 (latestVersion) is use txn read version */
	                                         api_context);
	            user_context.clear();
	            auto out = Result::KeyValueArray{};
	            err = r.getKeyValueArrayNothrow(out);
	            if (!err || err.is(2037 /*blob_granule_not_materialized*/))
	                return Future();
	            // retryable errors are logged at a lower severity than hard failures
	            const auto level = (err.is(1020 /*not_committed*/) || err.is(1021 /*commit_unknown_result*/) ||
	                                err.is(1213 /*tag_throttled*/))
	                                   ? VERBOSE_WARN
	                                   : VERBOSE_NONE;
	            logr.printWithLogLevel(level, "ERROR", "get_keyvalue_array() after readBlobGranules(): {}", err.what());
	            return tx.onError(err).eraseType();
	        } } },
	    1,
	    false } }
};
} // namespace mako

View File

@ -0,0 +1,140 @@
/*
* operations.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_OPERATIONS_HPP
#define MAKO_OPERATIONS_HPP
#include <fdb_api.hpp>
#include <array>
#include <cassert>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>
#include "macro.hpp"
#include "mako.hpp"
namespace mako {
// determines how resultant future will be handled
// (i.e. how the runner must wait on / post-process the Future a step returns)
enum class StepKind {
	NONE, ///< not part of the table: OP_TRANSACTION, OP_COMMIT
	IMM, ///< non-future ops that return immediately: e.g. set, clear_range
	READ, ///< blockable reads: get(), get_range(), get_read_version, ...
	COMMIT, ///< self-explanatory
	ON_ERROR ///< future is a result of tx.on_error()
};
// True for bookkeeping-only op codes (measurement markers) that carry no
// concrete steps to execute.
force_inline bool isAbstractOp(int op) noexcept {
	switch (op) {
	case OP_COMMIT:
	case OP_TRANSACTION:
		return true;
	default:
		return false;
	}
}
// Signature of a step's "issue" function: starts work against the transaction
// using up to two key buffers and a value buffer, returning the resulting
// future (an empty Future for immediate operations).
using StepFunction = fdb::Future (*)(fdb::Transaction& tx,
                                     Arguments const&,
                                     fdb::ByteString& /*key1*/,
                                     fdb::ByteString& /*key2*/,
                                     fdb::ByteString& /*value*/);

// Signature of the optional follow-up invoked once the step's future is ready,
// e.g. to extract the future's value.
using PostStepFunction = void (*)(fdb::Future&,
                                  fdb::Transaction& tx,
                                  Arguments const&,
                                  fdb::ByteString& /*key1*/,
                                  fdb::ByteString& /*key2*/,
                                  fdb::ByteString& /*value*/);

// One executable step of an operation (see opTable in operations.cpp)
struct Step {
	StepKind kind;
	StepFunction step_func_;
	PostStepFunction post_step_func_{ nullptr }; // optional; nullptr when unused
};
// Static description of one benchmark operation: display name, at most two
// execution steps, and whether a commit must follow the final step.
struct Operation {
	std::string_view name_;
	Step steps_[2];
	int num_steps_;
	bool needs_commit_;

	std::string_view name() const noexcept { return name_; }

	StepKind stepKind(int step) const noexcept {
		assert(step < steps());
		return steps_[step].kind;
	}

	StepFunction stepFunction(int step) const noexcept { return steps_[step].step_func_; }

	PostStepFunction postStepFunction(int step) const noexcept { return steps_[step].post_step_func_; }
	// how many steps in this op?
	int steps() const noexcept { return num_steps_; }
	// does the op need to commit some time after its final step?
	bool needsCommit() const noexcept { return needs_commit_; }
};
extern const std::array<Operation, MAX_OP> opTable;

// Human-readable name for an op code; empty string for out-of-range codes.
// (The returned pointer is null-terminated: names are string literals.)
force_inline char const* getOpName(int ops_code) {
	const auto in_range = (ops_code >= 0 && ops_code < MAX_OP);
	return in_range ? opTable[ops_code].name().data() : "";
}
// Cursor over the configured workload: (op code, repetition count, step index).
// Advanced via getOpBegin()/getOpNext(); compares equal to OpEnd when exhausted.
struct OpIterator {
	int op, count, step;

	bool operator==(const OpIterator& other) const noexcept {
		return op == other.op && count == other.count && step == other.step;
	}

	bool operator!=(const OpIterator& other) const noexcept { return !(*this == other); }

	// kind of the step currently pointed at
	StepKind stepKind() const noexcept { return opTable[op].stepKind(step); }

	char const* opName() const noexcept { return getOpName(op); }
};
constexpr const OpIterator OpEnd = OpIterator{ MAX_OP, -1, -1 };

// First concrete operation in the workload spec (skipping abstract ops and
// ops configured with a zero count), or OpEnd if nothing is configured.
force_inline OpIterator getOpBegin(Arguments const& args) noexcept {
	for (auto candidate = 0; candidate < MAX_OP; candidate++) {
		const bool runnable = !isAbstractOp(candidate) && args.txnspec.ops[candidate][OP_COUNT] != 0;
		if (runnable)
			return OpIterator{ candidate, 0, 0 };
	}
	return OpEnd;
}
// Advance a workload cursor: next step of the same op first, then remaining
// repetitions of the op, then the next runnable op; OpEnd once exhausted.
force_inline OpIterator getOpNext(Arguments const& args, OpIterator current) noexcept {
	assert(current.op < MAX_OP && !isAbstractOp(current.op));
	if (current.step + 1 < opTable[current.op].steps())
		return OpIterator{ current.op, current.count, current.step + 1 };
	auto count = current.count + 1; // next repetition of the current op
	for (auto op = current.op; op < MAX_OP; op++, count = 0) {
		if (!isAbstractOp(op) && args.txnspec.ops[op][OP_COUNT] > count)
			return OpIterator{ op, count, 0 };
	}
	return OpEnd;
}
} // namespace mako
#endif /* MAKO_OPERATIONS_HPP */

View File

@ -0,0 +1,26 @@
/*
* process.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_PROCESS_HPP
#define MAKO_PROCESS_HPP

// Role of a mako process after fork: the main/coordinator process, a worker
// that executes the workload, or the statistics process.
enum class ProcKind { MAIN, WORKER, STATS };

#endif /*MAKO_PROCESS_HPP*/

View File

@ -0,0 +1,108 @@
/*
* shm.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_SHM_HPP
#define MAKO_SHM_HPP
#include <atomic>
#include <cassert>
#include <cstdint>
#include "stats.hpp"
/* shared memory */
// Values written into shared_memory::Header::signal to coordinate processes
// (exact start/stop protocol lives in the worker loop -- see mako main source).
constexpr const int SIGNAL_RED = 0;
constexpr const int SIGNAL_GREEN = 1;
constexpr const int SIGNAL_OFF = 2;
// controlled, safer access to shared memory
namespace mako::shared_memory {
// Control block at the front of the shared-memory region.  Fields are atomics
// because they are read/written concurrently across processes.
// Note: direct brace initialization replaces ATOMIC_VAR_INIT -- the macro is a
// C-compatibility shim and is deprecated as of C++20.
struct Header {
	std::atomic<int> signal{ SIGNAL_OFF }; // one of the SIGNAL_* values above
	std::atomic<int> readycount{ 0 };
	std::atomic<double> throttle_factor{ 1.0 };
	std::atomic<int> stopcount{ 0 };
};
// Describes the shared-memory layout: one Header followed by an array of
// ThreadStatistics; 'stats' is the first element of that array (the remaining
// slots are accounted for by storageSize()).
struct LayoutHelper {
	Header hdr;
	ThreadStatistics stats;
};
// Bytes required for the shared region: the header-plus-first-slot layout,
// plus one ThreadStatistics slot for every remaining worker thread.
inline size_t storageSize(int num_processes, int num_threads) noexcept {
	assert(num_processes >= 1 && num_threads >= 1);
	const auto total_slots = num_processes * num_threads;
	return sizeof(LayoutHelper) + sizeof(ThreadStatistics) * (total_slots - 1);
}
// Typed, bounds-aware view over the raw shared-memory region: a Header at
// offset 0 followed by (num_processes * num_threads) ThreadStatistics slots,
// laid out process-major.  Does not own the mapping; 'base' must outlive it.
class Access {
	void* base; // start of the shared-memory mapping
	int num_processes;
	int num_threads;

	// address of the stats slot for (process_idx, thread_idx) within shm_base
	static inline ThreadStatistics& statsSlot(void* shm_base,
	                                          int num_threads,
	                                          int process_idx,
	                                          int thread_idx) noexcept {
		return (&static_cast<LayoutHelper*>(shm_base)->stats)[process_idx * num_threads + thread_idx];
	}

public:
	Access(void* shm, int num_processes, int num_threads) noexcept
	  : base(shm), num_processes(num_processes), num_threads(num_threads) {}

	Access() noexcept : Access(nullptr, 0, 0) {}
	Access(const Access&) noexcept = default;
	Access& operator=(const Access&) noexcept = default;

	// total number of bytes this view expects the mapping to cover
	size_t size() const noexcept { return storageSize(num_processes, num_threads); }

	// placement-construct the header and every stats slot in the raw memory
	void initMemory() noexcept {
		new (&header()) Header{};
		for (auto i = 0; i < num_processes; i++)
			for (auto j = 0; j < num_threads; j++)
				new (&statsSlot(i, j)) ThreadStatistics();
	}

	Header const& headerConst() const noexcept { return *static_cast<Header const*>(base); }

	Header& header() const noexcept { return *static_cast<Header*>(base); }

	// pointer to slot (0, 0); slots are contiguous, so this is the whole array
	ThreadStatistics const* statsConstArray() const noexcept {
		return &statsSlot(base, num_threads, 0 /*process_id*/, 0 /*thread_id*/);
	}

	ThreadStatistics* statsArray() const noexcept {
		return &statsSlot(base, num_threads, 0 /*process_id*/, 0 /*thread_id*/);
	}

	ThreadStatistics const& statsConstSlot(int process_idx, int thread_idx) const noexcept {
		return statsSlot(base, num_threads, process_idx, thread_idx);
	}

	ThreadStatistics& statsSlot(int process_idx, int thread_idx) const noexcept {
		return statsSlot(base, num_threads, process_idx, thread_idx);
	}
};
} // namespace mako::shared_memory
#endif /* MAKO_SHM_HPP */

View File

@ -0,0 +1,177 @@
/*
* stats.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_STATS_HPP
#define MAKO_STATS_HPP
#include <array>
#include <cstdint>
#include <cstring>
#include <list>
#include <new>
#include <utility>
#include "operations.hpp"
#include "time.hpp"
namespace mako {
/* rough cap on the number of samples to avoid OOM hindering benchmark */
constexpr const size_t SAMPLE_CAP = 2000000;

/* size of each block to get detailed latency for each operation */
constexpr const size_t LAT_BLOCK_SIZE = 4093;

/* hard cap on the number of sample blocks = SAMPLE_CAP / LAT_BLOCK_SIZE (488) */
constexpr const size_t MAX_LAT_BLOCKS = SAMPLE_CAP / LAT_BLOCK_SIZE;
/* memory block allocated to each operation when collecting detailed latency */
/* fixed-capacity chunk of latency samples (microseconds) for one operation */
class LatencySampleBlock {
	uint64_t samples[LAT_BLOCK_SIZE]{};
	uint64_t count{ 0 }; // number of valid entries in 'samples'

public:
	LatencySampleBlock() noexcept = default;

	// true once every slot has been written
	bool full() const noexcept { return count >= LAT_BLOCK_SIZE; }

	// record one latency sample; caller must check full() first
	void put(timediff_t td) {
		assert(!full());
		samples[count++] = toIntegerMicroseconds(td);
	}

	// return {data block, number of samples}
	std::pair<uint64_t const*, size_t> data() const noexcept { return std::make_pair(samples, count); }
};
/* collect sampled latencies until OOM is hit */
// Grows a list of fixed-size sample blocks on demand; once an allocation fails
// or the block cap is reached, further samples are silently dropped
// (sampling is best-effort by design).
class LatencySampleBin {
	std::list<LatencySampleBlock> blocks;
	bool noMoreAlloc{ false }; // set after the first failed allocation; never retried

	// append one fresh block; on bad_alloc remember the failure and report it
	bool tryAlloc() {
		try {
			blocks.emplace_back();
		} catch (const std::bad_alloc&) {
			noMoreAlloc = true;
			return false;
		}
		return true;
	}

public:
	// pre-allocate the first block so the hot path rarely allocates
	void reserveOneBlock() {
		if (blocks.empty())
			tryAlloc();
	}

	// record one latency sample; drops it when capacity is exhausted
	void put(timediff_t td) {
		if (blocks.empty() || blocks.back().full()) {
			if (blocks.size() >= MAX_LAT_BLOCKS || noMoreAlloc || !tryAlloc())
				return;
		}
		blocks.back().put(td);
	}

	// iterate & apply for each block user function void(uint64_t const*, size_t)
	template <typename Func>
	void forEachBlock(Func&& fn) const {
		for (const auto& block : blocks) {
			auto [ptr, cnt] = block.data();
			fn(ptr, cnt);
		}
	}
};
// Per-thread operation counters and latency aggregates.  alignas(64) keeps
// each instance on its own cache line in the shared-memory slot array.
class alignas(64) ThreadStatistics {
	uint64_t conflicts;
	uint64_t total_errors;
	uint64_t ops[MAX_OP];
	uint64_t errors[MAX_OP];
	uint64_t latency_samples[MAX_OP];
	uint64_t latency_us_total[MAX_OP];
	uint64_t latency_us_min[MAX_OP];
	uint64_t latency_us_max[MAX_OP];

public:
	ThreadStatistics() noexcept {
		// all members are plain uint64_t, so zero-filling via memset is well-defined
		memset(this, 0, sizeof(ThreadStatistics));
		// 0xff in every byte == UINT64_MAX, so the first sample always lowers the min
		memset(latency_us_min, 0xff, sizeof(latency_us_min));
	}

	ThreadStatistics(const ThreadStatistics& other) noexcept = default;
	ThreadStatistics& operator=(const ThreadStatistics& other) noexcept = default;

	uint64_t getConflictCount() const noexcept { return conflicts; }

	uint64_t getOpCount(int op) const noexcept { return ops[op]; }

	uint64_t getErrorCount(int op) const noexcept { return errors[op]; }

	uint64_t getTotalErrorCount() const noexcept { return total_errors; }

	uint64_t getLatencySampleCount(int op) const noexcept { return latency_samples[op]; }

	uint64_t getLatencyUsTotal(int op) const noexcept { return latency_us_total[op]; }

	uint64_t getLatencyUsMin(int op) const noexcept { return latency_us_min[op]; }

	uint64_t getLatencyUsMax(int op) const noexcept { return latency_us_max[op]; }

	// with 'this' as final aggregation, factor in 'other'
	// (other's total_errors is reconstructed from its per-op error counts;
	//  an op with no samples contributes UINT64_MAX to min, i.e. a no-op)
	void combine(const ThreadStatistics& other) {
		conflicts += other.conflicts;
		for (auto op = 0; op < MAX_OP; op++) {
			ops[op] += other.ops[op];
			errors[op] += other.errors[op];
			total_errors += other.errors[op];
			latency_samples[op] += other.latency_samples[op];
			latency_us_total[op] += other.latency_us_total[op];
			if (latency_us_min[op] > other.latency_us_min[op])
				latency_us_min[op] = other.latency_us_min[op];
			if (latency_us_max[op] < other.latency_us_max[op])
				latency_us_max[op] = other.latency_us_max[op];
		}
	}

	void incrConflictCount() noexcept { conflicts++; }

	// non-commit write operations aren't measured for time.
	void incrOpCount(int op) noexcept { ops[op]++; }

	void incrErrorCount(int op) noexcept {
		total_errors++;
		errors[op]++;
	}

	// fold one latency measurement into total/min/max for 'op'
	void addLatency(int op, timediff_t diff) noexcept {
		const auto latency_us = toIntegerMicroseconds(diff);
		latency_samples[op]++;
		latency_us_total[op] += latency_us;
		if (latency_us_min[op] > latency_us)
			latency_us_min[op] = latency_us;
		if (latency_us_max[op] < latency_us)
			latency_us_max[op] = latency_us;
	}
};

// one sample bin per operation
using LatencySampleBinArray = std::array<LatencySampleBin, MAX_OP>;
} // namespace mako
#endif /* MAKO_STATS_HPP */

View File

@ -0,0 +1,77 @@
/*
* time.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MAKO_TIME_HPP
#define MAKO_TIME_HPP
#include <chrono>
namespace mako {
/* time measurement helpers */
using std::chrono::steady_clock;
// monotonic timestamp type used throughout mako
using timepoint_t = decltype(steady_clock::now());
// difference of two timepoint_t values (steady_clock::duration)
using timediff_t = decltype(std::declval<timepoint_t>() - std::declval<timepoint_t>());
// Convert any chrono duration to seconds as a double, keeping the fraction.
template <typename Duration>
double toDoubleSeconds(Duration duration) {
	// converting to a floating-point-rep duration is implicit (no cast needed)
	return std::chrono::duration<double>(duration).count();
}
// Convert any chrono duration to whole seconds, truncating toward zero.
template <typename Duration>
uint64_t toIntegerSeconds(Duration duration) {
	using WholeSeconds = std::chrono::duration<uint64_t>;
	return std::chrono::duration_cast<WholeSeconds>(duration).count();
}
// Convert any chrono duration to whole microseconds, truncating toward zero.
template <typename Duration>
uint64_t toIntegerMicroseconds(Duration duration) {
	using WholeMicros = std::chrono::duration<uint64_t, std::micro>;
	return std::chrono::duration_cast<WholeMicros>(duration).count();
}
// timing helpers
struct StartAtCtor {};
class Stopwatch {
timepoint_t p1, p2;
public:
Stopwatch() noexcept : p1(), p2() {}
Stopwatch(StartAtCtor) noexcept { start(); }
Stopwatch(timepoint_t start_time) noexcept : p1(start_time), p2() {}
Stopwatch(const Stopwatch&) noexcept = default;
Stopwatch& operator=(const Stopwatch&) noexcept = default;
timepoint_t getStart() const noexcept { return p1; }
timepoint_t getStop() const noexcept { return p2; }
void start() noexcept { p1 = steady_clock::now(); }
Stopwatch& stop() noexcept {
p2 = steady_clock::now();
return *this;
}
Stopwatch& setStop(timepoint_t p_stop) noexcept {
p2 = p_stop;
return *this;
}
void startFromStop() noexcept { p1 = p2; }
auto diff() const noexcept { return p2 - p1; }
};
} // namespace mako
#endif /* MAKO_TIME_HPP */

View File

@ -1,136 +0,0 @@
#include "utils.h"
#include "mako.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* uniform-distribution random */
/* return a uniform random number in [low, high], both inclusive */
int urand(int low, int high) {
	double fraction = rand() / (1.0 + RAND_MAX);
	int span = high - low + 1;
	return (int)((fraction * span) + low);
}

/* random string */
/* len is the buffer size, must include null */
void randstr(char* str, int len) {
	for (int i = 0; i < len - 1; i++) {
		str[i] = '!' + urand(0, 'z' - '!'); /* generate a printable char from '!' to 'z' */
	}
	str[len - 1] = '\0';
}

/* random numeric string */
/* len is the buffer size, must include null */
void randnumstr(char* str, int len) {
	for (int i = 0; i < len - 1; i++) {
		str[i] = '0' + urand(0, 9); /* generate a digit char from '0' to '9' */
	}
	str[len - 1] = '\0';
}
/* return the first key to be inserted */
int insert_begin(int rows, int p_idx, int t_idx, int total_p, int total_t) {
double interval = (double)rows / total_p / total_t;
return (int)(round(interval * ((p_idx * total_t) + t_idx)));
}
/* return the last key to be inserted */
int insert_end(int rows, int p_idx, int t_idx, int total_p, int total_t) {
double interval = (double)rows / total_p / total_t;
return (int)(round(interval * ((p_idx * total_t) + t_idx + 1) - 1));
}
/* divide val equally among workers: the first `remaining` workers receive one
 * extra unit; a worker whose base share is zero and gets no extra returns -1 */
int compute_thread_portion(int val, int p_idx, int t_idx, int total_p, int total_t) {
	int workers = total_p * total_t;
	int base = val / workers;
	int remaining = val - (base * workers);
	int slot = (p_idx * total_t) + t_idx;
	if (slot < remaining)
		return base + 1;
	if (base == 0)
		return -1;
	return base;
}
/* number of decimal digits in num (returns 0 for num <= 0) */
int digits(int num) {
	int count = 0;
	for (; num > 0; num /= 10)
		count++;
	return count;
}
/* generate a key for a given key number */
/* prefix is "mako" by default, prefixpadding = 1 means 'x' will be in front rather than trailing the keyname */
/* len is the buffer size, key length + null */
void genkey(char* str, char* prefix, int prefixlen, int prefixpadding, int num, int rows, int len) {
const int rowdigit = digits(rows);
const int prefixoffset = prefixpadding ? len - (prefixlen + rowdigit) - 1 : 0;
char* prefixstr = (char*)alloca(sizeof(char) * (prefixlen + rowdigit + 1));
snprintf(prefixstr, prefixlen + rowdigit + 1, "%s%0.*d", prefix, rowdigit, num);
memset(str, 'x', len);
memcpy(str + prefixoffset, prefixstr, prefixlen + rowdigit);
str[len - 1] = '\0';
}
/* This is another sorting algorithm used to calculate latency parameters */
/* We moved from radix sort to quick sort to avoid extra space used in radix sort */
#if 0
uint64_t get_max(uint64_t arr[], int n) {
uint64_t mx = arr[0];
for (int i = 1; i < n; i++) {
if (arr[i] > mx) {
mx = arr[i];
}
}
return mx;
}
void bucket_data(uint64_t arr[], int n, uint64_t exp) {
// uint64_t output[n];
int i, count[10] = { 0 };
uint64_t* output = (uint64_t*)malloc(sizeof(uint64_t) * n);
for (i = 0; i < n; i++) {
count[(arr[i] / exp) % 10]++;
}
for (i = 1; i < 10; i++) {
count[i] += count[i - 1];
}
for (i = n - 1; i >= 0; i--) {
output[count[(arr[i] / exp) % 10] - 1] = arr[i];
count[(arr[i] / exp) % 10]--;
}
for (i = 0; i < n; i++) {
arr[i] = output[i];
}
free(output);
}
// The main function is to sort arr[] of size n using Radix Sort
void radix_sort(uint64_t* arr, int n) {
// Find the maximum number to know number of digits
uint64_t m = get_max(arr, n);
for (uint64_t exp = 1; m / exp > 0; exp *= 10) bucket_data(arr, n, exp);
}
#endif
/* three-way comparator for uint64_t values, suitable for qsort */
int compare(const void* a, const void* b) {
	uint64_t lhs = *(const uint64_t*)a;
	uint64_t rhs = *(const uint64_t*)b;
	if (lhs < rhs)
		return -1;
	return (lhs > rhs) ? 1 : 0;
}

// The main function is to sort arr[] of size n using Quick Sort
void quick_sort(uint64_t* arr, int n) {
	qsort(arr, n, sizeof(uint64_t), compare);
}

View File

@ -0,0 +1,54 @@
/*
* utils.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "utils.hpp"
#include "mako.hpp"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fmt/format.h>
namespace mako {
/* divide val equally among workers: the first (val % workers) workers receive
 * one extra unit; returns -1 for a worker whose base share is zero and which
 * gets no extra unit.
 * (Fixed: the previous "/* return the last key to be inserted *\/" comment was
 * a stale leftover from the removed insert_end() and described the wrong
 * function.) */
int computeThreadPortion(int val, int p_idx, int t_idx, int total_p, int total_t) {
	int interval = val / total_p / total_t;
	int remaining = val - (interval * total_p * total_t);
	if ((p_idx * total_t + t_idx) < remaining) {
		return interval + 1;
	} else if (interval == 0) {
		return -1;
	}
	/* else */
	return interval;
}
/* number of decimal digits in num (returns 0 for num <= 0) */
int digits(int num) {
	auto count = 0;
	for (; num > 0; num /= 10)
		++count;
	return count;
}
} // namespace mako

View File

@ -1,65 +0,0 @@
#ifndef UTILS_H
#define UTILS_H
#pragma once
#include <stdint.h>
/* uniform-distribution random */
/* return a uniform random number between low and high, both inclusive */
int urand(int low, int high);
/* write a random string of the length of (len-1) to memory pointed by str
* with a null-termination character at str[len-1].
*/
void randstr(char* str, int len);
/* write a random numeric string of the length of (len-1) to memory pointed by str
* with a null-termination character at str[len-1].
*/
void randnumstr(char* str, int len);
/* given the total number of rows to be inserted,
* the worker process index p_idx and the thread index t_idx (both 0-based),
* and the total number of processes, total_p, and threads, total_t,
* returns the first row number assigned to this partition.
*/
int insert_begin(int rows, int p_idx, int t_idx, int total_p, int total_t);
/* similar to insert_begin, insert_end returns the last row numer */
int insert_end(int rows, int p_idx, int t_idx, int total_p, int total_t);
/* devide a value equally among threads */
int compute_thread_portion(int val, int p_idx, int t_idx, int total_p, int total_t);
/* similar to insert_begin/end, compute_thread_tps computes
* the per-thread target TPS for given configuration.
*/
#define compute_thread_tps(val, p_idx, t_idx, total_p, total_t) \
compute_thread_portion(val, p_idx, t_idx, total_p, total_t)
/* similar to compute_thread_tps,
* compute_thread_iters computs the number of iterations.
*/
#define compute_thread_iters(val, p_idx, t_idx, total_p, total_t) \
compute_thread_portion(val, p_idx, t_idx, total_p, total_t)
/* get the number of digits */
int digits(int num);
/* generate a key for a given key number */
/* prefix is "mako" by default, prefixpadding = 1 means 'x' will be in front rather than trailing the keyname */
/* len is the buffer size, key length + null */
void genkey(char* str, char* prefix, int prefixlen, int prefixpadding, int num, int rows, int len);
#if 0
// The main function is to sort arr[] of size n using Radix Sort
void radix_sort(uint64_t arr[], int n);
void bucket_data(uint64_t arr[], int n, uint64_t exp);
uint64_t get_max(uint64_t arr[], int n);
#endif
// The main function is to sort arr[] of size n using Quick Sort
void quick_sort(uint64_t arr[], int n);
int compare(const void* a, const void* b);
#endif /* UTILS_H */

View File

@ -0,0 +1,195 @@
/*
* utils.hpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef UTILS_HPP
#define UTILS_HPP
#pragma once
#include "macro.hpp"
#include "mako.hpp"
#include "fdbclient/zipf.h"
#include <cassert>
#include <chrono>
#include <cstdint>
#include <string_view>
#include <type_traits>
#include <fmt/format.h>
namespace mako {
/* uniform-distribution random */
/* draw an integer uniformly at random from [low, high], inclusive on both ends */
force_inline int urand(int low, int high) {
	const double unit = rand() / (1.0 + RAND_MAX); /* in [0, 1) */
	const int span = high - low + 1;
	return (int)(unit * span + low);
}
/* pick the row number for the next operation:
 * zipfian-distributed when args.zipf is set, uniform over [0, rows) otherwise */
force_inline int nextKey(Arguments const& args) {
	return args.zipf ? zipfian_next() : urand(0, args.rows - 1);
}
/* length of a string_view narrowed to int (for APIs that take int lengths) */
force_inline int intSize(std::string_view sv) {
	const auto n = sv.size();
	return static_cast<int>(n);
}
/* fill str[0..len) with characters drawn uniformly from the printable
 * ASCII range '!' through 'z' */
template <typename Char>
force_inline void randomString(Char* str, int len) {
	assert(len >= 0);
	auto p = str;
	const auto end = str + len;
	while (p != end) {
		*p++ = ('!' + urand(0, 'z' - '!')); /* printable char, '!' .. 'z' */
	}
}
/* given the total number of rows to be inserted,
 * the worker process index p_idx and the thread index t_idx (both 0-based),
 * and the total number of processes, total_p, and threads, total_t,
 * returns the first row number assigned to this partition.
 */
force_inline int insertBegin(int rows, int p_idx, int t_idx, int total_p, int total_t) {
	const double per_thread = (double)rows / total_p / total_t;
	const int slot = p_idx * total_t + t_idx; /* global 0-based thread slot */
	return (int)(round(per_thread * slot));
}
/* similar to insertBegin, insertEnd returns the last row number
 * (inclusive) assigned to this partition */
force_inline int insertEnd(int rows, int p_idx, int t_idx, int total_p, int total_t) {
	const double per_thread = (double)rows / total_p / total_t;
	const int next_slot = p_idx * total_t + t_idx + 1; /* first slot after ours */
	return (int)(round(per_thread * next_slot - 1));
}
/* divide a value equally among all threads (defined in utils source) */
int computeThreadPortion(int val, int p_idx, int t_idx, int total_p, int total_t);
/* similar to insertBegin/end, computeThreadTps computes
 * the per-thread target TPS for given configuration.
 */
#define computeThreadTps(val, p_idx, t_idx, total_p, total_t) computeThreadPortion(val, p_idx, t_idx, total_p, total_t)
/* similar to computeThreadTps,
 * computeThreadIters computes the number of iterations.
 */
#define computeThreadIters(val, p_idx, t_idx, total_p, total_t) \
	computeThreadPortion(val, p_idx, t_idx, total_p, total_t)
/* get the number of decimal digits in num */
int digits(int num);
/* write num into str[0..len) as a right-aligned decimal string,
 * left-padded with '0' (high-order digits that do not fit are dropped) */
template <typename Char>
force_inline void numericWithFill(Char* str, int len, int num) {
	static_assert(sizeof(Char) == 1);
	assert(num >= 0);
	memset(str, '0', len);
	auto pos = len;
	while (num > 0 && pos > 0) {
		str[--pos] = '0' + (num % 10);
		num /= 10;
	}
}
/* generate the key for row number num into str (args.key_length bytes):
 * prefix followed by the zero-padded row number, padded to key_length with 'x';
 * prefixpadding != 0 puts the 'x' padding in front instead of behind */
template <typename Char>
void genKey(Char* str, std::string_view prefix, Arguments const& args, int num) {
	static_assert(sizeof(Char) == 1);
	memset(str, 'x', args.key_length);
	const auto prefixLen = static_cast<int>(prefix.size());
	const auto start = args.prefixpadding ? (args.key_length - prefixLen - args.row_digits) : 0;
	memcpy(&str[start], prefix.data(), prefixLen);
	numericWithFill(&str[start + prefixLen], args.row_digits, num);
}
/* populate key1 (and, for range operations, key2) for operation op:
 * key1 gets a randomly chosen row; key2 gets the end of the range,
 * clamped so it never runs past the last row */
template <typename Char>
force_inline void prepareKeys(int op,
                              std::basic_string<Char>& key1,
                              std::basic_string<Char>& key2,
                              Arguments const& args) {
	const auto beginNum = nextKey(args);
	genKey(key1.data(), KEY_PREFIX, args, beginNum);
	const auto rangeLen = args.txnspec.ops[op][OP_RANGE];
	if (rangeLen > 0) {
		const auto endNum = std::min(beginNum + rangeLen - 1, args.rows - 1);
		genKey(key2.data(), KEY_PREFIX, args, endNum);
	}
}
// Runs the supplied callable unconditionally when the guard leaves scope.
template <typename Func>
class ExitGuard {
	std::decay_t<Func> callback_;

public:
	ExitGuard(Func&& fn) : callback_(std::forward<Func>(fn)) {}

	~ExitGuard() { callback_(); }
};
// Runs the supplied callable only when the scope is left by stack unwinding
// (an exception is in flight); a normal scope exit does not fire it.
template <typename Func>
class FailGuard {
	std::decay_t<Func> callback_;

public:
	FailGuard(Func&& fn) : callback_(std::forward<Func>(fn)) {}

	~FailGuard() {
		if (std::uncaught_exceptions())
			callback_();
	}
};
// trace helpers

// Fixed column widths for the aligned stats table printed by mako.
constexpr const int STATS_TITLE_WIDTH = 12;
constexpr const int STATS_FIELD_WIDTH = 12;

// Print value left-justified in a STATS_TITLE_WIDTH column, then a space.
template <typename Value>
void putTitle(Value&& value) {
	fmt::print("{0: <{1}} ", std::forward<Value>(value), STATS_TITLE_WIDTH);
}

// Print value right-justified in a STATS_TITLE_WIDTH column, then a space.
template <typename Value>
void putTitleRight(Value&& value) {
	fmt::print("{0: >{1}} ", std::forward<Value>(value), STATS_TITLE_WIDTH);
}

// Print a '=' rule the width of the title column (table separator).
inline void putTitleBar() {
	fmt::print("{0:=<{1}} ", "", STATS_TITLE_WIDTH);
}

// Print value right-justified in a STATS_FIELD_WIDTH column, then a space.
template <typename Value>
void putField(Value&& value) {
	fmt::print("{0: >{1}} ", std::forward<Value>(value), STATS_FIELD_WIDTH);
}

// Print a '=' rule the width of a field column (table separator).
inline void putFieldBar() {
	fmt::print("{0:=>{1}} ", "", STATS_FIELD_WIDTH);
}

// Print a floating-point value right-justified with the given precision.
template <typename Value>
void putFieldFloat(Value&& value, int precision) {
	fmt::print("{0: >{1}.{2}f} ", std::forward<Value>(value), STATS_FIELD_WIDTH, precision);
}
} // namespace mako
#endif /* UTILS_HPP */

View File

@ -21,22 +21,39 @@ endif()
include(CheckSymbolExists) include(CheckSymbolExists)
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support") set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support")
set(USE_WOLFSSL OFF CACHE BOOL "Build against WolfSSL instead of OpenSSL")
set(USE_OPENSSL ON CACHE BOOL "Build against OpenSSL")
if(DISABLE_TLS) if(DISABLE_TLS)
set(WITH_TLS OFF) set(WITH_TLS OFF)
else() else()
set(OPENSSL_USE_STATIC_LIBS TRUE) if(USE_WOLFSSL)
if(WIN32) set(WOLFSSL_USE_STATIC_LIBS TRUE)
set(OPENSSL_MSVC_STATIC_RT ON) find_package(WolfSSL)
endif() if(WOLFSSL_FOUND)
find_package(OpenSSL) set(CMAKE_REQUIRED_INCLUDES ${WOLFSSL_INCLUDE_DIR})
if(OPENSSL_FOUND) set(WITH_TLS ON)
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR}) add_compile_options(-DHAVE_OPENSSL)
set(WITH_TLS ON) add_compile_options(-DHAVE_WOLFSSL)
add_compile_options(-DHAVE_OPENSSL) else()
else() message(STATUS "WolfSSL was not found - Will compile without TLS Support")
message(STATUS "OpenSSL was not found - Will compile without TLS Support") message(STATUS "You can set WOLFSSL_ROOT_DIR to help cmake find it")
message(STATUS "You can set OPENSSL_ROOT_DIR to help cmake find it") set(WITH_TLS OFF)
set(WITH_TLS OFF) endif()
elseif(USE_OPENSSL)
set(OPENSSL_USE_STATIC_LIBS TRUE)
if(WIN32)
set(OPENSSL_MSVC_STATIC_RT ON)
endif()
find_package(OpenSSL)
if(OPENSSL_FOUND)
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL)
else()
message(STATUS "OpenSSL was not found - Will compile without TLS Support")
message(STATUS "You can set OPENSSL_ROOT_DIR to help cmake find it")
set(WITH_TLS OFF)
endif()
endif() endif()
endif() endif()

View File

@ -198,7 +198,7 @@ function(fdb_configure_and_install)
string(TOLOWER "${pkg}" package) string(TOLOWER "${pkg}" package)
string(TOUPPER "${IN_DESTINATION}" destination) string(TOUPPER "${IN_DESTINATION}" destination)
get_install_dest(${pkg} INCLUDE INCLUDE_DIR) get_install_dest(${pkg} INCLUDE INCLUDE_DIR)
get_install_dest(${pkg} INCLUDE LIB_DIR) get_install_dest(${pkg} LIB LIB_DIR)
get_install_dest(${pkg} ${destination} install_path) get_install_dest(${pkg} ${destination} install_path)
string(REGEX REPLACE "\.in$" "" name "${IN_FILE}") string(REGEX REPLACE "\.in$" "" name "${IN_FILE}")
get_filename_component(name "${name}" NAME) get_filename_component(name "${name}" NAME)

63
cmake/FindWolfSSL.cmake Normal file
View File

@ -0,0 +1,63 @@
# FindWolfSSL
# -----------
# Locates a WolfSSL installation and exposes it through the imported target
# WolfSSL, plus OpenSSL::SSL / OpenSSL::CRYPTO aliases so the rest of the
# build can link against WolfSSL as a drop-in OpenSSL replacement.
#
# Result variables:
#   WOLFSSL_FOUND, WOLFSSL_INCLUDE_DIR(S), WOLFSSL_LIBRARY / WOLFSSL_LIBRARIES
# Hints:
#   WOLFSSL_ROOT_DIR        - installation prefix to search first
#   WOLFSSL_USE_STATIC_LIBS - prefer static libraries when set

# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES.
# Save the original list so we can restore it afterwards and not leak the
# change into later find_library() calls in the including scope.
if(WOLFSSL_USE_STATIC_LIBS)
  set(_wolfssl_orig_find_library_suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES})
  if(WIN32)
    set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
  else()
    set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
  endif()
endif()

find_path(WOLFSSL_ROOT_DIR
  NAMES
    include/wolfssl/options.h
)

find_path(WOLFSSL_INCLUDE_DIR
  NAMES
    wolfssl/ssl.h
  PATHS
    ${WOLFSSL_ROOT_DIR}/include
)

find_library(WOLFSSL_LIBRARY
  NAMES
    wolfssl
  PATHS
    ${WOLFSSL_ROOT_DIR}/lib
)

# Restore the suffix list now that all find_library() calls are done.
if(WOLFSSL_USE_STATIC_LIBS)
  set(CMAKE_FIND_LIBRARY_SUFFIXES ${_wolfssl_orig_find_library_suffixes})
  unset(_wolfssl_orig_find_library_suffixes)
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(WolfSSL
  REQUIRED_VARS
    WOLFSSL_LIBRARY
    WOLFSSL_INCLUDE_DIR
  FAIL_MESSAGE
    "Could NOT find WolfSSL"
)

mark_as_advanced(
  WOLFSSL_ROOT_DIR
  WOLFSSL_LIBRARY
  WOLFSSL_INCLUDE_DIR
)

if(WOLFSSL_FOUND)
  message(STATUS "Found wolfssl library: ${WOLFSSL_LIBRARY}")
  message(STATUS "Found wolfssl includes: ${WOLFSSL_INCLUDE_DIR}")
  # Legacy variables for consumers that do not use the imported target.
  set(WOLFSSL_INCLUDE_DIRS ${WOLFSSL_INCLUDE_DIR})
  set(WOLFSSL_LIBRARIES ${WOLFSSL_LIBRARY})
  # GLOBAL so the target and its aliases are visible outside this directory.
  add_library(WolfSSL UNKNOWN IMPORTED GLOBAL)
  add_library(OpenSSL::SSL ALIAS WolfSSL)
  add_library(OpenSSL::CRYPTO ALIAS WolfSSL)
  # NOTE(review): a previous revision also linked ${WOLFSSL_TLS_LIBRARY},
  # ${WOLFSSL_SSL_LIBRARY} and ${WOLFSSL_CRYPTO_LIBRARY}; none of those
  # variables are ever set by this module, so the call expanded to nothing
  # and has been dropped. The include directory was likewise set twice
  # (target_include_directories + property); it is now set once below.
  set_target_properties(WolfSSL PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${WOLFSSL_INCLUDE_DIR}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${WOLFSSL_LIBRARY}")
endif()

View File

@ -48,29 +48,25 @@
--- ---
# name: test_execstack_permissions_libfdb_c[centos-versioned] # name: test_execstack_permissions_libfdb_c[centos-versioned]
' '
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 - /lib64/libfdb_c.so
0x0000000000000000 0x0000000000000000 RW 0x0
' '
--- ---
# name: test_execstack_permissions_libfdb_c[centos] # name: test_execstack_permissions_libfdb_c[centos]
' '
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 - /lib64/libfdb_c.so
0x0000000000000000 0x0000000000000000 RW 0x0
' '
--- ---
# name: test_execstack_permissions_libfdb_c[ubuntu-versioned] # name: test_execstack_permissions_libfdb_c[ubuntu-versioned]
' '
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 - /lib/libfdb_c.so
0x0000000000000000 0x0000000000000000 RW 0x0
' '
--- ---
# name: test_execstack_permissions_libfdb_c[ubuntu] # name: test_execstack_permissions_libfdb_c[ubuntu]
' '
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 - /lib/libfdb_c.so
0x0000000000000000 0x0000000000000000 RW 0x0
' '
--- ---

View File

@ -22,6 +22,7 @@ import pathlib
import pytest import pytest
import shlex import shlex
import subprocess import subprocess
import sys
import uuid import uuid
from typing import Iterator, List, Optional, Union from typing import Iterator, List, Optional, Union
@ -29,9 +30,14 @@ from typing import Iterator, List, Optional, Union
def run(args: List[str]) -> str: def run(args: List[str]) -> str:
print("$ {}".format(" ".join(map(shlex.quote, args)))) print("$ {}".format(" ".join(map(shlex.quote, args))))
result = subprocess.check_output(args).decode("utf-8") result = []
print(result, end="") proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
return result while proc.poll() is None:
text = proc.stdout.readline().decode("utf-8")
result.append(text)
sys.stdout.write(text)
assert proc.returncode == 0
return "".join(result)
class Image: class Image:
@ -106,7 +112,16 @@ def ubuntu_image_with_fdb_helper(versioned: bool) -> Iterator[Optional[Image]]:
for deb in debs: for deb in debs:
container.copy_to(deb, "/opt") container.copy_to(deb, "/opt")
container.run(["bash", "-c", "apt-get update"]) container.run(["bash", "-c", "apt-get update"])
container.run(["bash", "-c", "apt-get install --yes binutils"]) # this is for testing libfdb_c execstack permissions container.run(
["bash", "-c", "apt-get install --yes execstack"]
) # this is for testing libfdb_c execstack permissions
container.run(
[
"bash",
"-c",
"DEBIAN_FRONTEND=noninteractive DEBCONF_NONINTERACTIVE_SEEN=true apt-get install --yes gcc pkg-config cmake",
]
) # this is for testing building client apps
container.run(["bash", "-c", "dpkg -i /opt/*.deb"]) container.run(["bash", "-c", "dpkg -i /opt/*.deb"])
container.run(["bash", "-c", "rm /opt/*.deb"]) container.run(["bash", "-c", "rm /opt/*.deb"])
image = container.commit() image = container.commit()
@ -151,7 +166,12 @@ def centos_image_with_fdb_helper(versioned: bool) -> Iterator[Optional[Image]]:
for rpm in rpms: for rpm in rpms:
container.copy_to(rpm, "/opt") container.copy_to(rpm, "/opt")
container.run(["bash", "-c", "yum update -y"]) container.run(["bash", "-c", "yum update -y"])
container.run(["bash", "-c", "yum install -y binutils"]) # this is for testing libfdb_c execstack permissions container.run(
["bash", "-c", "yum install -y prelink"]
) # this is for testing libfdb_c execstack permissions
container.run(
["bash", "-c", "yum install -y gcc pkg-config cmake make"]
) # this is for testing building client apps
container.run(["bash", "-c", "yum install -y /opt/*.rpm"]) container.run(["bash", "-c", "yum install -y /opt/*.rpm"])
container.run(["bash", "-c", "rm /opt/*.rpm"]) container.run(["bash", "-c", "rm /opt/*.rpm"])
image = container.commit() image = container.commit()
@ -232,6 +252,70 @@ def test_db_available(linux_container: Container):
linux_container.run(["fdbcli", "--exec", "get x"]) linux_container.run(["fdbcli", "--exec", "get x"])
def test_client_app(linux_container: Container):
test_client_app_script = r"""#!/bin/bash
set -euxo pipefail
cat > app.c << EOF
// FDB_API_VERSION doesn't necessarily need to be kept up to date here
#define FDB_API_VERSION 700
#include <foundationdb/fdb_c.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
static void check(fdb_error_t e) {
if (e) {
fprintf(stderr, "%s\n", fdb_get_error(e));
fflush(NULL);
abort();
}
}
int result = 0;
static void callback(FDBFuture* f, void* _ignored) {
check(fdb_stop_network());
}
int main() {
check(fdb_select_api_version(700));
check(fdb_setup_network());
FDBDatabase* db;
check(fdb_create_database(NULL, &db));
FDBTransaction* tr;
check(fdb_database_create_transaction(db, &tr));
FDBFuture* f = fdb_transaction_get_read_version(tr);
check(fdb_future_set_callback(f, callback, NULL));
check(fdb_run_network());
fdb_future_destroy(f);
fdb_transaction_destroy(tr);
fdb_database_destroy(db);
return 0;
}
EOF
cc app.c `pkg-config foundationdb-client --cflags --libs`
./a.out
cat > CMakeLists.txt << EOF
project(app C)
find_package(FoundationDB-Client REQUIRED)
add_executable(app app.c)
target_link_libraries(app PRIVATE fdb_c)
EOF
mkdir build
cd build
cmake ..
make
./app
"""
linux_container.run(["bash", "-c", test_client_app_script])
def test_write(linux_container: Container, snapshot): def test_write(linux_container: Container, snapshot):
linux_container.run(["fdbcli", "--exec", "writemode on; set x y"]) linux_container.run(["fdbcli", "--exec", "writemode on; set x y"])
assert snapshot == linux_container.run(["fdbcli", "--exec", "get x"]) assert snapshot == linux_container.run(["fdbcli", "--exec", "get x"])
@ -243,7 +327,7 @@ def test_execstack_permissions_libfdb_c(linux_container: Container, snapshot):
[ [
"bash", "bash",
"-c", "-c",
"readelf -l $(ldconfig -p | grep libfdb_c | awk '{print $(NF)}') | grep -A1 GNU_STACK", "execstack -q $(ldconfig -p | grep libfdb_c | awk '{print $(NF)}')",
] ]
) )

View File

@ -148,7 +148,7 @@ is equivalent to something like:
tr.set(Tuple.from("class", "class1").pack(), encodeInt(100)); tr.set(Tuple.from("class", "class1").pack(), encodeInt(100));
t.commit().join(); t.commit().join();
} catch (RuntimeException e) { } catch (RuntimeException e) {
t = t.onError(e).get(); t = t.onError(e).join();
} }
} }
@ -290,10 +290,10 @@ This is easy -- we simply add a condition to check that the value is non-zero. L
private static void signup(TransactionContext db, final String s, final String c) { private static void signup(TransactionContext db, final String s, final String c) {
db.run((Transaction tr) -> { db.run((Transaction tr) -> {
byte[] rec = Tuple.from("attends", s, c).pack(); byte[] rec = Tuple.from("attends", s, c).pack();
if (tr.get(rec).get() != null) if (tr.get(rec).join() != null)
return null; // already signed up return null; // already signed up
int seatsLeft = decodeInt(tr.get(Tuple.from("class", c).pack()).get()); int seatsLeft = decodeInt(tr.get(Tuple.from("class", c).pack()).join());
if (seatsLeft == 0) if (seatsLeft == 0)
throw new IllegalStateException("No remaining seats"); throw new IllegalStateException("No remaining seats");

View File

@ -1189,7 +1189,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile); ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
try { try {
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first); ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
wait(ccf->resolveHostnames());
} catch (Error& e) { } catch (Error& e) {
if (e.code() == error_code_operation_cancelled) { if (e.code() == error_code_operation_cancelled) {
throw; throw;

View File

@ -28,28 +28,46 @@
#include "fdbclient/CoordinationInterface.h" #include "fdbclient/CoordinationInterface.h"
// Determine public IP address by calling the first coordinator. // Determine public IP address by calling the first available coordinator.
// If fail connecting all coordinators, throw bind_failed().
IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) { IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
try { int size = ccs.coordinators().size() + ccs.hostnames.size();
using namespace boost::asio; int index = 0;
loop {
try {
using namespace boost::asio;
io_service ioService; io_service ioService;
ip::udp::socket socket(ioService); ip::udp::socket socket(ioService);
ccs.resolveHostnamesBlocking(); NetworkAddress coordAddr;
const auto& coordAddr = ccs.coordinators()[0]; // Try coords first, because they don't need to be resolved.
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6())) if (index < ccs.coordinators().size()) {
: ip::address(ip::address_v4(coordAddr.ip.toV4())); coordAddr = ccs.coordinators()[index];
} else {
Hostname& h = ccs.hostnames[index - ccs.coordinators().size()];
Optional<NetworkAddress> resolvedAddr = h.resolveBlocking();
if (!resolvedAddr.present()) {
throw lookup_failed();
}
coordAddr = resolvedAddr.get();
}
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
: ip::address(ip::address_v4(coordAddr.ip.toV4()));
ip::udp::endpoint endpoint(boostIp, coordAddr.port); ip::udp::endpoint endpoint(boostIp, coordAddr.port);
socket.connect(endpoint); socket.connect(endpoint);
IPAddress ip = coordAddr.ip.isV6() ? IPAddress(socket.local_endpoint().address().to_v6().to_bytes()) IPAddress ip = coordAddr.ip.isV6() ? IPAddress(socket.local_endpoint().address().to_v6().to_bytes())
: IPAddress(socket.local_endpoint().address().to_v4().to_ulong()); : IPAddress(socket.local_endpoint().address().to_v4().to_ulong());
socket.close(); socket.close();
return ip; return ip;
} catch (boost::system::system_error e) { } catch (...) {
fprintf(stderr, "Error determining public address: %s\n", e.what()); ++index;
throw bind_failed(); if (index == size) {
fprintf(stderr, "Error determining public address.\n");
throw bind_failed();
}
}
} }
} }

View File

@ -65,7 +65,6 @@ set(FDBCLIENT_SRCS
GlobalConfig.actor.cpp GlobalConfig.actor.cpp
GrvProxyInterface.h GrvProxyInterface.h
HighContentionPrefixAllocator.actor.h HighContentionPrefixAllocator.actor.h
HTTP.actor.cpp
IClientApi.h IClientApi.h
IConfigTransaction.cpp IConfigTransaction.cpp
IConfigTransaction.h IConfigTransaction.h

View File

@ -21,6 +21,7 @@
#include "fdbclient/Knobs.h" #include "fdbclient/Knobs.h"
#include "fdbclient/FDBTypes.h" #include "fdbclient/FDBTypes.h"
#include "fdbclient/SystemData.h" #include "fdbclient/SystemData.h"
#include "fdbclient/Tenant.h"
#include "flow/UnitTest.h" #include "flow/UnitTest.h"
#define init(...) KNOB_FN(__VA_ARGS__, INIT_ATOMIC_KNOB, INIT_KNOB)(__VA_ARGS__) #define init(...) KNOB_FN(__VA_ARGS__, INIT_ATOMIC_KNOB, INIT_KNOB)(__VA_ARGS__)
@ -82,6 +83,7 @@ void ClientKnobs::initialize(Randomize randomize) {
init( CHANGE_FEED_CACHE_SIZE, 100000 ); if( randomize && BUGGIFY ) CHANGE_FEED_CACHE_SIZE = 1; init( CHANGE_FEED_CACHE_SIZE, 100000 ); if( randomize && BUGGIFY ) CHANGE_FEED_CACHE_SIZE = 1;
init( CHANGE_FEED_POP_TIMEOUT, 5.0 ); init( CHANGE_FEED_POP_TIMEOUT, 5.0 );
init( CHANGE_FEED_STREAM_MIN_BYTES, 1e4 ); if( randomize && BUGGIFY ) CHANGE_FEED_STREAM_MIN_BYTES = 1; init( CHANGE_FEED_STREAM_MIN_BYTES, 1e4 ); if( randomize && BUGGIFY ) CHANGE_FEED_STREAM_MIN_BYTES = 1;
init( TENANT_PREFIX_SIZE_LIMIT, 28 ); ASSERT(TENANT_PREFIX_SIZE_LIMIT >= TenantMapEntry::ROOT_PREFIX_SIZE); // includes 8-byte ID and optional tenant subspace
init( MAX_BATCH_SIZE, 1000 ); if( randomize && BUGGIFY ) MAX_BATCH_SIZE = 1; init( MAX_BATCH_SIZE, 1000 ); if( randomize && BUGGIFY ) MAX_BATCH_SIZE = 1;
init( GRV_BATCH_TIMEOUT, 0.005 ); if( randomize && BUGGIFY ) GRV_BATCH_TIMEOUT = 0.1; init( GRV_BATCH_TIMEOUT, 0.005 ); if( randomize && BUGGIFY ) GRV_BATCH_TIMEOUT = 0.1;

View File

@ -81,6 +81,7 @@ public:
int64_t CHANGE_FEED_CACHE_SIZE; int64_t CHANGE_FEED_CACHE_SIZE;
double CHANGE_FEED_POP_TIMEOUT; double CHANGE_FEED_POP_TIMEOUT;
int64_t CHANGE_FEED_STREAM_MIN_BYTES; int64_t CHANGE_FEED_STREAM_MIN_BYTES;
int64_t TENANT_PREFIX_SIZE_LIMIT;
int MAX_BATCH_SIZE; int MAX_BATCH_SIZE;
double GRV_BATCH_TIMEOUT; double GRV_BATCH_TIMEOUT;

View File

@ -61,61 +61,31 @@ struct ClientLeaderRegInterface {
// - There is no address present more than once // - There is no address present more than once
class ClusterConnectionString { class ClusterConnectionString {
public: public:
enum ConnectionStringStatus { RESOLVED, RESOLVING, UNRESOLVED };
ClusterConnectionString() {} ClusterConnectionString() {}
ClusterConnectionString(const std::string& connStr); ClusterConnectionString(const std::string& connectionString);
ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key); ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key);
ClusterConnectionString(const std::vector<Hostname>& hosts, Key key); ClusterConnectionString(const std::vector<Hostname>& hosts, Key key);
ClusterConnectionString(const ClusterConnectionString& rhs) { operator=(rhs); }
ClusterConnectionString& operator=(const ClusterConnectionString& rhs) {
// Copy everything except AsyncTrigger resolveFinish.
status = rhs.status;
coords = rhs.coords;
hostnames = rhs.hostnames;
networkAddressToHostname = rhs.networkAddressToHostname;
key = rhs.key;
keyDesc = rhs.keyDesc;
connectionString = rhs.connectionString;
return *this;
}
std::vector<NetworkAddress> const& coordinators() const { return coords; } std::vector<NetworkAddress> const& coordinators() const { return coords; }
void addResolved(const Hostname& hostname, const NetworkAddress& address) {
coords.push_back(address);
networkAddressToHostname.emplace(address, hostname);
}
Key clusterKey() const { return key; } Key clusterKey() const { return key; }
Key clusterKeyName() const { Key clusterKeyName() const {
return keyDesc; return keyDesc;
} // Returns the "name" or "description" part of the clusterKey (the part before the ':') } // Returns the "name" or "description" part of the clusterKey (the part before the ':')
std::string toString() const; std::string toString() const;
static std::string getErrorString(std::string const& source, Error const& e); static std::string getErrorString(std::string const& source, Error const& e);
Future<Void> resolveHostnames();
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
// should be preferred.
void resolveHostnamesBlocking();
// This function derives the member connectionString from the current key, coordinators and hostnames.
void resetConnectionString();
void resetToUnresolved();
void parseKey(const std::string& key); void parseKey(const std::string& key);
ConnectionStringStatus status = RESOLVED;
AsyncTrigger resolveFinish;
// This function tries to resolve all hostnames once, and return them with coords. // This function tries to resolve all hostnames once, and return them with coords.
// Best effort, does not guarantee that the resolves succeed. // Best effort, does not guarantee that the resolves succeed.
Future<std::vector<NetworkAddress>> tryResolveHostnames(); Future<std::vector<NetworkAddress>> tryResolveHostnames();
std::vector<NetworkAddress> coords; std::vector<NetworkAddress> coords;
std::vector<Hostname> hostnames; std::vector<Hostname> hostnames;
std::unordered_map<NetworkAddress, Hostname> networkAddressToHostname;
private: private:
void parseConnString(); void parseConnString();
Key key, keyDesc; Key key, keyDesc;
std::string connectionString;
}; };
FDB_DECLARE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted); FDB_DECLARE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted);
@ -165,12 +135,6 @@ public:
// Signals to the connection record that it was successfully used to connect to a cluster. // Signals to the connection record that it was successfully used to connect to a cluster.
void notifyConnected(); void notifyConnected();
ClusterConnectionString::ConnectionStringStatus connectionStringStatus() const;
Future<Void> resolveHostnames();
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
// should be preferred.
void resolveHostnamesBlocking();
virtual void addref() = 0; virtual void addref() = 0;
virtual void delref() = 0; virtual void delref() = 0;
@ -275,12 +239,21 @@ struct OpenDatabaseCoordRequest {
Standalone<VectorRef<ClientVersionRef>> supportedVersions; Standalone<VectorRef<ClientVersionRef>> supportedVersions;
UID knownClientInfoID; UID knownClientInfoID;
Key clusterKey; Key clusterKey;
std::vector<Hostname> hostnames;
std::vector<NetworkAddress> coordinators; std::vector<NetworkAddress> coordinators;
ReplyPromise<CachedSerialization<struct ClientDBInfo>> reply; ReplyPromise<CachedSerialization<struct ClientDBInfo>> reply;
template <class Ar> template <class Ar>
void serialize(Ar& ar) { void serialize(Ar& ar) {
serializer(ar, issues, supportedVersions, traceLogGroup, knownClientInfoID, clusterKey, coordinators, reply); serializer(ar,
issues,
supportedVersions,
traceLogGroup,
knownClientInfoID,
clusterKey,
hostnames,
coordinators,
reply);
} }
}; };

View File

@ -20,6 +20,7 @@
#include "fdbclient/FDBTypes.h" #include "fdbclient/FDBTypes.h"
#include "fdbclient/Knobs.h" #include "fdbclient/Knobs.h"
#include "fdbclient/NativeAPI.actor.h"
KeyRef keyBetween(const KeyRangeRef& keys) { KeyRef keyBetween(const KeyRangeRef& keys) {
int pos = 0; // will be the position of the first difference between keys.begin and keys.end int pos = 0; // will be the position of the first difference between keys.begin and keys.end
@ -40,16 +41,14 @@ KeyRef keyBetween(const KeyRangeRef& keys) {
} }
void KeySelectorRef::setKey(KeyRef const& key) { void KeySelectorRef::setKey(KeyRef const& key) {
// There are no keys in the database with size greater than KEY_SIZE_LIMIT, so if this key selector has a key // There are no keys in the database with size greater than the max key size, so if this key selector has a key
// which is large, then we can translate it to an equivalent key selector with a smaller key // which is large, then we can translate it to an equivalent key selector with a smaller key
if (key.size() > int64_t maxKeySize = getMaxKeySize(key);
(key.startsWith(LiteralStringRef("\xff")) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) if (key.size() > maxKeySize) {
this->key = key.substr(0, this->key = key.substr(0, maxKeySize + 1);
(key.startsWith(LiteralStringRef("\xff")) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT } else {
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
else
this->key = key; this->key = key;
}
} }
void KeySelectorRef::setKeyUnlimited(KeyRef const& key) { void KeySelectorRef::setKeyUnlimited(KeyRef const& key) {

View File

@ -746,6 +746,17 @@ Future<Optional<TenantMapEntry>> createTenantTransaction(Transaction tr, TenantN
state Optional<Value> lastIdVal = wait(safeThreadFutureToFuture(lastIdFuture)); state Optional<Value> lastIdVal = wait(safeThreadFutureToFuture(lastIdFuture));
Optional<Value> tenantDataPrefix = wait(safeThreadFutureToFuture(tenantDataPrefixFuture)); Optional<Value> tenantDataPrefix = wait(safeThreadFutureToFuture(tenantDataPrefixFuture));
if (tenantDataPrefix.present() &&
tenantDataPrefix.get().size() + TenantMapEntry::ROOT_PREFIX_SIZE > CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT) {
TraceEvent(SevWarnAlways, "TenantPrefixTooLarge")
.detail("TenantSubspace", tenantDataPrefix.get())
.detail("TenantSubspaceLength", tenantDataPrefix.get().size())
.detail("RootPrefixLength", TenantMapEntry::ROOT_PREFIX_SIZE)
.detail("MaxTenantPrefixSize", CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT);
throw client_invalid_operation();
}
state TenantMapEntry newTenant(lastIdVal.present() ? TenantMapEntry::prefixToId(lastIdVal.get()) + 1 : 0, state TenantMapEntry newTenant(lastIdVal.present() ? TenantMapEntry::prefixToId(lastIdVal.get()) + 1 : 0,
tenantDataPrefix.present() ? (KeyRef)tenantDataPrefix.get() : ""_sr); tenantDataPrefix.present() ? (KeyRef)tenantDataPrefix.get() : ""_sr);

View File

@ -782,7 +782,7 @@ ACTOR Future<std::vector<ProcessData>> getWorkers(Database cx) {
} }
} }
ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) { ACTOR Future<Optional<ClusterConnectionString>> getConnectionString(Database cx) {
state Transaction tr(cx); state Transaction tr(cx);
loop { loop {
try { try {
@ -790,9 +790,8 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
Optional<Value> currentKey = wait(tr.get(coordinatorsKey)); Optional<Value> currentKey = wait(tr.get(coordinatorsKey));
if (!currentKey.present()) if (!currentKey.present())
return std::vector<NetworkAddress>(); return Optional<ClusterConnectionString>();
return ClusterConnectionString(currentKey.get().toString());
return ClusterConnectionString(currentKey.get().toString()).coordinators();
} catch (Error& e) { } catch (Error& e) {
wait(tr.onError(e)); wait(tr.onError(e));
} }
@ -801,7 +800,7 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr, ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
Reference<IQuorumChange> change, Reference<IQuorumChange> change,
ClusterConnectionString* conn) { std::vector<NetworkAddress> desiredCoordinators) {
tr->setOption(FDBTransactionOptions::LOCK_AWARE); tr->setOption(FDBTransactionOptions::LOCK_AWARE);
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES); tr->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
@ -812,47 +811,45 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely? return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely?
state ClusterConnectionString old(currentKey.get().toString()); state ClusterConnectionString old(currentKey.get().toString());
wait(old.resolveHostnames());
if (tr->getDatabase()->getConnectionRecord() && if (tr->getDatabase()->getConnectionRecord() &&
old.clusterKeyName().toString() != old.clusterKeyName().toString() !=
tr->getDatabase()->getConnectionRecord()->getConnectionString().clusterKeyName()) tr->getDatabase()->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database?? return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS; state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!conn->coords.size()) { if (!desiredCoordinators.size()) {
std::vector<NetworkAddress> desiredCoordinatorAddresses = wait(change->getDesiredCoordinators( std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
tr, tr,
old.coordinators(), oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)), Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result)); result));
conn->coords = desiredCoordinatorAddresses; desiredCoordinators = _desiredCoordinators;
} }
if (result != CoordinatorsResult::SUCCESS) if (result != CoordinatorsResult::SUCCESS)
return result; return result;
if (!conn->coordinators().size()) if (!desiredCoordinators.size())
return CoordinatorsResult::INVALID_NETWORK_ADDRESSES; return CoordinatorsResult::INVALID_NETWORK_ADDRESSES;
std::sort(conn->coords.begin(), conn->coords.end()); std::sort(desiredCoordinators.begin(), desiredCoordinators.end());
std::sort(conn->hostnames.begin(), conn->hostnames.end());
std::string newName = change->getDesiredClusterKeyName(); std::string newName = change->getDesiredClusterKeyName();
if (newName.empty()) if (newName.empty())
newName = old.clusterKeyName().toString(); newName = old.clusterKeyName().toString();
if (old.coordinators() == conn->coordinators() && old.clusterKeyName() == newName) if (oldCoordinators == desiredCoordinators && old.clusterKeyName() == newName)
return CoordinatorsResult::SAME_NETWORK_ADDRESSES; return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
std::string key(newName + ':' + deterministicRandom()->randomAlphaNumeric(32)); state ClusterConnectionString conn(desiredCoordinators,
conn->parseKey(key); StringRef(newName + ':' + deterministicRandom()->randomAlphaNumeric(32)));
conn->resetConnectionString();
if (g_network->isSimulated()) { if (g_network->isSimulated()) {
int i = 0; int i = 0;
int protectedCount = 0; int protectedCount = 0;
while ((protectedCount < ((conn->coordinators().size() / 2) + 1)) && (i < conn->coordinators().size())) { while ((protectedCount < ((desiredCoordinators.size() / 2) + 1)) && (i < desiredCoordinators.size())) {
auto process = g_simulator.getProcessByAddress(conn->coordinators()[i]); auto process = g_simulator.getProcessByAddress(desiredCoordinators[i]);
auto addresses = process->addresses; auto addresses = process->addresses;
if (!process->isReliable()) { if (!process->isReliable()) {
@ -864,14 +861,14 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
if (addresses.secondaryAddress.present()) { if (addresses.secondaryAddress.present()) {
g_simulator.protectedAddresses.insert(process->addresses.secondaryAddress.get()); g_simulator.protectedAddresses.insert(process->addresses.secondaryAddress.get());
} }
TraceEvent("ProtectCoordinator").detail("Address", conn->coordinators()[i]).backtrace(); TraceEvent("ProtectCoordinator").detail("Address", desiredCoordinators[i]).backtrace();
protectedCount++; protectedCount++;
i++; i++;
} }
} }
std::vector<Future<Optional<LeaderInfo>>> leaderServers; std::vector<Future<Optional<LeaderInfo>>> leaderServers;
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(*conn))); ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(conn)));
leaderServers.reserve(coord.clientLeaderServers.size()); leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) for (int i = 0; i < coord.clientLeaderServers.size(); i++)
@ -883,7 +880,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
when(wait(waitForAll(leaderServers))) {} when(wait(waitForAll(leaderServers))) {}
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; } when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
} }
tr->set(coordinatorsKey, conn->toString()); tr->set(coordinatorsKey, conn.toString());
return Optional<CoordinatorsResult>(); return Optional<CoordinatorsResult>();
} }
@ -909,11 +906,12 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
old.clusterKeyName().toString() != cx->getConnectionRecord()->getConnectionString().clusterKeyName()) old.clusterKeyName().toString() != cx->getConnectionRecord()->getConnectionString().clusterKeyName())
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database?? return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS; state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
if (!desiredCoordinators.size()) { if (!desiredCoordinators.size()) {
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators( std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
&tr, &tr,
old.coordinators(), oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)), Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result)); result));
desiredCoordinators = _desiredCoordinators; desiredCoordinators = _desiredCoordinators;
@ -937,7 +935,7 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
if (newName.empty()) if (newName.empty())
newName = old.clusterKeyName().toString(); newName = old.clusterKeyName().toString();
if (old.coordinators() == desiredCoordinators && old.clusterKeyName() == newName) if (oldCoordinators == desiredCoordinators && old.clusterKeyName() == newName)
return retries ? CoordinatorsResult::SUCCESS : CoordinatorsResult::SAME_NETWORK_ADDRESSES; return retries ? CoordinatorsResult::SUCCESS : CoordinatorsResult::SAME_NETWORK_ADDRESSES;
state ClusterConnectionString conn( state ClusterConnectionString conn(
@ -1075,9 +1073,16 @@ struct AutoQuorumChange final : IQuorumChange {
std::vector<Future<Optional<LeaderInfo>>> leaderServers; std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coord.clientLeaderServers.size()); leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) { for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, if (coord.clientLeaderServers[i].hostname.present()) {
GetLeaderRequest(coord.clusterKey, UID()), leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply)); coord.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
} }
Optional<std::vector<Optional<LeaderInfo>>> results = Optional<std::vector<Optional<LeaderInfo>>> results =
wait(timeout(getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY)); wait(timeout(getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY));

View File

@ -56,7 +56,7 @@ struct IQuorumChange : ReferenceCounted<IQuorumChange> {
// Change to use the given set of coordination servers // Change to use the given set of coordination servers
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr, ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
Reference<IQuorumChange> change, Reference<IQuorumChange> change,
ClusterConnectionString* conn); std::vector<NetworkAddress> desiredCoordinators);
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change); ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
Reference<IQuorumChange> autoQuorumChange(int desired = -1); Reference<IQuorumChange> autoQuorumChange(int desired = -1);
Reference<IQuorumChange> noQuorumChange(); Reference<IQuorumChange> noQuorumChange();
@ -146,7 +146,7 @@ ACTOR Future<bool> setHealthyZone(Database cx, StringRef zoneId, double seconds,
ACTOR Future<Void> waitForPrimaryDC(Database cx, StringRef dcId); ACTOR Future<Void> waitForPrimaryDC(Database cx, StringRef dcId);
// Gets the cluster connection string // Gets the cluster connection string
ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx); ACTOR Future<Optional<ClusterConnectionString>> getConnectionString(Database cx);
void schemaCoverage(std::string const& spath, bool covered = true); void schemaCoverage(std::string const& spath, bool covered = true);
bool schemaMatch(json_spirit::mValue const& schema, bool schemaMatch(json_spirit::mValue const& schema,

View File

@ -77,18 +77,6 @@ void IClusterConnectionRecord::setPersisted() {
connectionStringNeedsPersisted = false; connectionStringNeedsPersisted = false;
} }
ClusterConnectionString::ConnectionStringStatus IClusterConnectionRecord::connectionStringStatus() const {
return cs.status;
}
Future<Void> IClusterConnectionRecord::resolveHostnames() {
return cs.resolveHostnames();
}
void IClusterConnectionRecord::resolveHostnamesBlocking() {
cs.resolveHostnamesBlocking();
}
std::string ClusterConnectionString::getErrorString(std::string const& source, Error const& e) { std::string ClusterConnectionString::getErrorString(std::string const& source, Error const& e) {
if (e.code() == error_code_connection_string_invalid) { if (e.code() == error_code_connection_string_invalid) {
return format("Invalid connection string `%s: %d %s", source.c_str(), e.code(), e.what()); return format("Invalid connection string `%s: %d %s", source.c_str(), e.code(), e.what());
@ -97,101 +85,19 @@ std::string ClusterConnectionString::getErrorString(std::string const& source, E
} }
} }
ACTOR Future<Void> resolveHostnamesImpl(ClusterConnectionString* self) { ClusterConnectionString::ClusterConnectionString(const std::string& connectionString) {
loop { auto trimmed = trim(connectionString);
if (self->status == ClusterConnectionString::UNRESOLVED) {
self->status = ClusterConnectionString::RESOLVING;
std::vector<Future<Void>> fs;
for (auto const& hostname : self->hostnames) {
fs.push_back(map(INetworkConnections::net()->resolveTCPEndpoint(hostname.host, hostname.service),
[=](std::vector<NetworkAddress> const& addresses) -> Void {
NetworkAddress address =
addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (hostname.isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
self->addResolved(hostname, address);
return Void();
}));
}
wait(waitForAll(fs));
std::sort(self->coords.begin(), self->coords.end());
if (std::unique(self->coords.begin(), self->coords.end()) != self->coords.end()) {
self->status = ClusterConnectionString::UNRESOLVED;
self->resolveFinish.trigger();
throw connection_string_invalid();
}
self->status = ClusterConnectionString::RESOLVED;
self->resolveFinish.trigger();
break;
} else if (self->status == ClusterConnectionString::RESOLVING) {
wait(self->resolveFinish.onTrigger());
if (self->status == ClusterConnectionString::RESOLVED) {
break;
}
// Otherwise, this means other threads failed on resolve, so here we go back to the loop and try to resolve
// again.
} else {
// status is RESOLVED, nothing to do.
break;
}
}
return Void();
}
Future<Void> ClusterConnectionString::resolveHostnames() {
return resolveHostnamesImpl(this);
}
void ClusterConnectionString::resolveHostnamesBlocking() {
if (status != RESOLVED) {
status = RESOLVING;
for (auto const& hostname : hostnames) {
std::vector<NetworkAddress> addresses =
INetworkConnections::net()->resolveTCPEndpointBlocking(hostname.host, hostname.service);
NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())];
address.flags = 0; // Reset the parsed address to public
address.fromHostname = NetworkAddressFromHostname::True;
if (hostname.isTLS) {
address.flags |= NetworkAddress::FLAG_TLS;
}
addResolved(hostname, address);
}
std::sort(coords.begin(), coords.end());
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
status = UNRESOLVED;
throw connection_string_invalid();
}
status = RESOLVED;
}
}
void ClusterConnectionString::resetToUnresolved() {
if (status == RESOLVED && hostnames.size() > 0) {
coords.clear();
hostnames.clear();
networkAddressToHostname.clear();
status = UNRESOLVED;
parseConnString();
}
}
void ClusterConnectionString::resetConnectionString() {
connectionString = toString();
}
void ClusterConnectionString::parseConnString() {
// Split on '@' into key@addrs // Split on '@' into key@addrs
int pAt = connectionString.find_first_of('@'); int pAt = trimmed.find_first_of('@');
if (pAt == connectionString.npos) { if (pAt == trimmed.npos) {
throw connection_string_invalid(); throw connection_string_invalid();
} }
std::string key = connectionString.substr(0, pAt); std::string key = trimmed.substr(0, pAt);
std::string addrs = connectionString.substr(pAt + 1); std::string addrs = trimmed.substr(pAt + 1);
parseKey(key); parseKey(key);
std::set<Hostname> hostnameSet;
std::set<NetworkAddress> addressSet;
std::string curAddr; std::string curAddr;
for (int p = 0; p <= addrs.size();) { for (int p = 0; p <= addrs.size();) {
int pComma = addrs.find_first_of(',', p); int pComma = addrs.find_first_of(',', p);
@ -199,31 +105,29 @@ void ClusterConnectionString::parseConnString() {
pComma = addrs.size(); pComma = addrs.size();
curAddr = addrs.substr(p, pComma - p); curAddr = addrs.substr(p, pComma - p);
if (Hostname::isHostname(curAddr)) { if (Hostname::isHostname(curAddr)) {
Hostname h = Hostname::parse(curAddr);
// Check that there are no duplicate hostnames
if (hostnameSet.find(h) != hostnameSet.end()) {
throw connection_string_invalid();
}
hostnames.push_back(Hostname::parse(curAddr)); hostnames.push_back(Hostname::parse(curAddr));
hostnameSet.insert(h);
} else { } else {
coords.push_back(NetworkAddress::parse(curAddr)); NetworkAddress n = NetworkAddress::parse(curAddr);
// Check that there are no duplicate addresses
if (addressSet.find(n) != addressSet.end()) {
throw connection_string_invalid();
}
coords.push_back(n);
addressSet.insert(n);
} }
p = pComma + 1; p = pComma + 1;
} }
if (hostnames.size() > 0) {
status = UNRESOLVED;
}
ASSERT((coords.size() + hostnames.size()) > 0); ASSERT((coords.size() + hostnames.size()) > 0);
std::sort(coords.begin(), coords.end());
// Check that there are no duplicate addresses
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
throw connection_string_invalid();
}
}
ClusterConnectionString::ClusterConnectionString(const std::string& connStr) {
connectionString = trim(connStr);
parseConnString();
} }
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") { TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
std::string input; state std::string input;
{ {
input = "asdf:2345@1.1.1.1:345"; input = "asdf:2345@1.1.1.1:345";
@ -231,6 +135,15 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
ASSERT(input == cs.toString()); ASSERT(input == cs.toString());
} }
{
input = "asdf:2345@1.1.1.1:345,1.1.1.1:345";
try {
ClusterConnectionString cs(input);
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
}
}
{ {
input = "0xxdeadbeef:100100100@1.1.1.1:34534,5.1.5.3:23443"; input = "0xxdeadbeef:100100100@1.1.1.1:34534,5.1.5.3:23443";
ClusterConnectionString cs(input); ClusterConnectionString cs(input);
@ -274,20 +187,27 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
} }
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") { TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
std::string input; state std::string input;
{ {
input = "asdf:2345@localhost:1234"; input = "asdf:2345@localhost:1234";
ClusterConnectionString cs(input); ClusterConnectionString cs(input);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 1); ASSERT(cs.hostnames.size() == 1);
ASSERT(input == cs.toString()); ASSERT(input == cs.toString());
} }
{
input = "asdf:2345@localhost:1234,localhost:1234";
try {
ClusterConnectionString cs(input);
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
}
}
{ {
input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443"; input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443";
ClusterConnectionString cs(input); ClusterConnectionString cs(input);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2); ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString()); ASSERT(input == cs.toString());
} }
@ -300,7 +220,6 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
commented += "# asdfasdf ##"; commented += "# asdfasdf ##";
ClusterConnectionString cs(commented); ClusterConnectionString cs(commented);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2); ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString()); ASSERT(input == cs.toString());
} }
@ -313,7 +232,6 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
commented += "# asdfasdf ##"; commented += "# asdfasdf ##";
ClusterConnectionString cs(commented); ClusterConnectionString cs(commented);
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2); ASSERT(cs.hostnames.size() == 2);
ASSERT(input == cs.toString()); ASSERT(input == cs.toString());
} }
@ -321,44 +239,30 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
return Void(); return Void();
} }
TEST_CASE("/fdbclient/MonitorLeader/ConnectionString") { TEST_CASE("/fdbclient/MonitorLeader/ConnectionString/hostname") {
state std::string connectionString = "TestCluster:0@localhost:1234,host-name:5678"; std::string connectionString = "TestCluster:0@localhost:1234,host-name:5678";
std::string hn1 = "localhost", port1 = "1234"; std::string hn1 = "localhost", port1 = "1234", hn2 = "host-name", port2 = "5678";
state std::string hn2 = "host-name"; std::vector<Hostname> hostnames;
state std::string port2 = "5678";
state std::vector<Hostname> hostnames;
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
hostnames.push_back(Hostname::parse(hn2 + ":" + port2));
NetworkAddress address1 = NetworkAddress::parse("127.0.0.0:1234"); {
NetworkAddress address2 = NetworkAddress::parse("127.0.0.1:5678"); hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
hostnames.push_back(Hostname::parse(hn2 + ":" + port2));
INetworkConnections::net()->addMockTCPEndpoint(hn1, port1, { address1 }); ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address2 }); ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
ASSERT(cs.toString() == connectionString);
}
state ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0")); {
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED); hostnames.clear();
ASSERT(cs.hostnames.size() == 2); hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
ASSERT(cs.coordinators().size() == 0); hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
wait(cs.resolveHostnames()); try {
ASSERT(cs.status == ClusterConnectionString::RESOLVED); ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
ASSERT(cs.hostnames.size() == 2); } catch (Error& e) {
ASSERT(cs.coordinators().size() == 2); ASSERT(e.code() == error_code_connection_string_invalid);
ASSERT(cs.toString() == connectionString); }
cs.resetToUnresolved();
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
ASSERT(cs.hostnames.size() == 2);
ASSERT(cs.coordinators().size() == 0);
ASSERT(cs.toString() == connectionString);
INetworkConnections::net()->removeMockTCPEndpoint(hn2, port2);
NetworkAddress address3 = NetworkAddress::parse("127.0.0.0:5678");
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address3 });
try {
wait(cs.resolveHostnames());
} catch (Error& e) {
ASSERT(e.code() == error_code_connection_string_invalid);
} }
return Void(); return Void();
@ -380,6 +284,7 @@ ACTOR Future<std::vector<NetworkAddress>> tryResolveHostnamesImpl(ClusterConnect
allCoordinatorsSet.insert(coord); allCoordinatorsSet.insert(coord);
} }
std::vector<NetworkAddress> allCoordinators(allCoordinatorsSet.begin(), allCoordinatorsSet.end()); std::vector<NetworkAddress> allCoordinators(allCoordinatorsSet.begin(), allCoordinatorsSet.end());
std::sort(allCoordinators.begin(), allCoordinators.end());
return allCoordinators; return allCoordinators;
} }
@ -484,17 +389,22 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/fuzz") {
} }
ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key) ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key)
: status(RESOLVED), coords(servers) { : coords(servers) {
std::set<NetworkAddress> s(servers.begin(), servers.end());
if (s.size() != servers.size()) {
throw connection_string_invalid();
}
std::string keyString = key.toString(); std::string keyString = key.toString();
parseKey(keyString); parseKey(keyString);
resetConnectionString();
} }
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key) ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key) : hostnames(hosts) {
: status(UNRESOLVED), hostnames(hosts) { std::set<Hostname> h(hosts.begin(), hosts.end());
if (h.size() != hosts.size()) {
throw connection_string_invalid();
}
std::string keyString = key.toString(); std::string keyString = key.toString();
parseKey(keyString); parseKey(keyString);
resetConnectionString();
} }
void ClusterConnectionString::parseKey(const std::string& key) { void ClusterConnectionString::parseKey(const std::string& key) {
@ -529,13 +439,11 @@ void ClusterConnectionString::parseKey(const std::string& key) {
std::string ClusterConnectionString::toString() const { std::string ClusterConnectionString::toString() const {
std::string s = key.toString(); std::string s = key.toString();
s += '@'; s += '@';
for (int i = 0; i < coords.size(); i++) { for (auto const& coord : coords) {
if (networkAddressToHostname.find(coords[i]) == networkAddressToHostname.end()) { if (s.find('@') != s.length() - 1) {
if (s.find('@') != s.length() - 1) { s += ',';
s += ',';
}
s += coords[i].toString();
} }
s += coord.toString();
} }
for (auto const& host : hostnames) { for (auto const& host : hostnames) {
if (s.find('@') != s.length() - 1) { if (s.find('@') != s.length() - 1) {
@ -547,11 +455,14 @@ std::string ClusterConnectionString::toString() const {
} }
ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) { ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
ClusterConnectionString cs = ccr->getConnectionString(); ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s)
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
clusterKey = cs.clusterKey(); clusterKey = cs.clusterKey();
for (auto h : cs.hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (auto s : cs.coordinators()) {
clientLeaderServers.push_back(ClientLeaderRegInterface(s));
}
} }
ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddress> coordinators) ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddress> coordinators)
@ -576,49 +487,32 @@ ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {
// Nominee is the worker among all workers that are considered as leader by one coordinator // Nominee is the worker among all workers that are considered as leader by one coordinator
// This function contacts a coordinator coord to ask who is its nominee. // This function contacts a coordinator coord to ask who is its nominee.
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
// to throw `coordinators_changed()` error
ACTOR Future<Void> monitorNominee(Key key, ACTOR Future<Void> monitorNominee(Key key,
ClientLeaderRegInterface coord, ClientLeaderRegInterface coord,
AsyncTrigger* nomineeChange, AsyncTrigger* nomineeChange,
Optional<LeaderInfo>* info, Optional<LeaderInfo>* info) {
Optional<Hostname> hostname = Optional<Hostname>()) {
loop { loop {
state Optional<LeaderInfo> li; state Optional<LeaderInfo> li;
if (coord.hostname.present()) {
if (coord.getLeader.getEndpoint().getPrimaryAddress().fromHostname) { wait(store(li,
state ErrorOr<Optional<LeaderInfo>> rep = retryGetReplyFromHostname(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
wait(coord.getLeader.tryGetReply(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()), coord.hostname.get(),
TaskPriority::CoordinationReply)); WLTOKEN_CLIENTLEADERREG_GETLEADER,
if (rep.isError()) { TaskPriority::CoordinationReply)));
// Connecting to nominee failed, most likely due to connection failed.
TraceEvent("MonitorNomineeError")
.error(rep.getError())
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
.detail("OldAddr", coord.getLeader.getEndpoint().getPrimaryAddress().toString());
if (rep.getError().code() == error_code_request_maybe_delivered) {
// Delay to prevent tight resolving loop due to outdated DNS cache
wait(delay(FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL));
throw coordinators_changed();
} else {
throw rep.getError();
}
} else if (rep.present()) {
li = rep.get();
}
} else { } else {
Optional<LeaderInfo> tmp = wait(store(li,
wait(retryBrokenPromise(coord.getLeader, retryBrokenPromise(coord.getLeader,
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()), GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
TaskPriority::CoordinationReply)); TaskPriority::CoordinationReply)));
li = tmp;
} }
wait(Future<Void>(Void())); // Make sure we weren't cancelled wait(Future<Void>(Void())); // Make sure we weren't cancelled
TraceEvent("GetLeaderReply") TraceEvent("GetLeaderReply")
.suppressFor(1.0) .suppressFor(1.0)
.detail("Coordinator", coord.getLeader.getEndpoint().getPrimaryAddress()) .detail("Coordinator",
coord.hostname.present() ? coord.hostname.get().toString()
: coord.getLeader.getEndpoint().getPrimaryAddress().toString())
.detail("Nominee", li.present() ? li.get().changeID : UID()) .detail("Nominee", li.present() ? li.get().changeID : UID())
.detail("ClusterKey", key.printable()); .detail("ClusterKey", key.printable());
@ -687,74 +581,54 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<Leade
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord, ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo, Reference<AsyncVar<Value>> outSerializedLeaderInfo,
MonitorLeaderInfo info) { MonitorLeaderInfo info) {
state ClientCoordinators coordinators(info.intermediateConnRecord);
state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees;
state Future<Void> allActors;
nominees.resize(coordinators.clientLeaderServers.size());
state std::vector<Future<Void>> actors;
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
actors.reserve(coordinators.clientLeaderServers.size());
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
actors.push_back(
monitorNominee(coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i]));
}
allActors = waitForAll(actors);
loop { loop {
wait(connRecord->resolveHostnames()); Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
wait(info.intermediateConnRecord->resolveHostnames()); TraceEvent("MonitorLeaderChange")
state ClientCoordinators coordinators(info.intermediateConnRecord); .detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
state AsyncTrigger nomineeChange; if (leader.present()) {
state std::vector<Optional<LeaderInfo>> nominees; if (leader.get().first.forward) {
state Future<Void> allActors; TraceEvent("MonitorLeaderForwarding")
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
nominees.resize(coordinators.clientLeaderServers.size()); .detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
.trackLatest("MonitorLeaderForwarding");
state std::vector<Future<Void>> actors; info.intermediateConnRecord = connRecord->makeIntermediateRecord(
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator. ClusterConnectionString(leader.get().first.serializedInfo.toString()));
actors.reserve(coordinators.clientLeaderServers.size()); return info;
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
Optional<Hostname> hostname;
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
coordinators.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress());
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
hostname = r->second;
}
actors.push_back(monitorNominee(
coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i], hostname));
}
allActors = waitForAll(actors);
loop {
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
TraceEvent("MonitorLeaderChange")
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
if (leader.present()) {
if (leader.get().first.forward) {
TraceEvent("MonitorLeaderForwarding")
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
.trackLatest("MonitorLeaderForwarding");
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
return info;
}
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnRecord->getConnectionString().toString());
}
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}
info.hasConnected = true;
connRecord->notifyConnected();
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
}
try {
wait(nomineeChange.onTrigger() || allActors);
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorLeaderCoordinatorsChanged").suppressFor(1.0);
connRecord->getConnectionString().resetToUnresolved();
break;
} else {
throw e;
}
} }
if (connRecord != info.intermediateConnRecord) {
if (!info.hasConnected) {
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
.detail("ClusterFile", connRecord->toString())
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
.detail("CurrentConnectionString",
info.intermediateConnRecord->getConnectionString().toString());
}
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
info.intermediateConnRecord = connRecord;
}
info.hasConnected = true;
connRecord->notifyConnected();
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
} }
wait(nomineeChange.onTrigger() || allActors);
} }
} }
@ -885,10 +759,10 @@ ACTOR Future<Void> getClientInfoFromLeader(Reference<AsyncVar<Optional<ClusterCo
} }
ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey, ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
std::vector<Hostname> hostnames,
std::vector<NetworkAddress> coordinators, std::vector<NetworkAddress> coordinators,
ClientData* clientData, ClientData* clientData,
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo, Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo) {
Reference<AsyncVar<Void>> coordinatorsChanged) {
state std::vector<ClientLeaderRegInterface> clientLeaderServers; state std::vector<ClientLeaderRegInterface> clientLeaderServers;
state AsyncTrigger nomineeChange; state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees; state std::vector<Optional<LeaderInfo>> nominees;
@ -896,8 +770,12 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
state Reference<AsyncVar<Optional<ClusterControllerClientInterface>>> knownLeader( state Reference<AsyncVar<Optional<ClusterControllerClientInterface>>> knownLeader(
new AsyncVar<Optional<ClusterControllerClientInterface>>{}); new AsyncVar<Optional<ClusterControllerClientInterface>>{});
for (auto s = coordinators.begin(); s != coordinators.end(); ++s) { clientLeaderServers.reserve(hostnames.size() + coordinators.size());
clientLeaderServers.push_back(ClientLeaderRegInterface(*s)); for (auto h : hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (auto s : coordinators) {
clientLeaderServers.push_back(ClientLeaderRegInterface(s));
} }
nominees.resize(clientLeaderServers.size()); nominees.resize(clientLeaderServers.size());
@ -936,14 +814,7 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
leaderInfo->set(leader.get().first); leaderInfo->set(leader.get().first);
} }
} }
try { wait(nomineeChange.onTrigger() || allActors);
wait(nomineeChange.onTrigger() || allActors);
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
coordinatorsChanged->trigger();
}
throw e;
}
} }
} }
@ -995,7 +866,7 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions, Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) { Key traceLogGroup) {
state ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString(); state ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
state std::vector<NetworkAddress> addrs = cs.coordinators(); state int coordinatorsSize = cs.hostnames.size() + cs.coordinators().size();
state int index = 0; state int index = 0;
state int successIndex = 0; state int successIndex = 0;
state Optional<double> incorrectTime; state Optional<double> incorrectTime;
@ -1003,15 +874,26 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
state std::vector<CommitProxyInterface> lastCommitProxies; state std::vector<CommitProxyInterface> lastCommitProxies;
state std::vector<UID> lastGrvProxyUIDs; state std::vector<UID> lastGrvProxyUIDs;
state std::vector<GrvProxyInterface> lastGrvProxies; state std::vector<GrvProxyInterface> lastGrvProxies;
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
clientLeaderServers.reserve(coordinatorsSize);
for (const auto& h : cs.hostnames) {
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
}
for (const auto& c : cs.coordinators()) {
clientLeaderServers.push_back(ClientLeaderRegInterface(c));
}
deterministicRandom()->randomShuffle(clientLeaderServers);
deterministicRandom()->randomShuffle(addrs);
loop { loop {
state ClientLeaderRegInterface clientLeaderServer(addrs[index]); state ClientLeaderRegInterface clientLeaderServer = clientLeaderServers[index];
state OpenDatabaseCoordRequest req; state OpenDatabaseCoordRequest req;
coordinator->set(clientLeaderServer); coordinator->set(clientLeaderServer);
req.clusterKey = cs.clusterKey(); req.clusterKey = cs.clusterKey();
req.hostnames = cs.hostnames;
req.coordinators = cs.coordinators(); req.coordinators = cs.coordinators();
req.knownClientInfoID = clientInfo->get().id; req.knownClientInfoID = clientInfo->get().id;
req.supportedVersions = supportedVersions->get(); req.supportedVersions = supportedVersions->get();
@ -1040,8 +922,16 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
incorrectTime = Optional<double>(); incorrectTime = Optional<double>();
} }
state ErrorOr<CachedSerialization<ClientDBInfo>> rep = state ErrorOr<CachedSerialization<ClientDBInfo>> rep;
wait(clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply)); if (clientLeaderServer.hostname.present()) {
wait(store(rep,
tryGetReplyFromHostname(req,
clientLeaderServer.hostname.get(),
WLTOKEN_CLIENTLEADERREG_OPENDATABASE,
TaskPriority::CoordinationReply)));
} else {
wait(store(rep, clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply)));
}
if (rep.present()) { if (rep.present()) {
if (rep.get().read().forward.present()) { if (rep.get().read().forward.present()) {
TraceEvent("MonitorProxiesForwarding") TraceEvent("MonitorProxiesForwarding")
@ -1072,15 +962,10 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
successIndex = index; successIndex = index;
} else { } else {
TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller
if (rep.getError().code() == error_code_coordinators_changed) { TEST(rep.getError().code() == error_code_lookup_failed); // Coordinator hostname resolving failure
throw coordinators_changed(); index = (index + 1) % coordinatorsSize;
}
index = (index + 1) % addrs.size();
if (index == successIndex) { if (index == successIndex) {
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY)); wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
// When the client fails talking to all coordinators, we throw coordinators_changed() and let the caller
// re-resolve the connection string and retry.
throw coordinators_changed();
} }
} }
} }
@ -1092,27 +977,16 @@ ACTOR Future<Void> monitorProxies(
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator, Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions, Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
Key traceLogGroup) { Key traceLogGroup) {
wait(connRecord->get()->resolveHostnames());
state MonitorLeaderInfo info(connRecord->get()); state MonitorLeaderInfo info(connRecord->get());
loop { loop {
try { choose {
wait(info.intermediateConnRecord->resolveHostnames()); when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
choose { connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration( info = _info;
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
info = _info;
}
when(wait(connRecord->onChange())) {
info.hasConnected = false;
info.intermediateConnRecord = connRecord->get();
}
} }
} catch (Error& e) { when(wait(connRecord->onChange())) {
if (e.code() == error_code_coordinators_changed) { info.hasConnected = false;
TraceEvent("MonitorProxiesCoordinatorsChanged").suppressFor(1.0); info.intermediateConnRecord = connRecord->get();
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
} else {
throw e;
} }
} }
} }

View File

@ -75,10 +75,10 @@ Future<Void> monitorLeader(Reference<IClusterConnectionRecord> const& connFile,
// nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function // nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function
// also monitors the change of the leader. // also monitors the change of the leader.
Future<Void> monitorLeaderAndGetClientInfo(Key const& clusterKey, Future<Void> monitorLeaderAndGetClientInfo(Key const& clusterKey,
std::vector<Hostname> const& hostnames,
std::vector<NetworkAddress> const& coordinators, std::vector<NetworkAddress> const& coordinators,
ClientData* const& clientData, ClientData* const& clientData,
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo, Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo);
Reference<AsyncVar<Void>> const& coordinatorsChanged);
Future<Void> monitorProxies( Future<Void> monitorProxies(
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord, Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,

View File

@ -18,6 +18,10 @@
* limitations under the License. * limitations under the License.
*/ */
#ifdef ADDRESS_SANITIZER
#include <sanitizer/lsan_interface.h>
#endif
#include "fdbclient/FDBOptions.g.h" #include "fdbclient/FDBOptions.g.h"
#include "fdbclient/FDBTypes.h" #include "fdbclient/FDBTypes.h"
#include "fdbclient/GenericManagementAPI.actor.h" #include "fdbclient/GenericManagementAPI.actor.h"
@ -2763,6 +2767,11 @@ template <class T>
THREAD_FUNC runSingleAssignmentVarTest(void* arg) { THREAD_FUNC runSingleAssignmentVarTest(void* arg) {
noUnseed = true; noUnseed = true;
// This test intentionally leaks memory
#ifdef ADDRESS_SANITIZER
__lsan::ScopedDisabler disableLeakChecks;
#endif
volatile bool* done = (volatile bool*)arg; volatile bool* done = (volatile bool*)arg;
try { try {
for (int i = 0; i < 25; ++i) { for (int i = 0; i < 25; ++i) {

View File

@ -265,11 +265,11 @@ void DatabaseContext::getLatestCommitVersions(const Reference<LocationInfo>& loc
void updateCachedReadVersionShared(double t, Version v, DatabaseSharedState* p) { void updateCachedReadVersionShared(double t, Version v, DatabaseSharedState* p) {
MutexHolder mutex(p->mutexLock); MutexHolder mutex(p->mutexLock);
if (v >= p->grvCacheSpace.cachedReadVersion) { if (v >= p->grvCacheSpace.cachedReadVersion) {
TraceEvent(SevDebug, "CacheReadVersionUpdate") //TraceEvent(SevDebug, "CacheReadVersionUpdate")
.detail("Version", v) // .detail("Version", v)
.detail("CurTime", t) // .detail("CurTime", t)
.detail("LastVersion", p->grvCacheSpace.cachedReadVersion) // .detail("LastVersion", p->grvCacheSpace.cachedReadVersion)
.detail("LastTime", p->grvCacheSpace.lastGrvTime); // .detail("LastTime", p->grvCacheSpace.lastGrvTime);
p->grvCacheSpace.cachedReadVersion = v; p->grvCacheSpace.cachedReadVersion = v;
if (t > p->grvCacheSpace.lastGrvTime) { if (t > p->grvCacheSpace.lastGrvTime) {
p->grvCacheSpace.lastGrvTime = t; p->grvCacheSpace.lastGrvTime = t;
@ -282,11 +282,11 @@ void DatabaseContext::updateCachedReadVersion(double t, Version v) {
return updateCachedReadVersionShared(t, v, sharedStatePtr); return updateCachedReadVersionShared(t, v, sharedStatePtr);
} }
if (v >= cachedReadVersion) { if (v >= cachedReadVersion) {
TraceEvent(SevDebug, "CachedReadVersionUpdate") //TraceEvent(SevDebug, "CachedReadVersionUpdate")
.detail("Version", v) // .detail("Version", v)
.detail("GrvStartTime", t) // .detail("GrvStartTime", t)
.detail("LastVersion", cachedReadVersion) // .detail("LastVersion", cachedReadVersion)
.detail("LastTime", lastGrvTime); // .detail("LastTime", lastGrvTime);
cachedReadVersion = v; cachedReadVersion = v;
// Since the time is based on the start of the request, it's possible that we // Since the time is based on the start of the request, it's possible that we
// get a newer version with an older time. // get a newer version with an older time.
@ -5100,10 +5100,10 @@ Future<Optional<Value>> Transaction::get(const Key& key, Snapshot snapshot) {
++trState->cx->transactionGetValueRequests; ++trState->cx->transactionGetValueRequests;
// ASSERT (key < allKeys.end); // ASSERT (key < allKeys.end);
// There are no keys in the database with size greater than KEY_SIZE_LIMIT // There are no keys in the database with size greater than the max key size
if (key.size() > if (key.size() > getMaxReadKeySize(key)) {
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
return Optional<Value>(); return Optional<Value>();
}
auto ver = getReadVersion(); auto ver = getReadVersion();
@ -5484,23 +5484,19 @@ Future<Void> Transaction::getRangeStream(const PromiseStream<RangeResult>& resul
void Transaction::addReadConflictRange(KeyRangeRef const& keys) { void Transaction::addReadConflictRange(KeyRangeRef const& keys) {
ASSERT(!keys.empty()); ASSERT(!keys.empty());
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys // There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys // we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin; KeyRef begin = keys.begin;
KeyRef end = keys.end; KeyRef end = keys.end;
if (begin.size() > int64_t beginMaxSize = getMaxReadKeySize(begin);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) int64_t endMaxSize = getMaxReadKeySize(end);
begin = begin.substr( if (begin.size() > beginMaxSize) {
0, begin = begin.substr(0, beginMaxSize + 1);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) + }
1); if (end.size() > endMaxSize) {
if (end.size() > end = end.substr(0, endMaxSize + 1);
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) }
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
KeyRangeRef r = KeyRangeRef(begin, end); KeyRangeRef r = KeyRangeRef(begin, end);
@ -5522,8 +5518,7 @@ void Transaction::makeSelfConflicting() {
void Transaction::set(const KeyRef& key, const ValueRef& value, AddConflictRange addConflictRange) { void Transaction::set(const KeyRef& key, const ValueRef& value, AddConflictRange addConflictRange) {
++trState->cx->transactionSetMutations; ++trState->cx->transactionSetMutations;
if (key.size() > if (key.size() > getMaxWriteKeySize(key, trState->options.rawAccess))
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
throw key_too_large(); throw key_too_large();
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT) if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large(); throw value_too_large();
@ -5544,8 +5539,7 @@ void Transaction::atomicOp(const KeyRef& key,
MutationRef::Type operationType, MutationRef::Type operationType,
AddConflictRange addConflictRange) { AddConflictRange addConflictRange) {
++trState->cx->transactionAtomicMutations; ++trState->cx->transactionAtomicMutations;
if (key.size() > if (key.size() > getMaxWriteKeySize(key, trState->options.rawAccess))
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
throw key_too_large(); throw key_too_large();
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT) if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large(); throw value_too_large();
@ -5578,20 +5572,16 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
KeyRef begin = range.begin; KeyRef begin = range.begin;
KeyRef end = range.end; KeyRef end = range.end;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys // There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys // we can translate it to an equivalent one with smaller keys
if (begin.size() > int64_t beginMaxSize = getMaxClearKeySize(begin);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) int64_t endMaxSize = getMaxClearKeySize(end);
begin = begin.substr( if (begin.size() > beginMaxSize) {
0, begin = begin.substr(0, beginMaxSize + 1);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) + }
1); if (end.size() > endMaxSize) {
if (end.size() > end = end.substr(0, endMaxSize + 1);
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) }
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
auto r = KeyRangeRef(req.arena, KeyRangeRef(begin, end)); auto r = KeyRangeRef(req.arena, KeyRangeRef(begin, end));
if (r.empty()) if (r.empty())
@ -5604,10 +5594,10 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
} }
void Transaction::clear(const KeyRef& key, AddConflictRange addConflictRange) { void Transaction::clear(const KeyRef& key, AddConflictRange addConflictRange) {
++trState->cx->transactionClearMutations; ++trState->cx->transactionClearMutations;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT // There aren't any keys in the database with size larger than the max key size
if (key.size() > if (key.size() > getMaxClearKeySize(key)) {
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
return; return;
}
auto& req = tr; auto& req = tr;
auto& t = req.transaction; auto& t = req.transaction;
@ -5626,24 +5616,19 @@ void Transaction::addWriteConflictRange(const KeyRangeRef& keys) {
auto& req = tr; auto& req = tr;
auto& t = req.transaction; auto& t = req.transaction;
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys // There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys // we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin; KeyRef begin = keys.begin;
KeyRef end = keys.end; KeyRef end = keys.end;
if (begin.size() > int64_t beginMaxSize = getMaxKeySize(begin);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) int64_t endMaxSize = getMaxKeySize(end);
begin = begin.substr( if (begin.size() > beginMaxSize) {
0, begin = begin.substr(0, beginMaxSize + 1);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) + }
1); if (end.size() > endMaxSize) {
if (end.size() > end = end.substr(0, endMaxSize + 1);
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) }
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
KeyRangeRef r = KeyRangeRef(begin, end); KeyRangeRef r = KeyRangeRef(begin, end);
if (r.empty()) { if (r.empty()) {
@ -6942,11 +6927,18 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
} }
// Gets the protocol version reported by a coordinator via the protocol info interface // Gets the protocol version reported by a coordinator via the protocol info interface
ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordinatorAddresses) { ACTOR Future<ProtocolVersion> getCoordinatorProtocol(
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown({ coordinatorAddresses }, Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator) {
WLTOKEN_PROTOCOL_INFO) }; state ProtocolInfoReply reply;
ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{})); if (coordinator->get().get().hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(
ProtocolInfoRequest{}, coordinator->get().get().hostname.get(), WLTOKEN_PROTOCOL_INFO)));
} else {
RequestStream<ProtocolInfoRequest> requestStream(
Endpoint::wellKnown({ coordinator->get().get().getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO));
wait(store(reply, retryBrokenPromise(requestStream, ProtocolInfoRequest{})));
}
return reply.version; return reply.version;
} }
@ -6955,8 +6947,16 @@ ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordina
// function will return with an unset result. // function will return with an unset result.
// If an expected version is given, this future won't return if the actual protocol version matches the expected version // If an expected version is given, this future won't return if the actual protocol version matches the expected version
ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket( ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
NetworkAddress coordinatorAddress, Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator,
Optional<ProtocolVersion> expectedVersion) { Optional<ProtocolVersion> expectedVersion) {
state NetworkAddress coordinatorAddress;
if (coordinator->get().get().hostname.present()) {
Hostname h = coordinator->get().get().hostname.get();
wait(store(coordinatorAddress, h.resolveWithRetry()));
} else {
coordinatorAddress = coordinator->get().get().getLeader.getEndpoint().getPrimaryAddress();
}
state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion = state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion =
FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress); FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);
@ -6991,11 +6991,10 @@ ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
if (!coordinator->get().present()) { if (!coordinator->get().present()) {
wait(coordinator->onChange()); wait(coordinator->onChange());
} else { } else {
Endpoint coordinatorEndpoint = coordinator->get().get().getLeader.getEndpoint();
if (needToConnect) { if (needToConnect) {
// Even though we typically rely on the connect packet to get the protocol version, we need to send some // Even though we typically rely on the connect packet to get the protocol version, we need to send some
// request in order to start a connection. This protocol version request serves that purpose. // request in order to start a connection. This protocol version request serves that purpose.
protocolVersion = getCoordinatorProtocol(coordinatorEndpoint.addresses); protocolVersion = getCoordinatorProtocol(coordinator);
needToConnect = false; needToConnect = false;
} }
choose { choose {
@ -7011,8 +7010,8 @@ ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
// Older versions of FDB don't have an endpoint to return the protocol version, so we get this info from // Older versions of FDB don't have an endpoint to return the protocol version, so we get this info from
// the connect packet // the connect packet
when(Optional<ProtocolVersion> pv = wait(getCoordinatorProtocolFromConnectPacket( when(Optional<ProtocolVersion> pv =
coordinatorEndpoint.getPrimaryAddress(), expectedVersion))) { wait(getCoordinatorProtocolFromConnectPacket(coordinator, expectedVersion))) {
if (pv.present()) { if (pv.present()) {
return pv.get(); return pv.get();
} else { } else {
@ -8186,14 +8185,20 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion
throw; throw;
} }
TraceEvent("ExclusionSafetyCheckCoordinators").log(); TraceEvent("ExclusionSafetyCheckCoordinators").log();
wait(cx->getConnectionRecord()->resolveHostnames());
state ClientCoordinators coordinatorList(cx->getConnectionRecord()); state ClientCoordinators coordinatorList(cx->getConnectionRecord());
state std::vector<Future<Optional<LeaderInfo>>> leaderServers; state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coordinatorList.clientLeaderServers.size()); leaderServers.reserve(coordinatorList.clientLeaderServers.size());
for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) { for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
leaderServers.push_back(retryBrokenPromise(coordinatorList.clientLeaderServers[i].getLeader, if (coordinatorList.clientLeaderServers[i].hostname.present()) {
GetLeaderRequest(coordinatorList.clusterKey, UID()), leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coordinatorList.clusterKey, UID()),
TaskPriority::CoordinationReply)); coordinatorList.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coordinatorList.clientLeaderServers[i].getLeader,
GetLeaderRequest(coordinatorList.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
} }
// Wait for quorum so we don't dismiss live coordinators as unreachable by acting too fast // Wait for quorum so we don't dismiss live coordinators as unreachable by acting too fast
choose { choose {
@ -9395,3 +9400,21 @@ ACTOR Future<Void> waitPurgeGranulesCompleteActor(Reference<DatabaseContext> db,
Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) { Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) {
return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey); return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey);
} }
int64_t getMaxKeySize(KeyRef const& key) {
return getMaxWriteKeySize(key, true);
}
int64_t getMaxReadKeySize(KeyRef const& key) {
return getMaxKeySize(key);
}
int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess) {
int64_t tenantSize = hasRawAccess ? CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT : 0;
return key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT + tenantSize;
}
int64_t getMaxClearKeySize(KeyRef const& key) {
return getMaxKeySize(key);
}

View File

@ -539,5 +539,19 @@ ACTOR Future<std::vector<std::pair<UID, StorageWiggleValue>>> readStorageWiggleV
bool primary, bool primary,
bool use_system_priority); bool use_system_priority);
// Returns the maximum legal size of a key. This size will be determined by the prefix of the passed in key
// (system keys have a larger maximum size). This should be used for generic max key size requests.
int64_t getMaxKeySize(KeyRef const& key);
// Returns the maximum legal size of a key that can be read. Keys larger than this will be assumed not to exist.
int64_t getMaxReadKeySize(KeyRef const& key);
// Returns the maximum legal size of a key that can be written. If using raw access, writes to normal keys will
// be allowed to be slighly larger to accommodate the prefix.
int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess);
// Returns the maximum legal size of a key that can be cleared. Keys larger than this will be assumed not to exist.
int64_t getMaxClearKeySize(KeyRef const& key);
#include "flow/unactorcompiler.h" #include "flow/unactorcompiler.h"
#endif #endif

View File

@ -59,8 +59,14 @@ class CommitQuorum {
ConfigGeneration generation, ConfigGeneration generation,
ConfigTransactionInterface cti) { ConfigTransactionInterface cti) {
try { try {
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)), if (cti.hostname.present()) {
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT)); wait(timeoutError(retryGetReplyFromHostname(
self->getCommitRequest(generation), cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT),
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
} else {
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)),
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
}
++self->successful; ++self->successful;
} catch (Error& e) { } catch (Error& e) {
// self might be destroyed if this actor is cancelled // self might be destroyed if this actor is cancelled
@ -122,9 +128,20 @@ class GetGenerationQuorum {
ACTOR static Future<Void> addRequestActor(GetGenerationQuorum* self, ConfigTransactionInterface cti) { ACTOR static Future<Void> addRequestActor(GetGenerationQuorum* self, ConfigTransactionInterface cti) {
loop { loop {
try { try {
ConfigTransactionGetGenerationReply reply = wait(timeoutError( state ConfigTransactionGetGenerationReply reply;
cti.getGeneration.getReply(ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion }), if (cti.hostname.present()) {
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT)); wait(timeoutError(store(reply,
retryGetReplyFromHostname(
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion },
cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETGENERATION)),
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
} else {
wait(timeoutError(store(reply,
cti.getGeneration.getReply(
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion })),
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
}
++self->totalRepliesReceived; ++self->totalRepliesReceived;
auto gen = reply.generation; auto gen = reply.generation;
@ -225,9 +242,18 @@ class PaxosConfigTransactionImpl {
state ConfigKey configKey = ConfigKey::decodeKey(key); state ConfigKey configKey = ConfigKey::decodeKey(key);
loop { loop {
try { try {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration()); state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes( state std::vector<ConfigTransactionInterface> readReplicas =
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas())); self->getGenerationQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.get, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GET));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetReply reply = ConfigTransactionGetReply reply =
wait(timeoutError(basicLoadBalance(configNodes, wait(timeoutError(basicLoadBalance(configNodes,
&ConfigTransactionInterface::get, &ConfigTransactionInterface::get,
@ -248,9 +274,17 @@ class PaxosConfigTransactionImpl {
} }
ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) { ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration()); state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes( state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas())); std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.getClasses, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETCLASSES));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetConfigClassesReply reply = ConfigTransactionGetConfigClassesReply reply =
wait(basicLoadBalance(configNodes, wait(basicLoadBalance(configNodes,
&ConfigTransactionInterface::getClasses, &ConfigTransactionInterface::getClasses,
@ -264,9 +298,17 @@ class PaxosConfigTransactionImpl {
} }
ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) { ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration()); state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
state Reference<ConfigTransactionInfo> configNodes( state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas())); std::vector<Future<Void>> fs;
for (ConfigTransactionInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&readReplica.getKnobs, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETKNOBS));
}
}
wait(waitForAll(fs));
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
ConfigTransactionGetKnobsReply reply = ConfigTransactionGetKnobsReply reply =
wait(basicLoadBalance(configNodes, wait(basicLoadBalance(configNodes,
&ConfigTransactionInterface::getKnobs, &ConfigTransactionInterface::getKnobs,
@ -366,10 +408,13 @@ public:
Future<Void> commit() { return commit(this); } Future<Void> commit() { return commit(this); }
PaxosConfigTransactionImpl(Database const& cx) : cx(cx) { PaxosConfigTransactionImpl(Database const& cx) : cx(cx) {
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators(); const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
ctis.reserve(coordinators.size()); ctis.reserve(cs.hostnames.size() + cs.coordinators().size());
for (const auto& coordinator : coordinators) { for (const auto& h : cs.hostnames) {
ctis.emplace_back(coordinator); ctis.emplace_back(h);
}
for (const auto& c : cs.coordinators()) {
ctis.emplace_back(c);
} }
getGenerationQuorum = GetGenerationQuorum{ ctis }; getGenerationQuorum = GetGenerationQuorum{ ctis };
commitQuorum = CommitQuorum{ ctis }; commitQuorum = CommitQuorum{ ctis };

View File

@ -19,6 +19,7 @@
*/ */
#include "fdbclient/ReadYourWrites.h" #include "fdbclient/ReadYourWrites.h"
#include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/Atomic.h" #include "fdbclient/Atomic.h"
#include "fdbclient/DatabaseContext.h" #include "fdbclient/DatabaseContext.h"
#include "fdbclient/SpecialKeySpace.actor.h" #include "fdbclient/SpecialKeySpace.actor.h"
@ -1578,10 +1579,10 @@ Future<Optional<Value>> ReadYourWritesTransaction::get(const Key& key, Snapshot
if (key >= getMaxReadKey() && key != metadataVersionKey) if (key >= getMaxReadKey() && key != metadataVersionKey)
return key_outside_legal_range(); return key_outside_legal_range();
// There are no keys in the database with size greater than KEY_SIZE_LIMIT // There are no keys in the database with size greater than the max key size
if (key.size() > if (key.size() > getMaxReadKeySize(key)) {
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
return Optional<Value>(); return Optional<Value>();
}
Future<Optional<Value>> result = RYWImpl::readWithConflictRange(this, RYWImpl::GetValueReq(key), snapshot); Future<Optional<Value>> result = RYWImpl::readWithConflictRange(this, RYWImpl::GetValueReq(key), snapshot);
reading.add(success(result)); reading.add(success(result));
@ -1822,23 +1823,19 @@ void ReadYourWritesTransaction::addReadConflictRange(KeyRangeRef const& keys) {
} }
} }
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys // There aren't any keys in the database with size larger than max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys // we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin; KeyRef begin = keys.begin;
KeyRef end = keys.end; KeyRef end = keys.end;
if (begin.size() > int64_t beginMaxSize = getMaxReadKeySize(begin);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) int64_t endMaxSize = getMaxReadKeySize(end);
begin = begin.substr( if (begin.size() > beginMaxSize) {
0, begin = begin.substr(0, beginMaxSize + 1);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) + }
1); if (end.size() > endMaxSize) {
if (end.size() > end = end.substr(0, endMaxSize + 1);
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) }
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
KeyRangeRef r = KeyRangeRef(begin, end); KeyRangeRef r = KeyRangeRef(begin, end);
@ -2111,9 +2108,9 @@ void ReadYourWritesTransaction::atomicOp(const KeyRef& key, const ValueRef& oper
if (!isValidMutationType(operationType) || !isAtomicOp((MutationRef::Type)operationType)) if (!isValidMutationType(operationType) || !isAtomicOp((MutationRef::Type)operationType))
throw invalid_mutation_type(); throw invalid_mutation_type();
if (key.size() > if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
throw key_too_large(); throw key_too_large();
}
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT) if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large(); throw value_too_large();
@ -2218,9 +2215,9 @@ void ReadYourWritesTransaction::set(const KeyRef& key, const ValueRef& value) {
} }
// TODO: check transaction size here // TODO: check transaction size here
if (key.size() > if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
throw key_too_large(); throw key_too_large();
}
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT) if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
throw value_too_large(); throw value_too_large();
@ -2254,23 +2251,19 @@ void ReadYourWritesTransaction::clear(const KeyRangeRef& range) {
return tr.clear(range, addWriteConflict); return tr.clear(range, addWriteConflict);
} }
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys // There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys // we can translate it to an equivalent one with smaller keys
KeyRef begin = range.begin; KeyRef begin = range.begin;
KeyRef end = range.end; KeyRef end = range.end;
if (begin.size() > int64_t beginMaxSize = getMaxClearKeySize(begin);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) int64_t endMaxSize = getMaxClearKeySize(end);
begin = begin.substr( if (begin.size() > beginMaxSize) {
0, begin = begin.substr(0, beginMaxSize + 1);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) + }
1); if (end.size() > endMaxSize) {
if (end.size() > end = end.substr(0, endMaxSize + 1);
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) }
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
KeyRangeRef r = KeyRangeRef(begin, end); KeyRangeRef r = KeyRangeRef(begin, end);
@ -2300,9 +2293,9 @@ void ReadYourWritesTransaction::clear(const KeyRef& key) {
if (key >= getMaxWriteKey()) if (key >= getMaxWriteKey())
throw key_outside_legal_range(); throw key_outside_legal_range();
if (key.size() > if (key.size() > getMaxClearKeySize(key)) {
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
return; return;
}
if (options.readYourWritesDisabled) { if (options.readYourWritesDisabled) {
return tr.clear(key, addWriteConflict); return tr.clear(key, addWriteConflict);
@ -2332,9 +2325,9 @@ Future<Void> ReadYourWritesTransaction::watch(const Key& key) {
if (key >= allKeys.end || (key >= getMaxReadKey() && key != metadataVersionKey && tr.apiVersionAtLeast(300))) if (key >= allKeys.end || (key >= getMaxReadKey() && key != metadataVersionKey && tr.apiVersionAtLeast(300)))
return key_outside_legal_range(); return key_outside_legal_range();
if (key.size() > if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
return key_too_large(); return key_too_large();
}
return RYWImpl::watch(this, key); return RYWImpl::watch(this, key);
} }
@ -2350,23 +2343,19 @@ void ReadYourWritesTransaction::addWriteConflictRange(KeyRangeRef const& keys) {
} }
} }
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys // There aren't any keys in the database with size larger than the max key size, so if range contains large keys
// we can translate it to an equivalent one with smaller keys // we can translate it to an equivalent one with smaller keys
KeyRef begin = keys.begin; KeyRef begin = keys.begin;
KeyRef end = keys.end; KeyRef end = keys.end;
if (begin.size() > int64_t beginMaxSize = getMaxKeySize(begin);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) int64_t endMaxSize = getMaxKeySize(end);
begin = begin.substr( if (begin.size() > beginMaxSize) {
0, begin = begin.substr(0, beginMaxSize + 1);
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) + }
1); if (end.size() > endMaxSize) {
if (end.size() > end = end.substr(0, endMaxSize + 1);
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT)) }
end = end.substr(
0,
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1);
KeyRangeRef r = KeyRangeRef(begin, end); KeyRangeRef r = KeyRangeRef(begin, end);

View File

@ -25,9 +25,15 @@
#include "fdbclient/sha1/SHA1.h" #include "fdbclient/sha1/SHA1.h"
#include <time.h> #include <time.h>
#include <iomanip> #include <iomanip>
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/sha.h> #include <openssl/sha.h>
#include <openssl/evp.h> #include <openssl/evp.h>
#include <openssl/hmac.h> #include <openssl/hmac.h>
#if defined(HAVE_WOLFSSL)
#undef SHA1 // wolfSSL will will shadow FDB SHA1.h
#endif
#include <boost/algorithm/string/split.hpp> #include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp> #include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>

View File

@ -26,7 +26,7 @@
#include "flow/Net2Packet.h" #include "flow/Net2Packet.h"
#include "fdbclient/Knobs.h" #include "fdbclient/Knobs.h"
#include "fdbrpc/IRateControl.h" #include "fdbrpc/IRateControl.h"
#include "fdbclient/HTTP.h" #include "fdbrpc/HTTP.h"
#include "fdbclient/JSONDoc.h" #include "fdbclient/JSONDoc.h"
// Representation of all the things you need to connect to a blob store instance with some credentials. // Representation of all the things you need to connect to a blob store instance with some credentials.

View File

@ -450,6 +450,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1; init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
init( MAX_PROXY_COMPUTE, 2.0 ); init( MAX_PROXY_COMPUTE, 2.0 );
init( MAX_COMPUTE_PER_OPERATION, 0.1 ); init( MAX_COMPUTE_PER_OPERATION, 0.1 );
init( MAX_COMPUTE_DURATION_LOG_CUTOFF, 0.05 );
init( PROXY_COMPUTE_BUCKETS, 20000 ); init( PROXY_COMPUTE_BUCKETS, 20000 );
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 ); init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
init( TXN_STATE_SEND_AMOUNT, 4 ); init( TXN_STATE_SEND_AMOUNT, 4 );
@ -541,6 +542,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( CC_ENABLE_ENTIRE_SATELLITE_MONITORING, false ); init( CC_ENABLE_ENTIRE_SATELLITE_MONITORING, false );
init( CC_SATELLITE_DEGRADATION_MIN_COMPLAINER, 3 ); init( CC_SATELLITE_DEGRADATION_MIN_COMPLAINER, 3 );
init( CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER, 3 ); init( CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER, 3 );
init( CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL, 0.5 );
init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0; init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit ); init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );

View File

@ -374,6 +374,7 @@ public:
int MAX_COMMIT_UPDATES; int MAX_COMMIT_UPDATES;
double MAX_PROXY_COMPUTE; double MAX_PROXY_COMPUTE;
double MAX_COMPUTE_PER_OPERATION; double MAX_COMPUTE_PER_OPERATION;
double MAX_COMPUTE_DURATION_LOG_CUTOFF;
int PROXY_COMPUTE_BUCKETS; int PROXY_COMPUTE_BUCKETS;
double PROXY_COMPUTE_GROWTH_RATE; double PROXY_COMPUTE_GROWTH_RATE;
int TXN_STATE_SEND_AMOUNT; int TXN_STATE_SEND_AMOUNT;
@ -480,6 +481,8 @@ public:
// be determined as degraded worker. // be determined as degraded worker.
int CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER; // The minimum amount of degraded server in satellite DC to be int CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER; // The minimum amount of degraded server in satellite DC to be
// determined as degraded satellite. // determined as degraded satellite.
double CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL; // The interval to prevent re-recruiting the same singleton if a
// recruiting fight between two cluster controllers occurs.
// Knobs used to select the best policy (via monte carlo) // Knobs used to select the best policy (via monte carlo)
int POLICY_RATING_TESTS; // number of tests per policy (in order to compare) int POLICY_RATING_TESTS; // number of tests per policy (in order to compare)

View File

@ -41,9 +41,15 @@ class SimpleConfigTransactionImpl {
if (self->dID.present()) { if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGettingReadVersion", self->dID.get()); TraceEvent("SimpleConfigTransactionGettingReadVersion", self->dID.get());
} }
ConfigTransactionGetGenerationRequest req; state ConfigTransactionGetGenerationReply reply;
ConfigTransactionGetGenerationReply reply = if (self->cti.hostname.present()) {
wait(retryBrokenPromise(self->cti.getGeneration, ConfigTransactionGetGenerationRequest{})); wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetGenerationRequest{},
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETGENERATION)));
} else {
wait(store(reply, retryBrokenPromise(self->cti.getGeneration, ConfigTransactionGetGenerationRequest{})));
}
if (self->dID.present()) { if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGotReadVersion", self->dID.get()) TraceEvent("SimpleConfigTransactionGotReadVersion", self->dID.get())
.detail("Version", reply.generation.liveVersion); .detail("Version", reply.generation.liveVersion);
@ -62,8 +68,15 @@ class SimpleConfigTransactionImpl {
.detail("ConfigClass", configKey.configClass) .detail("ConfigClass", configKey.configClass)
.detail("KnobName", configKey.knobName); .detail("KnobName", configKey.knobName);
} }
ConfigTransactionGetReply reply = state ConfigTransactionGetReply reply;
wait(retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey })); if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetRequest{ generation, configKey },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GET)));
} else {
wait(store(reply, retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey })));
}
if (self->dID.present()) { if (self->dID.present()) {
TraceEvent("SimpleConfigTransactionGotValue", self->dID.get()) TraceEvent("SimpleConfigTransactionGotValue", self->dID.get())
.detail("Value", reply.value.get().toString()); .detail("Value", reply.value.get().toString());
@ -80,8 +93,17 @@ class SimpleConfigTransactionImpl {
self->getGenerationFuture = getGeneration(self); self->getGenerationFuture = getGeneration(self);
} }
ConfigGeneration generation = wait(self->getGenerationFuture); ConfigGeneration generation = wait(self->getGenerationFuture);
ConfigTransactionGetConfigClassesReply reply = state ConfigTransactionGetConfigClassesReply reply;
wait(retryBrokenPromise(self->cti.getClasses, ConfigTransactionGetConfigClassesRequest{ generation })); if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetConfigClassesRequest{ generation },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETCLASSES)));
} else {
wait(store(
reply,
retryBrokenPromise(self->cti.getClasses, ConfigTransactionGetConfigClassesRequest{ generation })));
}
RangeResult result; RangeResult result;
for (const auto& configClass : reply.configClasses) { for (const auto& configClass : reply.configClasses) {
result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr)); result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr));
@ -94,8 +116,17 @@ class SimpleConfigTransactionImpl {
self->getGenerationFuture = getGeneration(self); self->getGenerationFuture = getGeneration(self);
} }
ConfigGeneration generation = wait(self->getGenerationFuture); ConfigGeneration generation = wait(self->getGenerationFuture);
ConfigTransactionGetKnobsReply reply = state ConfigTransactionGetKnobsReply reply;
wait(retryBrokenPromise(self->cti.getKnobs, ConfigTransactionGetKnobsRequest{ generation, configClass })); if (self->cti.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigTransactionGetKnobsRequest{ generation, configClass },
self->cti.hostname.get(),
WLTOKEN_CONFIGTXN_GETKNOBS)));
} else {
wait(store(
reply,
retryBrokenPromise(self->cti.getKnobs, ConfigTransactionGetKnobsRequest{ generation, configClass })));
}
RangeResult result; RangeResult result;
for (const auto& knobName : reply.knobNames) { for (const auto& knobName : reply.knobNames) {
result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr)); result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr));
@ -109,7 +140,11 @@ class SimpleConfigTransactionImpl {
} }
wait(store(self->toCommit.generation, self->getGenerationFuture)); wait(store(self->toCommit.generation, self->getGenerationFuture));
self->toCommit.annotation.timestamp = now(); self->toCommit.annotation.timestamp = now();
wait(retryBrokenPromise(self->cti.commit, self->toCommit)); if (self->cti.hostname.present()) {
wait(retryGetReplyFromHostname(self->toCommit, self->cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT));
} else {
wait(retryBrokenPromise(self->cti.commit, self->toCommit));
}
self->committed = true; self->committed = true;
return Void(); return Void();
} }
@ -126,9 +161,14 @@ class SimpleConfigTransactionImpl {
public: public:
SimpleConfigTransactionImpl(Database const& cx) : cx(cx) { SimpleConfigTransactionImpl(Database const& cx) : cx(cx) {
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators(); const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
std::sort(coordinators.begin(), coordinators.end()); if (cs.coordinators().size()) {
cti = ConfigTransactionInterface(coordinators[0]); std::vector<NetworkAddress> coordinators = cs.coordinators();
std::sort(coordinators.begin(), coordinators.end());
cti = ConfigTransactionInterface(coordinators[0]);
} else {
cti = ConfigTransactionInterface(cs.hostnames[0]);
}
} }
SimpleConfigTransactionImpl(ConfigTransactionInterface const& cti) : cti(cti) {} SimpleConfigTransactionImpl(ConfigTransactionInterface const& cti) : cti(cti) {}

View File

@ -1644,13 +1644,10 @@ void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key
CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {} CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw, ACTOR Future<RangeResult> coordinatorsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) {
KeyRangeRef kr, state ClusterConnectionString cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
GetRangeLimits limitsHint) const { state std::vector<NetworkAddress> coordinator_processes = wait(cs.tryResolveHostnames());
RangeResult result; RangeResult result;
KeyRef prefix(getKeyRange().begin);
auto cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
auto coordinator_processes = cs.coordinators();
Key cluster_decription_key = prefix.withSuffix(LiteralStringRef("cluster_description")); Key cluster_decription_key = prefix.withSuffix(LiteralStringRef("cluster_description"));
if (kr.contains(cluster_decription_key)) { if (kr.contains(cluster_decription_key)) {
result.push_back_deep(result.arena(), KeyValueRef(cluster_decription_key, cs.clusterKeyName())); result.push_back_deep(result.arena(), KeyValueRef(cluster_decription_key, cs.clusterKeyName()));
@ -1673,10 +1670,16 @@ Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
return rywGetRange(ryw, kr, result); return rywGetRange(ryw, kr, result);
} }
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
KeyRef prefix(getKeyRange().begin);
return coordinatorsGetRangeActor(ryw, prefix, kr);
}
ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) { ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
state Reference<IQuorumChange> change; state Reference<IQuorumChange> change;
state ClusterConnectionString state ClusterConnectionString conn; // We don't care about the Key here.
conn; // We don't care about the Key here, it will be overrode in changeQuorumChecker().
state std::vector<std::string> process_address_or_hostname_strs; state std::vector<std::string> process_address_or_hostname_strs;
state Optional<std::string> msg; state Optional<std::string> msg;
state int index; state int index;
@ -1700,7 +1703,6 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
try { try {
if (Hostname::isHostname(process_address_or_hostname_strs[index])) { if (Hostname::isHostname(process_address_or_hostname_strs[index])) {
conn.hostnames.push_back(Hostname::parse(process_address_or_hostname_strs[index])); conn.hostnames.push_back(Hostname::parse(process_address_or_hostname_strs[index]));
conn.status = ClusterConnectionString::ConnectionStringStatus::UNRESOLVED;
} else { } else {
NetworkAddress a = NetworkAddress::parse(process_address_or_hostname_strs[index]); NetworkAddress a = NetworkAddress::parse(process_address_or_hostname_strs[index]);
if (!a.isValid()) { if (!a.isValid()) {
@ -1717,18 +1719,19 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
if (parse_error) { if (parse_error) {
std::string error = "ERROR: \'" + process_address_or_hostname_strs[index] + std::string error = "ERROR: \'" + process_address_or_hostname_strs[index] +
"\' is not a valid network endpoint address\n"; "\' is not a valid network endpoint address\n";
if (process_address_or_hostname_strs[index].find(":tls") != std::string::npos)
error += " Do not include the `:tls' suffix when naming a process\n";
return ManagementAPIError::toJsonString(false, "coordinators", error); return ManagementAPIError::toJsonString(false, "coordinators", error);
} }
} }
} }
wait(conn.resolveHostnames()); std::vector<NetworkAddress> addressesVec = wait(conn.tryResolveHostnames());
if (conn.coordinators().size()) if (addressesVec.size() != conn.hostnames.size() + conn.coordinators().size()) {
change = specifiedQuorumChange(conn.coordinators()); return ManagementAPIError::toJsonString(false, "coordinators", "One or more hostnames are not resolvable.");
else } else if (addressesVec.size()) {
change = specifiedQuorumChange(addressesVec);
} else {
change = noQuorumChange(); change = noQuorumChange();
}
// check update for cluster_description // check update for cluster_description
Key cluster_decription_key = LiteralStringRef("cluster_description").withPrefix(kr.begin); Key cluster_decription_key = LiteralStringRef("cluster_description").withPrefix(kr.begin);
@ -1740,19 +1743,18 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
change = nameQuorumChange(entry.second.get().toString(), change); change = nameQuorumChange(entry.second.get().toString(), change);
} else { } else {
// throw the error // throw the error
return Optional<std::string>(ManagementAPIError::toJsonString( return ManagementAPIError::toJsonString(
false, "coordinators", "Cluster description must match [A-Za-z0-9_]+")); false, "coordinators", "Cluster description must match [A-Za-z0-9_]+");
} }
} }
ASSERT(change.isValid()); ASSERT(change.isValid());
TraceEvent(SevDebug, "SKSChangeCoordinatorsStart") TraceEvent(SevDebug, "SKSChangeCoordinatorsStart")
.detail("NewHostnames", conn.hostnames.size() ? describe(conn.hostnames) : "N/A") .detail("NewAddresses", describe(addressesVec))
.detail("NewAddresses", describe(conn.coordinators()))
.detail("Description", entry.first ? entry.second.get().toString() : ""); .detail("Description", entry.first ? entry.second.get().toString() : "");
Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, &conn)); Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, addressesVec));
TraceEvent(SevDebug, "SKSChangeCoordinatorsFinish") TraceEvent(SevDebug, "SKSChangeCoordinatorsFinish")
.detail("Result", r.present() ? static_cast<int>(r.get()) : -1); // -1 means success .detail("Result", r.present() ? static_cast<int>(r.get()) : -1); // -1 means success
@ -1804,9 +1806,10 @@ ACTOR static Future<RangeResult> CoordinatorsAutoImplActor(ReadYourWritesTransac
state ClusterConnectionString old(currentKey.get().toString()); state ClusterConnectionString old(currentKey.get().toString());
state CoordinatorsResult result = CoordinatorsResult::SUCCESS; state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
std::vector<NetworkAddress> _desiredCoordinators = wait(autoQuorumChange()->getDesiredCoordinators( std::vector<NetworkAddress> _desiredCoordinators = wait(autoQuorumChange()->getDesiredCoordinators(
&tr, &tr,
old.coordinators(), oldCoordinators,
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)), Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
result)); result));

View File

@ -307,23 +307,35 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
bool* quorum_reachable, bool* quorum_reachable,
int* coordinatorsFaultTolerance) { int* coordinatorsFaultTolerance) {
try { try {
wait(connRecord->resolveHostnames());
state ClientCoordinators coord(connRecord); state ClientCoordinators coord(connRecord);
state StatusObject statusObj; state StatusObject statusObj;
state std::vector<Future<Optional<LeaderInfo>>> leaderServers; state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
leaderServers.reserve(coord.clientLeaderServers.size()); leaderServers.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader, if (coord.clientLeaderServers[i].hostname.present()) {
GetLeaderRequest(coord.clusterKey, UID()), leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply)); coord.clientLeaderServers[i].hostname.get(),
WLTOKEN_CLIENTLEADERREG_GETLEADER,
TaskPriority::CoordinationReply));
} else {
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
GetLeaderRequest(coord.clusterKey, UID()),
TaskPriority::CoordinationReply));
}
}
state std::vector<Future<ProtocolInfoReply>> coordProtocols; state std::vector<Future<ProtocolInfoReply>> coordProtocols;
coordProtocols.reserve(coord.clientLeaderServers.size()); coordProtocols.reserve(coord.clientLeaderServers.size());
for (int i = 0; i < coord.clientLeaderServers.size(); i++) { for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown( if (coord.clientLeaderServers[i].hostname.present()) {
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO) }; coordProtocols.push_back(retryGetReplyFromHostname(
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{})); ProtocolInfoRequest{}, coord.clientLeaderServers[i].hostname.get(), WLTOKEN_PROTOCOL_INFO));
} else {
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown(
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO) };
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
}
} }
wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) && wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) &&
@ -337,8 +349,12 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
int coordinatorsUnavailable = 0; int coordinatorsUnavailable = 0;
for (int i = 0; i < leaderServers.size(); i++) { for (int i = 0; i < leaderServers.size(); i++) {
StatusObject coordStatus; StatusObject coordStatus;
coordStatus["address"] = if (coord.clientLeaderServers[i].hostname.present()) {
coord.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress().toString(); coordStatus["address"] = coord.clientLeaderServers[i].hostname.get().toString();
} else {
coordStatus["address"] =
coord.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress().toString();
}
if (leaderServers[i].isReady()) { if (leaderServers[i].isReady()) {
coordStatus["reachable"] = true; coordStatus["reachable"] = true;

View File

@ -48,6 +48,8 @@ struct TenantMapEntry {
int64_t id; int64_t id;
Key prefix; Key prefix;
constexpr static int ROOT_PREFIX_SIZE = sizeof(id);
private: private:
void initPrefix(KeyRef subspace) { void initPrefix(KeyRef subspace) {
ASSERT(id >= 0); ASSERT(id >= 0);

View File

@ -24,6 +24,9 @@
*/ */
#if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED) #if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED)
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/md5.h> #include <openssl/md5.h>
#elif !defined(_MD5_H) #elif !defined(_MD5_H)
#define _MD5_H #define _MD5_H

View File

@ -15,6 +15,7 @@ set(FDBRPC_SRCS
genericactors.actor.h genericactors.actor.h
genericactors.actor.cpp genericactors.actor.cpp
HealthMonitor.actor.cpp HealthMonitor.actor.cpp
HTTP.actor.cpp
IAsyncFile.actor.cpp IAsyncFile.actor.cpp
IPAllowList.cpp IPAllowList.cpp
LoadBalance.actor.cpp LoadBalance.actor.cpp
@ -28,6 +29,10 @@ set(FDBRPC_SRCS
ReplicationPolicy.cpp ReplicationPolicy.cpp
ReplicationTypes.cpp ReplicationTypes.cpp
ReplicationUtils.cpp ReplicationUtils.cpp
RESTClient.h
RESTClient.actor.cpp
RESTUtils.h
RESTUtils.actor.cpp
SimExternalConnection.actor.cpp SimExternalConnection.actor.cpp
SimExternalConnection.h SimExternalConnection.h
Stats.actor.cpp Stats.actor.cpp

View File

@ -18,10 +18,12 @@
* limitations under the License. * limitations under the License.
*/ */
#include "fdbclient/HTTP.h" #include "fdbrpc/HTTP.h"
#include "fdbclient/md5/md5.h" #include "fdbclient/md5/md5.h"
#include "fdbclient/libb64/encode.h" #include "fdbclient/libb64/encode.h"
#include <cctype> #include <cctype>
#include "flow/actorcompiler.h" // has to be last include #include "flow/actorcompiler.h" // has to be last include
namespace HTTP { namespace HTTP {

View File

@ -18,6 +18,11 @@
* limitations under the License. * limitations under the License.
*/ */
#ifndef FDBRPC_HTTP_H
#define FDBRPC_HTTP_H
#pragma once
#include "flow/flow.h" #include "flow/flow.h"
#include "flow/Net2Packet.h" #include "flow/Net2Packet.h"
#include "fdbrpc/IRateControl.h" #include "fdbrpc/IRateControl.h"
@ -63,4 +68,27 @@ Future<Reference<Response>> doRequest(Reference<IConnection> const& conn,
int64_t* const& pSent, int64_t* const& pSent,
Reference<IRateControl> const& recvRate, Reference<IRateControl> const& recvRate,
const std::string& requestHeader = std::string()); const std::string& requestHeader = std::string());
constexpr int HTTP_STATUS_CODE_OK = 200;
constexpr int HTTP_STATUS_CODE_CREATED = 201;
constexpr int HTTP_STATUS_CODE_ACCEPTED = 202;
constexpr int HTTP_STATUS_CODE_NO_CONTENT = 204;
constexpr int HTTP_STATUS_CODE_UNAUTHORIZED = 401;
constexpr int HTTP_STATUS_CODE_NOT_ACCEPTABLE = 406;
constexpr int HTTP_STATUS_CODE_TOO_MANY_REQUESTS = 429;
constexpr int HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR = 500;
constexpr int HTTP_STATUS_CODE_BAD_GATEWAY = 502;
constexpr int HTTP_STATUS_CODE_SERVICE_UNAVAILABLE = 503;
constexpr int HTTP_RETRYAFTER_DELAY_SECS = 300;
const std::string HTTP_VERB_GET = "GET";
const std::string HTTP_VERB_HEAD = "HEAD";
const std::string HTTP_VERB_DELETE = "DELETE";
const std::string HTTP_VERB_TRACE = "TRACE";
const std::string HTTP_VERB_PUT = "PUT";
const std::string HTTP_VERB_POST = "POST";
} // namespace HTTP } // namespace HTTP
#endif

363
fdbrpc/RESTClient.actor.cpp Normal file
View File

@ -0,0 +1,363 @@
/*
* RESTClient.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbrpc/RESTClient.h"
#include "fdbrpc/HTTP.h"
#include "fdbrpc/IRateControl.h"
#include "fdbrpc/RESTUtils.h"
#include "flow/Arena.h"
#include "flow/Error.h"
#include "flow/FastRef.h"
#include "flow/Knobs.h"
#include "flow/Net2Packet.h"
#include "flow/flow.h"
#include "flow/network.h"
#include "flow/serialize.h"
#include "flow/Trace.h"
#include "flow/UnitTest.h"
#include <memory>
#include <unordered_map>
#include "flow/actorcompiler.h" // always the last include
// Serialize this stats snapshot as a JSON object for status reporting.
json_spirit::mObject RESTClient::Stats::getJSON() {
	json_spirit::mObject stats;

	stats["host_service"] = host_service;
	stats["requests_failed"] = requests_failed;
	stats["requests_successful"] = requests_successful;
	stats["bytes_sent"] = bytes_sent;

	return stats;
}
// Field-wise delta between this snapshot and an earlier snapshot 'rhs';
// the host_service label is carried over unchanged.
RESTClient::Stats RESTClient::Stats::operator-(const Stats& rhs) {
	Stats delta(host_service);

	delta.requests_successful = requests_successful - rhs.requests_successful;
	delta.requests_failed = requests_failed - rhs.requests_failed;
	delta.bytes_sent = bytes_sent - rhs.bytes_sent;

	return delta;
}
// Default constructor: knobs keep the FLOW_KNOBS-derived defaults (see RESTClientKnobs()).
// NOTE(review): 'conectionPool' is not initialized here, yet doRequest_impl dereferences it —
// confirm it is constructed before the first request is issued.
RESTClient::RESTClient() {}

// Construct with caller-supplied knob overrides; throws rest_invalid_rest_client_knob
// if any name in 'knobSettings' is not a registered knob.
RESTClient::RESTClient(std::unordered_map<std::string, int>& knobSettings) {
	knobs.set(knobSettings);
}
// Override a subset of client knobs by (long or short) name; throws
// rest_invalid_rest_client_knob on an unknown knob name.
void RESTClient::setKnobs(const std::unordered_map<std::string, int>& knobSettings) {
	knobs.set(knobSettings);
}

// Snapshot of the current knob values, keyed by long knob name.
std::unordered_map<std::string, int> RESTClient::getKnobs() const {
	return knobs.get();
}
// Executes one logical REST request against 'url' with automatic retry/backoff.
//
// verb         - HTTP verb string (e.g. HTTP::HTTP_VERB_GET)
// headers      - caller-supplied headers; "Content-Length" and "Host" are overwritten here
// url          - parsed request URL; must outlive the returned future (RESTClient wrappers
//                pass a stack object)
// successCodes - HTTP response codes treated as success; any other code is a failure
//
// Retries retryable errors and 500/502/503/429 responses up to
// min(request_tries, connect_tries) attempts, honoring a "Retry-After" header when present.
// Throws http_not_accepted, http_auth_failed, connection_failed or http_request_failed
// on terminal failure.
ACTOR Future<Reference<HTTP::Response>> doRequest_impl(Reference<RESTClient> client,
                                                       std::string verb,
                                                       HTTP::Headers headers,
                                                       RESTUrl* url,
                                                       std::set<unsigned int> successCodes) {
	state UnsentPacketQueue content;
	state int contentLen = url->body.size();

	if (url->body.size() > 0) {
		PacketWriter pw(content.getWriteBuffer(url->body.size()), nullptr, Unversioned());
		pw.serializeBytes(url->body);
	}

	// Stats are tracked per "host:service". Bug fix: the key was previously built as
	// getStatsKey(url->service, url->service), yielding "service:service" and merging
	// stats for different hosts that share a port.
	std::string statsKey = RESTClient::getStatsKey(url->host, url->service);
	auto sItr = client->statsMap.find(statsKey);
	if (sItr == client->statsMap.end()) {
		client->statsMap.emplace(statsKey, std::make_unique<RESTClient::Stats>(statsKey));
	}

	headers["Content-Length"] = format("%d", contentLen);
	headers["Host"] = url->host;

	state int maxTries = std::min(client->knobs.request_tries, client->knobs.connect_tries);
	state int thisTry = 1;
	state double nextRetryDelay = 2.0;
	state Reference<IRateControl> sendReceiveRate = makeReference<Unlimited>();
	// NOTE(review): dividing by 60 converts 'request_timeout_secs' to minutes before it is
	// passed to timeoutError(), which elsewhere takes seconds — confirm the intended units.
	state double reqTimeout = (client->knobs.request_timeout_secs * 1.0) / 60;
	state RESTConnectionPoolKey connectPoolKey = RESTConnectionPool::getConnectionPoolKey(url->host, url->service);
	state RESTClient::Stats* statsPtr = client->statsMap[statsKey].get();

	loop {
		state Optional<Error> err;
		state Optional<NetworkAddress> remoteAddress;
		state bool connectionEstablished = false;
		state Reference<HTTP::Response> r;

		try {
			// Start connecting
			Future<RESTConnectionPool::ReusableConnection> frconn = client->conectionPool->connect(
			    connectPoolKey, client->knobs.secure_connection, client->knobs.max_connection_life);

			// Finish connecting, do request
			state RESTConnectionPool::ReusableConnection rconn =
			    wait(timeoutError(frconn, client->knobs.connect_timeout));
			connectionEstablished = true;

			remoteAddress = rconn.conn->getPeerAddress();
			Reference<HTTP::Response> _r = wait(timeoutError(HTTP::doRequest(rconn.conn,
			                                                                 verb,
			                                                                 url->resource,
			                                                                 headers,
			                                                                 contentLen > 0 ? &content : nullptr,
			                                                                 contentLen,
			                                                                 sendReceiveRate,
			                                                                 &statsPtr->bytes_sent,
			                                                                 sendReceiveRate),
			                                                 reqTimeout));
			r = _r;

			// Since the response was parsed successfully (which is why we are here) reuse the connection unless we
			// received the "Connection: close" header.
			if (r->headers["Connection"] != "close") {
				client->conectionPool->returnConnection(connectPoolKey, rconn, client->knobs.connection_pool_size);
			}
			rconn.conn.clear();
		} catch (Error& e) {
			if (e.code() == error_code_actor_cancelled) {
				throw;
			}
			err = e;
		}

		// If err is not present then r is valid.
		// If r->code is in successCodes then record the successful request and return r.
		if (!err.present() && successCodes.count(r->code) != 0) {
			statsPtr->requests_successful++;
			return r;
		}

		// Otherwise, this request is considered failed. Update failure count.
		statsPtr->requests_failed++;

		// All errors in err are potentially retryable as well as certain HTTP response codes...
		bool retryable = err.present() || r->code == HTTP::HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR ||
		                 r->code == HTTP::HTTP_STATUS_CODE_BAD_GATEWAY ||
		                 r->code == HTTP::HTTP_STATUS_CODE_SERVICE_UNAVAILABLE ||
		                 r->code == HTTP::HTTP_STATUS_CODE_TOO_MANY_REQUESTS;

		// But only if our previous attempt was not the last allowable try.
		retryable = retryable && (thisTry < maxTries);

		TraceEvent event(SevWarn, retryable ? "RESTClient_FailedRetryable" : "RESTClient_RequestFailed");

		// Attach err to trace event if present, otherwise extract some stuff from the response
		if (err.present()) {
			event.errorUnsuppressed(err.get());
		}
		event.suppressFor(60);
		if (!err.present()) {
			event.detail("ResponseCode", r->code);
		}
		event.detail("ConnectionEstablished", connectionEstablished);

		if (remoteAddress.present())
			event.detail("RemoteEndpoint", remoteAddress.get());
		else
			event.detail("RemoteHost", url->host);

		event.detail("Verb", verb).detail("Resource", url->resource).detail("ThisTry", thisTry);

		// If r is not valid or not code TOO_MANY_REQUESTS then increment the try count.
		// TOO_MANY_REQUEST's will not count against the attempt limit.
		if (!r || r->code != HTTP::HTTP_STATUS_CODE_TOO_MANY_REQUESTS) {
			++thisTry;
		}

		// We will wait delay seconds before the next retry, start with nextRetryDelay.
		double delay = nextRetryDelay;
		// Double but limit the *next* nextRetryDelay.
		nextRetryDelay = std::min(nextRetryDelay * 2, 60.0);

		if (retryable) {
			// If r is valid then obey the Retry-After response header if present.
			if (r) {
				auto iRetryAfter = r->headers.find("Retry-After");
				if (iRetryAfter != r->headers.end()) {
					event.detail("RetryAfterHeader", iRetryAfter->second);
					char* pEnd;
					double retryAfter = strtod(iRetryAfter->second.c_str(), &pEnd);
					if (*pEnd) {
						// If there were other characters then don't trust the parsed value
						retryAfter = HTTP::HTTP_RETRYAFTER_DELAY_SECS;
					}
					// Update delay
					delay = std::max(delay, retryAfter);
				}
			}

			// Log the delay then wait.
			event.detail("RetryDelay", delay);
			wait(::delay(delay));
		} else {
			// We can't retry, so throw something.

			// This error code means the authentication header was not accepted, likely the account or key is wrong.
			if (r && r->code == HTTP::HTTP_STATUS_CODE_NOT_ACCEPTABLE) {
				throw http_not_accepted();
			}

			if (r && r->code == HTTP::HTTP_STATUS_CODE_UNAUTHORIZED) {
				throw http_auth_failed();
			}

			// Recognize and throw specific errors
			if (err.present()) {
				int code = err.get().code();

				// If we get a timed_out error during the the connect() phase, we'll call that connection_failed despite
				// the fact that there was technically never a 'connection' to begin with. It differentiates between an
				// active connection timing out vs a connection timing out, though not between an active connection
				// failing vs connection attempt failing.
				// TODO: Add more error types?
				if (code == error_code_timed_out && !connectionEstablished) {
					throw connection_failed();
				}

				if (code == error_code_timed_out || code == error_code_connection_failed ||
				    code == error_code_lookup_failed) {
					throw err.get();
				}
			}

			throw http_request_failed();
		}
	}
}
// Dispatch helper for verbs that carry a request body (PUT/POST).
Future<Reference<HTTP::Response>> RESTClient::doPutOrPost(const std::string& verb,
                                                          Optional<HTTP::Headers> optHeaders,
                                                          RESTUrl* url,
                                                          std::set<unsigned int> successCodes) {
	HTTP::Headers requestHeaders = optHeaders.present() ? optHeaders.get() : HTTP::Headers();
	return doRequest_impl(Reference<RESTClient>::addRef(this), verb, requestHeaders, url, successCodes);
}
// Issue an HTTP POST with 'requestBody' as the payload; 200 (OK) is the only success code.
Future<Reference<HTTP::Response>> RESTClient::doPost(const std::string& fullUrl,
                                                     const std::string& requestBody,
                                                     Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, requestBody, knobs.secure_connection);
	return doPutOrPost(HTTP::HTTP_VERB_POST, optHeaders, &url, { HTTP::HTTP_STATUS_CODE_OK });
}
// Issue an HTTP PUT with 'requestBody' as the payload.
// Success codes: 201 (resource created), 200/204 (target representation modified).
Future<Reference<HTTP::Response>> RESTClient::doPut(const std::string& fullUrl,
                                                    const std::string& requestBody,
                                                    Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, requestBody, knobs.secure_connection);
	std::set<unsigned int> successCodes = { HTTP::HTTP_STATUS_CODE_OK,
		                                    HTTP::HTTP_STATUS_CODE_CREATED,
		                                    HTTP::HTTP_STATUS_CODE_NO_CONTENT };
	return doPutOrPost(HTTP::HTTP_VERB_PUT, optHeaders, &url, successCodes);
}
// Shared implementation for body-less verbs (GET/HEAD/DELETE/TRACE).
// Bug fix: the 'verb' argument was previously ignored and HTTP::HTTP_VERB_GET was always
// passed to doRequest_impl, so doHead()/doDelete()/doTrace() silently issued GET requests.
Future<Reference<HTTP::Response>> RESTClient::doGetHeadDeleteOrTrace(const std::string& verb,
                                                                     Optional<HTTP::Headers> optHeaders,
                                                                     RESTUrl* url,
                                                                     std::set<unsigned int> successCodes) {
	HTTP::Headers headers;
	if (optHeaders.present()) {
		headers = optHeaders.get();
	}

	return doRequest_impl(Reference<RESTClient>::addRef(this), verb, headers, url, successCodes);
}
// Issue an HTTP GET; 200 (OK) is the only success code.
Future<Reference<HTTP::Response>> RESTClient::doGet(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, knobs.secure_connection);
	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_GET, optHeaders, &url, { HTTP::HTTP_STATUS_CODE_OK });
}
// Issue an HTTP HEAD; 200 (OK) is the only success code.
Future<Reference<HTTP::Response>> RESTClient::doHead(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, knobs.secure_connection);
	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_HEAD, optHeaders, &url, { HTTP::HTTP_STATUS_CODE_OK });
}
// Issue an HTTP DELETE.
// Success codes: 200 (action enacted), 202 (accepted, likely to succeed but not yet enacted),
// 204 (enacted, nothing further to report).
Future<Reference<HTTP::Response>> RESTClient::doDelete(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, knobs.secure_connection);
	std::set<unsigned int> successCodes = { HTTP::HTTP_STATUS_CODE_OK,
		                                    HTTP::HTTP_STATUS_CODE_NO_CONTENT,
		                                    HTTP::HTTP_STATUS_CODE_ACCEPTED };
	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_DELETE, optHeaders, &url, successCodes);
}
// Issue an HTTP TRACE; 200 (OK) is the only success code.
Future<Reference<HTTP::Response>> RESTClient::doTrace(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl url(fullUrl, knobs.secure_connection);
	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_TRACE, optHeaders, &url, { HTTP::HTTP_STATUS_CODE_OK });
}
// Only used to link unit tests
// (referenced from the test registry so the linker retains this translation unit).
void forceLinkRESTClientTests() {}
// Unit test: knob defaults, round-trip through setKnobs()/getKnobs(), and rejection of an
// unknown knob name.
TEST_CASE("fdbrpc/RESTClient") {
	RESTClient r;

	// Defaults must mirror FLOW_KNOBS (see RESTClientKnobs::RESTClientKnobs()).
	std::unordered_map<std::string, int> knobs = r.getKnobs();
	ASSERT_EQ(knobs["secure_connection"], RESTClientKnobs::SECURE_CONNECTION);
	ASSERT_EQ(knobs["connection_pool_size"], FLOW_KNOBS->RESTCLIENT_MAX_CONNECTIONPOOL_SIZE);
	ASSERT_EQ(knobs["connect_tries"], FLOW_KNOBS->RESTCLIENT_CONNECT_TRIES);
	ASSERT_EQ(knobs["connect_timeout"], FLOW_KNOBS->RESTCLIENT_CONNECT_TIMEOUT);
	ASSERT_EQ(knobs["max_connection_life"], FLOW_KNOBS->RESTCLIENT_MAX_CONNECTION_LIFE);
	ASSERT_EQ(knobs["request_tries"], FLOW_KNOBS->RESTCLIENT_REQUEST_TRIES);
	ASSERT_EQ(knobs["request_timeout_secs"], FLOW_KNOBS->RESTCLIENT_REQUEST_TIMEOUT_SEC);

	// Bump every knob and verify the new values round-trip through setKnobs()/getKnobs().
	for (auto& itr : knobs) {
		itr.second++;
	}
	r.setKnobs(knobs);
	std::unordered_map<std::string, int> updated = r.getKnobs();
	for (auto& itr : updated) {
		ASSERT_EQ(knobs[itr.first], itr.second);
	}

	// invalid client knob
	knobs["foo"] = 100;
	try {
		r.setKnobs(knobs);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_rest_client_knob) {
			throw e;
		}
	}

	return Void();
}

97
fdbrpc/RESTClient.h Normal file
View File

@ -0,0 +1,97 @@
/*
* RESTClient.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDBRPC_RESTCLIENT_H
#define FDBRPC_RESTCLIENT_H
#include <memory>
#pragma once
#include "fdbclient/JSONDoc.h"
#include "fdbrpc/HTTP.h"
#include "fdbrpc/RESTUtils.h"
#include "flow/Arena.h"
#include "flow/FastRef.h"
#include "flow/flow.h"
#include "flow/Net2Packet.h"
// This interface enables sending REST HTTP requests and receiving REST HTTP responses from a resource identified by a
// URI.
class RESTClient : public ReferenceCounted<RESTClient> {
public:
	// Per-"host:service" request counters, exported as JSON for status reporting.
	struct Stats {
		explicit Stats(const std::string& hService)
		  : host_service(hService), requests_successful(0), requests_failed(0), bytes_sent(0) {}
		// Field-wise delta between two snapshots.
		Stats operator-(const Stats& rhs);
		void clear() { requests_failed = requests_successful = bytes_sent = 0; }
		json_spirit::mObject getJSON();

		std::string host_service;
		int64_t requests_successful;
		int64_t requests_failed;
		int64_t bytes_sent;
	};

	RESTClientKnobs knobs;
	// NOTE(review): member name is misspelled ('conectionPool'); kept as-is because the .cpp
	// references it. Also not initialized by the constructors visible here — confirm it is
	// constructed before the first request is issued.
	Reference<RESTConnectionPool> conectionPool;
	// Connection stats maintained per "host:service"
	std::unordered_map<std::string, std::unique_ptr<Stats>> statsMap;

	RESTClient();
	explicit RESTClient(std::unordered_map<std::string, int>& params);

	// Override client knobs; throws rest_invalid_rest_client_knob on an unknown knob name.
	void setKnobs(const std::unordered_map<std::string, int>& knobSettings);
	// Snapshot of current knob values keyed by long knob name.
	std::unordered_map<std::string, int> getKnobs() const;

	// Supports common REST APIs.
	// On invocation of below methods, input 'fullUrl' is parsed using RESTUrl interface,
	// RESTConnectionPool is used to leverage cached connection if any for 'host:service' pair. API then leverage
	// HTTP::doRequest to accomplish the specified operation
	Future<Reference<HTTP::Response>> doGet(const std::string& fullUrl,
	                                        Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doHead(const std::string& fullUrl,
	                                         Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doDelete(const std::string& fullUrl,
	                                           Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doTrace(const std::string& fullUrl,
	                                          Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doPut(const std::string& fullUrl,
	                                        const std::string& requestBody,
	                                        Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doPost(const std::string& fullUrl,
	                                         const std::string& requestBody,
	                                         Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());

	// Key used for the statsMap above: "host:service".
	static std::string getStatsKey(const std::string& host, const std::string& service) { return host + ":" + service; }

private:
	// Shared implementation for body-less verbs (GET/HEAD/DELETE/TRACE).
	Future<Reference<HTTP::Response>> doGetHeadDeleteOrTrace(const std::string& verb,
	                                                         Optional<HTTP::Headers> optHeaders,
	                                                         RESTUrl* url,
	                                                         std::set<unsigned int> successCodes);
	// Shared implementation for verbs carrying a request body (PUT/POST).
	Future<Reference<HTTP::Response>> doPutOrPost(const std::string& verb,
	                                              Optional<HTTP::Headers> headers,
	                                              RESTUrl* url,
	                                              std::set<unsigned int> successCodes);
};
#endif

276
fdbrpc/RESTUtils.actor.cpp Normal file
View File

@ -0,0 +1,276 @@
/*
* RESTUtils.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbrpc/RESTUtils.h"
#include "flow/flat_buffers.h"
#include "flow/UnitTest.h"
#include <boost/algorithm/string.hpp>
#include "flow/actorcompiler.h" // always the last include
namespace {

// URL schemes understood by RESTUrl parsing.
std::unordered_set<std::string> protocols = { "http", "https" };

// True when 'protocol' is a supported scheme (callers lowercase the input first).
bool isProtocolSupported(const std::string& protocol) {
	return protocols.count(protocol) > 0;
}

// True when 'protocol' requires TLS. (Name typo 'Prototol' retained — parseUrl refers to it.)
bool isSecurePrototol(const std::string& protocol) {
	return protocol == "https";
}

} // namespace
// Initialize every knob from its FLOW_KNOBS default and register the long/short name
// aliases accepted by set().
RESTClientKnobs::RESTClientKnobs() {
	secure_connection = RESTClientKnobs::SECURE_CONNECTION;
	connection_pool_size = FLOW_KNOBS->RESTCLIENT_MAX_CONNECTIONPOOL_SIZE;
	connect_tries = FLOW_KNOBS->RESTCLIENT_CONNECT_TRIES;
	connect_timeout = FLOW_KNOBS->RESTCLIENT_CONNECT_TIMEOUT;
	max_connection_life = FLOW_KNOBS->RESTCLIENT_MAX_CONNECTION_LIFE;
	request_tries = FLOW_KNOBS->RESTCLIENT_REQUEST_TRIES;
	request_timeout_secs = FLOW_KNOBS->RESTCLIENT_REQUEST_TIMEOUT_SEC;

	// Each knob is addressable by both its long name and a short alias.
	auto registerKnob = [this](const char* name, const char* alias, int* knob) {
		knobMap[name] = knob;
		knobMap[alias] = knob;
	};
	registerKnob("connection_pool_size", "pz", &connection_pool_size);
	registerKnob("secure_connection", "sc", &secure_connection);
	registerKnob("connect_tries", "ct", &connect_tries);
	registerKnob("connect_timeout", "cto", &connect_timeout);
	registerKnob("max_connection_life", "mcl", &max_connection_life);
	registerKnob("request_tries", "rt", &request_tries);
	registerKnob("request_timeout_secs", "rtom", &request_timeout_secs);
}
void RESTClientKnobs::set(const std::unordered_map<std::string, int>& knobSettings) {
TraceEvent trace = TraceEvent("RESTClient_SetKnobs");
for (const auto& itr : knobSettings) {
const auto& kItr = RESTClientKnobs::knobMap.find(itr.first);
if (kItr == RESTClientKnobs::knobMap.end()) {
trace.detail("RESTClient_InvalidKnobName", itr.first);
throw rest_invalid_rest_client_knob();
}
*(kItr->second) = itr.second;
trace.detail(itr.first.c_str(), itr.second);
}
}
// Snapshot of the current knob values, keyed by long knob name.
std::unordered_map<std::string, int> RESTClientKnobs::get() const {
	std::unordered_map<std::string, int> details;

	details["connection_pool_size"] = connection_pool_size;
	details["secure_connection"] = secure_connection;
	details["connect_tries"] = connect_tries;
	details["connect_timeout"] = connect_timeout;
	details["max_connection_life"] = max_connection_life;
	details["request_tries"] = request_tries;
	details["request_timeout_secs"] = request_timeout_secs;

	return details;
}
// Provide a usable TCP connection for 'connectKey': pop pooled connections until a
// not-yet-expired one is found, otherwise establish (and TLS-handshake) a fresh connection
// with lifetime 'maxConnLife' seconds. Throws rest_connectpool_key_not_found when the key
// has no entry in connectionPoolMap.
// NOTE(review): nothing in this file inserts entries into connectionPoolMap, so a fresh pool
// would always throw here — confirm callers register the key beforehand.
ACTOR Future<RESTConnectionPool::ReusableConnection> connect_impl(Reference<RESTConnectionPool> connectionPool,
                                                                  RESTConnectionPoolKey connectKey,
                                                                  bool isSecure,
                                                                  int maxConnLife) {
	auto poolItr = connectionPool->connectionPoolMap.find(connectKey);
	if (poolItr == connectionPool->connectionPoolMap.end()) {
		throw rest_connectpool_key_not_found();
	}

	// Reuse the first still-valid pooled connection; expired entries are simply dropped.
	while (!poolItr->second.empty()) {
		RESTConnectionPool::ReusableConnection rconn = poolItr->second.front();
		poolItr->second.pop();

		if (rconn.expirationTime > now()) {
			TraceEvent("RESTClient_ReusableConnection")
			    .suppressFor(60)
			    .detail("RemoteEndpoint", rconn.conn->getPeerAddress())
			    .detail("ExpireIn", rconn.expirationTime - now());
			return rconn;
		}
	}

	// No cached connection available — establish a new one and complete the handshake.
	state Reference<IConnection> conn =
	    wait(INetworkConnections::net()->connect(connectKey.first, connectKey.second, isSecure));
	wait(conn->connectHandshake());

	return RESTConnectionPool::ReusableConnection({ conn, now() + maxConnLife });
}
// Public wrapper around the connect_impl ACTOR; see connect_impl for semantics.
Future<RESTConnectionPool::ReusableConnection> RESTConnectionPool::connect(RESTConnectionPoolKey connectKey,
                                                                           const bool isSecure,
                                                                           const int maxConnLife) {
	return connect_impl(Reference<RESTConnectionPool>::addRef(this), connectKey, isSecure, maxConnLife);
}
// Return a connection to the pool for future reuse. The connection is cached only when it
// has not yet expired and the pool for this key is below 'maxConnections'; otherwise it is
// dropped. In all cases the caller's reference is released.
void RESTConnectionPool::returnConnection(RESTConnectionPoolKey connectKey,
                                          ReusableConnection& rconn,
                                          const int maxConnections) {
	auto poolItr = connectionPoolMap.find(connectKey);
	if (poolItr == connectionPoolMap.end()) {
		throw rest_connectpool_key_not_found();
	}

	const bool stillValid = rconn.expirationTime > now();
	if (stillValid && poolItr->second.size() < maxConnections) {
		poolItr->second.push(rconn);
	}
	rconn.conn = Reference<IConnection>();
}
// Parse 'fUrl' into host/service/resource/parameters; throws rest_invalid_uri on malformed
// input or when the scheme disagrees with the 'isSecure' knob.
RESTUrl::RESTUrl(const std::string& fUrl, const bool isSecure) {
	parseUrl(fUrl, isSecure);
}

// Same as above, additionally capturing 'b' as the request body payload.
RESTUrl::RESTUrl(const std::string& fullUrl, const std::string& b, const bool isSecure) : body(b) {
	parseUrl(fullUrl, isSecure);
}
// Split 'fullUrl' into protocol / host / optional service (port) / resource / optional
// request parameters, validating the scheme against the 'isSecure' knob. Any parse failure
// is traced and rethrown as rest_invalid_uri.
void RESTUrl::parseUrl(const std::string& fullUrl, const bool isSecure) {
	// Sample valid URIs
	// 1. With 'host' & 'resource' := '<protocol>://<host>/<resource>'
	// 2. With 'host', 'service' & 'resource' := '<protocol>://<host>:port/<resource>'
	// 3. With 'host', 'service', 'resource' & 'reqParameters' := '<protocol>://<host>:port/<resource>?<parameter-list>'
	try {
		StringRef t(fullUrl);
		StringRef p = t.eat("://");
		std::string protocol = p.toString();
		boost::algorithm::to_lower(protocol);
		if (!isProtocolSupported(protocol)) {
			// Parse helpers throw std::string; the catch below converts to rest_invalid_uri.
			throw format("Invalid REST URI protocol '%s'", protocol.c_str());
		}

		// Ensure connection secure knob setting matches with the input URI
		if ((isSecurePrototol(protocol) && !isSecure) || (!isSecurePrototol(protocol) && isSecure)) {
			throw format("Invalid REST URI protocol secure knob '%s'", fullUrl.c_str());
		}

		// extract 'resource' and optional 'parameter list' if supplied in the URL
		uint8_t foundSeparator = 0;
		StringRef hostPort = t.eatAny("/?", &foundSeparator);
		if (foundSeparator == '/') {
			resource = t.eat("?").toString();
			reqParameters = t.eat().toString();
		}
		// NOTE(review): when '?' immediately follows host:port (no '/'), the branch above is
		// skipped and the parameter list is silently dropped — confirm this is intended.

		// hostPort is at least a host or IP address, optionally followed by :portNumber or :serviceName
		StringRef hRef(hostPort);
		StringRef h = hRef.eat(":");
		if (h.size() == 0) {
			throw std::string("host cannot be empty");
		}
		host = h.toString();
		service = hRef.eat().toString();

		TraceEvent("RESTClient_ParseURI")
		    .detail("URI", fullUrl)
		    .detail("Host", host)
		    .detail("Service", service)
		    .detail("Resource", resource)
		    .detail("ReqParameters", reqParameters);
	} catch (std::string& err) {
		TraceEvent("RESTClient_ParseError").detail("URI", fullUrl).detail("Error", err);
		throw rest_invalid_uri();
	}
}
// Only used to link unit tests
// (referenced from the test registry so the linker retains this translation unit).
void forceLinkRESTUtilsTests() {}
// Unit test: RESTUrl parsing — rejection of bad protocol / knob mismatch / missing host,
// and field extraction for valid URIs with and without service and request parameters.
TEST_CASE("fdbrpc/RESTUtils") {
	// Parsing 'uri' with the given secure flag must fail with rest_invalid_uri.
	auto expectInvalidUri = [](const std::string& uri, bool isSecure) {
		try {
			RESTUrl r(uri, isSecure);
			ASSERT(false);
		} catch (Error& e) {
			if (e.code() != error_code_rest_invalid_uri) {
				throw;
			}
		}
	};

	// invalid protocol
	expectInvalidUri("httpx://foo/bar", false);
	// mismatch protocol and knob values
	expectInvalidUri("http://foo/bar", true);
	// missing host
	expectInvalidUri("https://:/bar", true);

	// valid URI with service
	{
		RESTUrl r("https://host:80/foo/bar", true);
		ASSERT_EQ(r.host.compare("host"), 0);
		ASSERT_EQ(r.service.compare("80"), 0);
		ASSERT_EQ(r.resource.compare("foo/bar"), 0);
	}

	// valid URI with-out service
	{
		RESTUrl r("https://host/foo/bar", true);
		ASSERT_EQ(r.host.compare("host"), 0);
		ASSERT(r.service.empty());
		ASSERT_EQ(r.resource.compare("foo/bar"), 0);
	}

	// valid URI with parameters
	{
		RESTUrl r("https://host/foo/bar?param1,param2", true);
		ASSERT_EQ(r.host.compare("host"), 0);
		ASSERT(r.service.empty());
		ASSERT_EQ(r.resource.compare("foo/bar"), 0);
		ASSERT_EQ(r.reqParameters.compare("param1,param2"), 0);
	}

	return Void();
}

113
fdbrpc/RESTUtils.h Normal file
View File

@ -0,0 +1,113 @@
/*
* RESTUtils.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDRPC_REST_UTILS_H
#define FDRPC_REST_UTILS_H
#pragma once
#include "flow/flow.h"
#include "flow/FastRef.h"
#include "flow/Net2Packet.h"
#include <unordered_map>
#include <utility>
// Util interface managing REST active connection pool.
// The interface internally constructs and maintains map {"host:service" -> activeConnection}; any new connection
// request would first access cached connection if possible (not expired), if none exists, it would establish a new
// connection and return to the caller. Caller on accomplishing the task at-hand, should return the connection back to
// the pool.
using RESTConnectionPoolKey = std::pair<std::string, std::string>;
class RESTConnectionPool : public ReferenceCounted<RESTConnectionPool> {
public:
	// A pooled TCP connection plus the wall-clock time after which it must not be reused.
	struct ReusableConnection {
		Reference<IConnection> conn;
		double expirationTime;
	};

	// Maximum number of connections cached in the connection-pool.
	int maxConnPerConnectKey;
	// Idle connections cached per "host:service" key.
	std::map<RESTConnectionPoolKey, std::queue<ReusableConnection>> connectionPoolMap;

	RESTConnectionPool(const int maxConnsPerKey) : maxConnPerConnectKey(maxConnsPerKey) {}

	// Routine is responsible to provide an usable TCP connection object; it reuses an active
	// connection from connection-pool if available, otherwise, establishes a new TCP connection
	Future<ReusableConnection> connect(RESTConnectionPoolKey connectKey, const bool isSecure, const int maxConnLife);
	// Return a connection for future reuse; it is dropped if expired or the pool is full.
	void returnConnection(RESTConnectionPoolKey connectKey, ReusableConnection& conn, const int maxConnections);

	static RESTConnectionPoolKey getConnectionPoolKey(const std::string& host, const std::string& service) {
		return std::make_pair(host, service);
	}
};
// Util interface facilitating management and update for RESTClient knob parameters
struct RESTClientKnobs {
	// Tunable client parameters; defaults are taken from FLOW_KNOBS in the constructor.
	int connection_pool_size, secure_connection, connect_timeout, connect_tries, max_connection_life, request_tries,
	    request_timeout_secs;

	constexpr static int SECURE_CONNECTION = 1;
	constexpr static int NOT_SECURE_CONNECTION = 0;

	RESTClientKnobs();

	// Override knobs by long or short name; throws rest_invalid_rest_client_knob on an
	// unknown name.
	void set(const std::unordered_map<std::string, int>& knobSettings);
	// Snapshot of current values keyed by long knob name.
	std::unordered_map<std::string, int> get() const;

	// Maps both the long name and the short alias of each knob to its backing field.
	std::unordered_map<std::string, int*> knobMap;

	// Human-readable knob help text (long name, short alias, description).
	static std::vector<std::string> getKnobDescriptions() {
		return {
			"connection_pool_size (pz) Maximum numbers of active connections in the connection-pool",
			"secure_connection (or sc) Set 1 for secure connection and 0 for insecure connection.",
			"connect_tries (or ct) Number of times to try to connect for each request.",
			"connect_timeout (or cto) Number of seconds to wait for a connect request to succeed.",
			"max_connection_life (or mcl) Maximum number of seconds to use a single TCP connection.",
			"request_tries (or rt) Number of times to try each request until a parsable HTTP "
			"response other than 429 is received.",
			"request_timeout_secs (or rtom) Number of seconds to wait for a request to succeed after a "
			"connection is established.",
		};
	}
};
// Util interface facilitating parsing of an input REST 'full_url' into its
// connection (host/service), resource, parameter, and body components.
struct RESTUrl {
public:
// Connection resources - host and port details
std::string host;
std::string service;
// resource identified by URI
std::string resource;
// optional REST request parameters
std::string reqParameters;
// Request 'body' payload
std::string body;
// Parse 'fullUrl' and populate the fields above; no request body.
// NOTE(review): 'isSecure' presumably selects the expected scheme/default port — confirm in parseUrl's definition.
explicit RESTUrl(const std::string& fullUrl, const bool isSecure);
// Same as above, but also records the supplied request 'body' payload.
explicit RESTUrl(const std::string& fullUrl, const std::string& body, const bool isSecure);
private:
// Shared parsing routine used by both constructors (defined elsewhere).
void parseUrl(const std::string& fullUrl, bool isSecure);
};
#endif

View File

@ -72,6 +72,20 @@ Future<REPLY_TYPE(Req)> retryBrokenPromise(RequestStream<Req, P> to, Req request
} }
} }
ACTOR template <class Req>
Future<Void> tryInitializeRequestStream(RequestStream<Req>* stream, Hostname hostname, WellKnownEndpoints token) {
Optional<NetworkAddress> address = wait(hostname.resolve());
if (!address.present()) {
return Void();
}
if (stream == nullptr) {
stream = new RequestStream<Req>(Endpoint::wellKnown({ address.get() }, token));
} else {
*stream = RequestStream<Req>(Endpoint::wellKnown({ address.get() }, token));
}
return Void();
}
ACTOR template <class Req> ACTOR template <class Req>
Future<ErrorOr<REPLY_TYPE(Req)>> tryGetReplyFromHostname(Req request, Hostname hostname, WellKnownEndpoints token) { Future<ErrorOr<REPLY_TYPE(Req)>> tryGetReplyFromHostname(Req request, Hostname hostname, WellKnownEndpoints token) {
// A wrapper of tryGetReply(request), except that the request is sent to an address resolved from a hostname. // A wrapper of tryGetReply(request), except that the request is sent to an address resolved from a hostname.

View File

@ -1110,10 +1110,10 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
newPriorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController); newPriorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController);
bool isCoordinator = bool isCoordinator =
(std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), req.wi.address()) != (std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), w.address()) !=
coordinatorAddresses.end()) || coordinatorAddresses.end()) ||
(req.wi.secondaryAddress().present() && (w.secondaryAddress().present() &&
std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), req.wi.secondaryAddress().get()) != std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), w.secondaryAddress().get()) !=
coordinatorAddresses.end()); coordinatorAddresses.end());
for (auto it : req.incompatiblePeers) { for (auto it : req.incompatiblePeers) {
@ -1933,8 +1933,24 @@ ACTOR Future<Void> handleForcedRecoveries(ClusterControllerData* self, ClusterCo
} }
} }
ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) { struct SingletonRecruitThrottler {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID. double lastRecruitStart;
SingletonRecruitThrottler() : lastRecruitStart(-1) {}
double newRecruitment() {
double n = now();
double waitTime =
std::max(0.0, (lastRecruitStart + SERVER_KNOBS->CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL - n));
lastRecruitStart = n;
return waitTime;
}
};
ACTOR Future<Void> startDataDistributor(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartDataDistributor", self->id).log(); TraceEvent("CCStartDataDistributor", self->id).log();
loop { loop {
@ -2003,6 +2019,7 @@ ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
} }
ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) { ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) { while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange()); wait(self->db.serverInfo->onChange());
} }
@ -2019,13 +2036,15 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
when(wait(self->recruitDistributor.onChange())) {} when(wait(self->recruitDistributor.onChange())) {}
} }
} else { } else {
wait(startDataDistributor(self)); wait(startDataDistributor(self, recruitThrottler.newRecruitment()));
} }
} }
} }
ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) { ACTOR Future<Void> startRatekeeper(ClusterControllerData* self, double waitTime) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID. // If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartRatekeeper", self->id).log(); TraceEvent("CCStartRatekeeper", self->id).log();
loop { loop {
@ -2091,6 +2110,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
} }
ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) { ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) { while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange()); wait(self->db.serverInfo->onChange());
} }
@ -2107,34 +2127,15 @@ ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
when(wait(self->recruitRatekeeper.onChange())) {} when(wait(self->recruitRatekeeper.onChange())) {}
} }
} else { } else {
wait(startRatekeeper(self)); wait(startRatekeeper(self, recruitThrottler.newRecruitment()));
} }
} }
} }
// Acquires the BM lock by getting the next epoch no. ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self, double waitTime) {
ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) { // If master fails at the same time, give it a chance to clear master PID.
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx); // Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
loop {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
try {
Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
wait(tr->commit());
TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
return newEpoch;
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
TraceEvent("CCEKP_Start", self->id).log(); TraceEvent("CCEKP_Start", self->id).log();
loop { loop {
@ -2208,6 +2209,7 @@ ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
} }
ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) { ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
loop { loop {
if (self->db.serverInfo->get().encryptKeyProxy.present() && !self->recruitEncryptKeyProxy.get()) { if (self->db.serverInfo->get().encryptKeyProxy.present() && !self->recruitEncryptKeyProxy.get()) {
choose { choose {
@ -2219,13 +2221,36 @@ ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
when(wait(self->recruitEncryptKeyProxy.onChange())) {} when(wait(self->recruitEncryptKeyProxy.onChange())) {}
} }
} else { } else {
wait(startEncryptKeyProxy(self)); wait(startEncryptKeyProxy(self, recruitThrottler.newRecruitment()));
} }
} }
} }
ACTOR Future<Void> startBlobManager(ClusterControllerData* self) { // Acquires the BM lock by getting the next epoch no.
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID. ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
loop {
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
try {
Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
wait(tr->commit());
TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
return newEpoch;
} catch (Error& e) {
wait(tr->onError(e));
}
}
}
ACTOR Future<Void> startBlobManager(ClusterControllerData* self, double waitTime) {
// If master fails at the same time, give it a chance to clear master PID.
// Also wait to avoid too many consecutive recruits in a small time window.
wait(delay(waitTime));
TraceEvent("CCStartBlobManager", self->id).log(); TraceEvent("CCStartBlobManager", self->id).log();
loop { loop {
@ -2322,6 +2347,7 @@ ACTOR Future<Void> watchBlobGranulesConfigKey(ClusterControllerData* self) {
} }
ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) { ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
state SingletonRecruitThrottler recruitThrottler;
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) { while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
wait(self->db.serverInfo->onChange()); wait(self->db.serverInfo->onChange());
} }
@ -2352,7 +2378,7 @@ ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
} }
} else if (self->db.blobGranulesEnabled.get()) { } else if (self->db.blobGranulesEnabled.get()) {
// if there is no blob manager present but blob granules are now enabled, recruit a BM // if there is no blob manager present but blob granules are now enabled, recruit a BM
wait(startBlobManager(self)); wait(startBlobManager(self, recruitThrottler.newRecruitment()));
} else { } else {
// if there is no blob manager present and blob granules are disabled, wait for a config change // if there is no blob manager present and blob granules are disabled, wait for a config change
wait(self->db.blobGranulesEnabled.onChange()); wait(self->db.blobGranulesEnabled.onChange());
@ -2481,12 +2507,11 @@ ACTOR Future<Void> workerHealthMonitor(ClusterControllerData* self) {
} }
} }
ACTOR Future<Void> clusterControllerCore(Reference<IClusterConnectionRecord> connRecord, ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
ClusterControllerFullInterface interf,
Future<Void> leaderFail, Future<Void> leaderFail,
ServerCoordinators coordinators,
LocalityData locality, LocalityData locality,
ConfigDBType configDBType) { ConfigDBType configDBType) {
state ServerCoordinators coordinators(connRecord);
state ClusterControllerData self(interf, locality, coordinators); state ClusterControllerData self(interf, locality, coordinators);
state ConfigBroadcaster configBroadcaster(coordinators, configDBType); state ConfigBroadcaster configBroadcaster(coordinators, configDBType);
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY); state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
@ -2621,7 +2646,7 @@ ACTOR Future<Void> replaceInterface(ClusterControllerFullInterface interf) {
} }
} }
ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRecord, ACTOR Future<Void> clusterController(ServerCoordinators coordinators,
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC, Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
bool hasConnected, bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo, Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
@ -2632,10 +2657,9 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
state bool inRole = false; state bool inRole = false;
cci.initEndpoints(); cci.initEndpoints();
try { try {
wait(connRecord->resolveHostnames());
// Register as a possible leader; wait to be elected // Register as a possible leader; wait to be elected
state Future<Void> leaderFail = state Future<Void> leaderFail =
tryBecomeLeader(connRecord, cci, currentCC, hasConnected, asyncPriorityInfo); tryBecomeLeader(coordinators, cci, currentCC, hasConnected, asyncPriorityInfo);
state Future<Void> shouldReplace = replaceInterface(cci); state Future<Void> shouldReplace = replaceInterface(cci);
while (!currentCC->get().present() || currentCC->get().get() != cci) { while (!currentCC->get().present() || currentCC->get().get() != cci) {
@ -2654,7 +2678,7 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
startRole(Role::CLUSTER_CONTROLLER, cci.id(), UID()); startRole(Role::CLUSTER_CONTROLLER, cci.id(), UID());
inRole = true; inRole = true;
wait(clusterControllerCore(connRecord, cci, leaderFail, locality, configDBType)); wait(clusterControllerCore(cci, leaderFail, coordinators, locality, configDBType));
} }
} catch (Error& e) { } catch (Error& e) {
if (inRole) if (inRole)
@ -2683,7 +2707,8 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
state bool hasConnected = false; state bool hasConnected = false;
loop { loop {
try { try {
wait(clusterController(connRecord, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType)); ServerCoordinators coordinators(connRecord);
wait(clusterController(coordinators, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
hasConnected = true; hasConnected = true;
} catch (Error& e) { } catch (Error& e) {
if (e.code() != error_code_coordinators_changed) if (e.code() != error_code_coordinators_changed)

View File

@ -537,8 +537,7 @@ ACTOR Future<Void> changeCoordinators(Reference<ClusterRecoveryData> self) {
} }
try { try {
state ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString()); ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString());
wait(conn.resolveHostnames());
wait(self->cstate.move(conn)); wait(self->cstate.move(conn));
} catch (Error& e) { } catch (Error& e) {
if (e.code() != error_code_actor_cancelled) if (e.code() != error_code_actor_cancelled)

View File

@ -236,6 +236,105 @@ struct ResolutionRequestBuilder {
} }
}; };
ErrorOr<Optional<TenantMapEntry>> getTenantEntry(ProxyCommitData* commitData,
Optional<TenantNameRef> tenant,
Optional<int64_t> tenantId,
bool logOnFailure) {
if (tenant.present()) {
auto itr = commitData->tenantMap.find(tenant.get());
if (itr == commitData->tenantMap.end()) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyUnknownTenant", commitData->dbgid).detail("Tenant", tenant.get());
}
return unknown_tenant();
} else if (tenantId.present() && tenantId.get() != itr->second.id) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyTenantIdMismatch", commitData->dbgid)
.detail("Tenant", tenant.get())
.detail("TenantId", tenantId)
.detail("ExistingId", itr->second.id);
}
return unknown_tenant();
}
return ErrorOr<Optional<TenantMapEntry>>(Optional<TenantMapEntry>(itr->second));
}
return Optional<TenantMapEntry>();
}
bool verifyTenantPrefix(ProxyCommitData* const commitData, const CommitTransactionRequest& req) {
ErrorOr<Optional<TenantMapEntry>> tenantEntry =
getTenantEntry(commitData, req.tenantInfo.name.castTo<TenantNameRef>(), req.tenantInfo.tenantId, true);
if (tenantEntry.isError()) {
return true;
}
if (tenantEntry.get().present()) {
Key tenantPrefix = tenantEntry.get().get().prefix;
for (auto& m : req.transaction.mutations) {
if (m.param1 != metadataVersionKey) {
if (!m.param1.startsWith(tenantPrefix)) {
TraceEvent(SevWarnAlways, "TenantPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param1.toHexString());
return false;
}
if (m.type == MutationRef::ClearRange && !m.param2.startsWith(tenantPrefix)) {
TraceEvent(SevWarnAlways, "TenantClearRangePrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param2.toHexString());
return false;
} else if (m.type == MutationRef::SetVersionstampedKey) {
ASSERT(m.param1.size() >= 4);
uint8_t* key = const_cast<uint8_t*>(m.param1.begin());
int* offset = reinterpret_cast<int*>(&key[m.param1.size() - 4]);
if (*offset < tenantPrefix.size()) {
TraceEvent(SevWarnAlways, "TenantVersionstampInvalidOffset")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("Key", m.param1.toHexString())
.detail("Offset", *offset);
return false;
}
}
}
}
for (auto& rc : req.transaction.read_conflict_ranges) {
if (rc.begin != metadataVersionKey &&
(!rc.begin.startsWith(tenantPrefix) || !rc.end.startsWith(tenantPrefix))) {
TraceEvent(SevWarnAlways, "TenantReadConflictPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("BeginKey", rc.begin.toHexString())
.detail("EndKey", rc.end.toHexString());
return false;
}
}
for (auto& wc : req.transaction.write_conflict_ranges) {
if (wc.begin != metadataVersionKey &&
(!wc.begin.startsWith(tenantPrefix) || !wc.end.startsWith(tenantPrefix))) {
TraceEvent(SevWarnAlways, "TenantWriteConflictPrefixMismatch")
.suppressFor(60)
.detail("Prefix", tenantPrefix.toHexString())
.detail("BeginKey", wc.begin.toHexString())
.detail("EndKey", wc.end.toHexString());
return false;
}
}
}
return true;
}
ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData, ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int>> out, PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int>> out,
FutureStream<CommitTransactionRequest> in, FutureStream<CommitTransactionRequest> in,
@ -282,6 +381,13 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
.detail("Size", bytes) .detail("Size", bytes)
.detail("Client", req.reply.getEndpoint().getPrimaryAddress()); .detail("Client", req.reply.getEndpoint().getPrimaryAddress());
} }
if (!verifyTenantPrefix(commitData, req)) {
++commitData->stats.txnCommitErrors;
req.reply.sendError(illegal_tenant_access());
continue;
}
++commitData->stats.txnCommitIn; ++commitData->stats.txnCommitIn;
if (req.debugID.present()) { if (req.debugID.present()) {
@ -450,35 +556,6 @@ ACTOR static Future<ResolveTransactionBatchReply> trackResolutionMetrics(Referen
return reply; return reply;
} }
ErrorOr<Optional<TenantMapEntry>> getTenantEntry(ProxyCommitData* commitData,
Optional<TenantNameRef> tenant,
Optional<int64_t> tenantId,
bool logOnFailure) {
if (tenant.present()) {
auto itr = commitData->tenantMap.find(tenant.get());
if (itr == commitData->tenantMap.end()) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyUnknownTenant", commitData->dbgid).detail("Tenant", tenant.get());
}
return unknown_tenant();
} else if (tenantId.present() && tenantId.get() != itr->second.id) {
if (logOnFailure) {
TraceEvent(SevWarn, "CommitProxyTenantIdMismatch", commitData->dbgid)
.detail("Tenant", tenant.get())
.detail("TenantId", tenantId)
.detail("ExistingId", itr->second.id);
}
return unknown_tenant();
}
return ErrorOr<Optional<TenantMapEntry>>(Optional<TenantMapEntry>(itr->second));
}
return Optional<TenantMapEntry>();
}
namespace CommitBatch { namespace CommitBatch {
struct CommitBatchContext { struct CommitBatchContext {
@ -685,6 +762,11 @@ bool canReject(const std::vector<CommitTransactionRequest>& trs) {
return true; return true;
} }
double computeReleaseDelay(CommitBatchContext* self, double latencyBucket) {
return std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE,
self->batchOperations * self->pProxyCommitData->commitComputePerOperation[latencyBucket]);
}
ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) { ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData; state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData;
@ -708,6 +790,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
// Pre-resolution the commits // Pre-resolution the commits
TEST(pProxyCommitData->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); // Wait for local batch TEST(pProxyCommitData->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); // Wait for local batch
wait(pProxyCommitData->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber - 1)); wait(pProxyCommitData->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber - 1));
pProxyCommitData->stats.computeLatency.addMeasurement(now() - timeStart);
double queuingDelay = g_network->now() - timeStart; double queuingDelay = g_network->now() - timeStart;
pProxyCommitData->stats.commitBatchQueuingDist->sampleSeconds(queuingDelay); pProxyCommitData->stats.commitBatchQueuingDist->sampleSeconds(queuingDelay);
if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND || if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND ||
@ -736,10 +819,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
return Void(); return Void();
} }
self->releaseDelay = self->releaseDelay = delay(computeReleaseDelay(self, latencyBucket), TaskPriority::ProxyMasterVersionReply);
delay(std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE,
self->batchOperations * pProxyCommitData->commitComputePerOperation[latencyBucket]),
TaskPriority::ProxyMasterVersionReply);
if (debugID.present()) { if (debugID.present()) {
g_traceBatch.addEvent( g_traceBatch.addEvent(
@ -1385,8 +1465,10 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
self->computeDuration += g_network->timer() - self->computeStart; self->computeDuration += g_network->timer() - self->computeStart;
if (self->batchOperations > 0) { if (self->batchOperations > 0) {
double estimatedDelay = computeReleaseDelay(self, self->latencyBucket);
double computePerOperation = double computePerOperation =
std::min(SERVER_KNOBS->MAX_COMPUTE_PER_OPERATION, self->computeDuration / self->batchOperations); std::min(SERVER_KNOBS->MAX_COMPUTE_PER_OPERATION, self->computeDuration / self->batchOperations);
if (computePerOperation <= pProxyCommitData->commitComputePerOperation[self->latencyBucket]) { if (computePerOperation <= pProxyCommitData->commitComputePerOperation[self->latencyBucket]) {
pProxyCommitData->commitComputePerOperation[self->latencyBucket] = computePerOperation; pProxyCommitData->commitComputePerOperation[self->latencyBucket] = computePerOperation;
} else { } else {
@ -1401,6 +1483,20 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
pProxyCommitData->stats.minComputeNS = pProxyCommitData->stats.minComputeNS =
std::min<int64_t>(pProxyCommitData->stats.minComputeNS, std::min<int64_t>(pProxyCommitData->stats.minComputeNS,
1e9 * pProxyCommitData->commitComputePerOperation[self->latencyBucket]); 1e9 * pProxyCommitData->commitComputePerOperation[self->latencyBucket]);
if (estimatedDelay >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF ||
self->computeDuration >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF) {
TraceEvent(SevInfo, "LongComputeDuration", pProxyCommitData->dbgid)
.suppressFor(10.0)
.detail("EstimatedComputeDuration", estimatedDelay)
.detail("ComputeDuration", self->computeDuration)
.detail("ComputePerOperation", computePerOperation)
.detail("LatencyBucket", self->latencyBucket)
.detail("UpdatedComputePerOperationEstimate",
pProxyCommitData->commitComputePerOperation[self->latencyBucket])
.detail("BatchBytes", self->batchBytes)
.detail("BatchOperations", self->batchOperations);
}
} }
pProxyCommitData->stats.processingMutationDist->sampleSeconds(now() - postResolutionQueuing); pProxyCommitData->stats.processingMutationDist->sampleSeconds(now() - postResolutionQueuing);

View File

@ -26,21 +26,29 @@
#include "fdbserver/LeaderElection.h" #include "fdbserver/LeaderElection.h"
#include "flow/actorcompiler.h" // has to be last include #include "flow/actorcompiler.h" // has to be last include
ACTOR Future<GenerationRegReadReply> waitAndSendRead(RequestStream<GenerationRegReadRequest> to, ACTOR Future<GenerationRegReadReply> waitAndSendRead(GenerationRegInterface stateServer, GenerationRegReadRequest req) {
GenerationRegReadRequest req) {
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01())); wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
state GenerationRegReadReply reply = wait(retryBrokenPromise(to, req)); state GenerationRegReadReply reply;
if (stateServer.hostname.present()) {
wait(store(reply, retryGetReplyFromHostname(req, stateServer.hostname.get(), WLTOKEN_GENERATIONREG_READ)));
} else {
wait(store(reply, retryBrokenPromise(stateServer.read, req)));
}
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01())); wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
return reply; return reply;
} }
ACTOR Future<UniqueGeneration> waitAndSendWrite(RequestStream<GenerationRegWriteRequest> to, ACTOR Future<UniqueGeneration> waitAndSendWrite(GenerationRegInterface stateServer, GenerationRegWriteRequest req) {
GenerationRegWriteRequest req) {
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01())); wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
state UniqueGeneration reply = wait(retryBrokenPromise(to, req)); state UniqueGeneration reply;
if (stateServer.hostname.present()) {
wait(store(reply, retryGetReplyFromHostname(req, stateServer.hostname.get(), WLTOKEN_GENERATIONREG_WRITE)));
} else {
wait(store(reply, retryBrokenPromise(stateServer.write, req)));
}
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01())); wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
return reply; return reply;
@ -152,7 +160,7 @@ struct CoordinatedStateImpl {
state std::vector<Future<GenerationRegReadReply>> rep_reply; state std::vector<Future<GenerationRegReadReply>> rep_reply;
for (int i = 0; i < replicas.size(); i++) { for (int i = 0; i < replicas.size(); i++) {
Future<GenerationRegReadReply> reply = Future<GenerationRegReadReply> reply =
waitAndSendRead(replicas[i].read, GenerationRegReadRequest(req.key, req.gen)); waitAndSendRead(replicas[i], GenerationRegReadRequest(req.key, req.gen));
rep_empty_reply.push_back(nonemptyToNever(reply)); rep_empty_reply.push_back(nonemptyToNever(reply));
rep_reply.push_back(emptyToNever(reply)); rep_reply.push_back(emptyToNever(reply));
self->ac.add(success(reply)); self->ac.add(success(reply));
@ -192,8 +200,7 @@ struct CoordinatedStateImpl {
state std::vector<GenerationRegInterface>& replicas = self->coordinators.stateServers; state std::vector<GenerationRegInterface>& replicas = self->coordinators.stateServers;
state std::vector<Future<UniqueGeneration>> wrep_reply; state std::vector<Future<UniqueGeneration>> wrep_reply;
for (int i = 0; i < replicas.size(); i++) { for (int i = 0; i < replicas.size(); i++) {
Future<UniqueGeneration> reply = Future<UniqueGeneration> reply = waitAndSendWrite(replicas[i], GenerationRegWriteRequest(req.kv, req.gen));
waitAndSendWrite(replicas[i].write, GenerationRegWriteRequest(req.kv, req.gen));
wrep_reply.push_back(reply); wrep_reply.push_back(reply);
self->ac.add(success(reply)); self->ac.add(success(reply));
} }

View File

@ -98,12 +98,16 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local) : Client
} }
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) { ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
ClusterConnectionString cs = ccr->getConnectionString(); ClusterConnectionString cs = ccr->getConnectionString();
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s) { for (auto h : cs.hostnames) {
leaderElectionServers.emplace_back(*s); leaderElectionServers.emplace_back(h);
stateServers.emplace_back(*s); stateServers.emplace_back(h);
configServers.emplace_back(*s); configServers.emplace_back(h);
}
for (auto s : cs.coordinators()) {
leaderElectionServers.emplace_back(s);
stateServers.emplace_back(s);
configServers.emplace_back(s);
} }
} }
@ -208,10 +212,8 @@ ACTOR Future<Void> openDatabase(ClientData* db,
int* clientCount, int* clientCount,
Reference<AsyncVar<bool>> hasConnectedClients, Reference<AsyncVar<bool>> hasConnectedClients,
OpenDatabaseCoordRequest req, OpenDatabaseCoordRequest req,
Future<Void> checkStuck, Future<Void> checkStuck) {
Reference<AsyncVar<Void>> coordinatorsChanged) {
state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents; state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents;
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
state Future<Void> clientInfoOnChange = db->clientInfo->onChange(); state Future<Void> clientInfoOnChange = db->clientInfo->onChange();
++(*clientCount); ++(*clientCount);
@ -233,11 +235,6 @@ ACTOR Future<Void> openDatabase(ClientData* db,
clientInfoOnChange = db->clientInfo->onChange(); clientInfoOnChange = db->clientInfo->onChange();
replyContents = db->clientInfo->get(); replyContents = db->clientInfo->get();
} }
when(wait(coordinatorsChangedOnChange)) {
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
replyContents = coordinators_changed();
break;
}
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) {
if (db->clientInfo->get().read().id.isValid()) { if (db->clientInfo->get().read().id.isValid()) {
replyContents = db->clientInfo->get(); replyContents = db->clientInfo->get();
@ -268,10 +265,7 @@ ACTOR Future<Void> openDatabase(ClientData* db,
ACTOR Future<Void> remoteMonitorLeader(int* clientCount, ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
Reference<AsyncVar<bool>> hasConnectedClients, Reference<AsyncVar<bool>> hasConnectedClients,
Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader, Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader,
ElectionResultRequest req, ElectionResultRequest req) {
Reference<AsyncVar<Void>> coordinatorsChanged) {
state bool coordinatorsChangeDetected = false;
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
state Future<Void> currentElectedLeaderOnChange = currentElectedLeader->onChange(); state Future<Void> currentElectedLeaderOnChange = currentElectedLeader->onChange();
++(*clientCount); ++(*clientCount);
hasConnectedClients->set(true); hasConnectedClients->set(true);
@ -281,20 +275,11 @@ ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
when(wait(yieldedFuture(currentElectedLeaderOnChange))) { when(wait(yieldedFuture(currentElectedLeaderOnChange))) {
currentElectedLeaderOnChange = currentElectedLeader->onChange(); currentElectedLeaderOnChange = currentElectedLeader->onChange();
} }
when(wait(coordinatorsChangedOnChange)) {
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
coordinatorsChangeDetected = true;
break;
}
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; } when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; }
} }
} }
if (coordinatorsChangeDetected) { req.reply.send(currentElectedLeader->get());
req.reply.sendError(coordinators_changed());
} else {
req.reply.send(currentElectedLeader->get());
}
if (--(*clientCount) == 0) { if (--(*clientCount) == 0) {
hasConnectedClients->set(false); hasConnectedClients->set(false);
@ -325,8 +310,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader = state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader =
makeReference<AsyncVar<Optional<LeaderInfo>>>(); makeReference<AsyncVar<Optional<LeaderInfo>>>();
state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT); state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT);
state Reference<AsyncVar<Void>> coordinatorsChanged = makeReference<AsyncVar<Void>>();
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
state Future<Void> hasConnectedClientsOnChange = hasConnectedClients->onChange(); state Future<Void> hasConnectedClientsOnChange = hasConnectedClients->onChange();
loop choose { loop choose {
@ -338,14 +321,10 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
} else { } else {
if (!leaderMon.isValid()) { if (!leaderMon.isValid()) {
leaderMon = monitorLeaderAndGetClientInfo( leaderMon = monitorLeaderAndGetClientInfo(
req.clusterKey, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged); req.clusterKey, req.hostnames, req.coordinators, &clientData, currentElectedLeader);
} }
actors.add(openDatabase(&clientData, actors.add(
&clientCount, openDatabase(&clientData, &clientCount, hasConnectedClients, req, canConnectToLeader.checkStuck()));
hasConnectedClients,
req,
canConnectToLeader.checkStuck(),
coordinatorsChanged));
} }
} }
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) { when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
@ -355,10 +334,9 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
} else { } else {
if (!leaderMon.isValid()) { if (!leaderMon.isValid()) {
leaderMon = monitorLeaderAndGetClientInfo( leaderMon = monitorLeaderAndGetClientInfo(
req.key, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged); req.key, req.hostnames, req.coordinators, &clientData, currentElectedLeader);
} }
actors.add(remoteMonitorLeader( actors.add(remoteMonitorLeader(&clientCount, hasConnectedClients, currentElectedLeader, req));
&clientCount, hasConnectedClients, currentElectedLeader, req, coordinatorsChanged));
} }
} }
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) { when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
@ -499,10 +477,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
} }
} }
when(wait(actors.getResult())) {} when(wait(actors.getResult())) {}
when(wait(coordinatorsChangedOnChange)) {
leaderMon = Future<Void>();
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
}
} }
} }

View File

@ -153,17 +153,21 @@ struct CandidacyRequest {
struct ElectionResultRequest { struct ElectionResultRequest {
constexpr static FileIdentifier file_identifier = 11815465; constexpr static FileIdentifier file_identifier = 11815465;
Key key; Key key;
std::vector<Hostname> hostnames;
std::vector<NetworkAddress> coordinators; std::vector<NetworkAddress> coordinators;
UID knownLeader; UID knownLeader;
ReplyPromise<Optional<LeaderInfo>> reply; ReplyPromise<Optional<LeaderInfo>> reply;
ElectionResultRequest() = default; ElectionResultRequest() = default;
ElectionResultRequest(Key key, std::vector<NetworkAddress> coordinators, UID knownLeader) ElectionResultRequest(Key key,
: key(key), coordinators(std::move(coordinators)), knownLeader(knownLeader) {} std::vector<Hostname> hostnames,
std::vector<NetworkAddress> coordinators,
UID knownLeader)
: key(key), hostnames(std::move(hostnames)), coordinators(std::move(coordinators)), knownLeader(knownLeader) {}
template <class Ar> template <class Ar>
void serialize(Ar& ar) { void serialize(Ar& ar) {
serializer(ar, key, coordinators, knownLeader, reply); serializer(ar, key, hostnames, coordinators, knownLeader, reply);
} }
}; };

View File

@ -718,18 +718,19 @@ public:
bool recheck = !healthy && (lastReady != self->initialFailureReactionDelay.isReady() || bool recheck = !healthy && (lastReady != self->initialFailureReactionDelay.isReady() ||
(lastZeroHealthy && !self->zeroHealthyTeams->get()) || containsFailed); (lastZeroHealthy && !self->zeroHealthyTeams->get()) || containsFailed);
// TraceEvent("TeamHealthChangeDetected", self->distributorId) //TraceEvent("TeamHealthChangeDetected", self->distributorId)
// .detail("Team", team->getDesc()) // .detail("Team", team->getDesc())
// .detail("ServersLeft", serversLeft) // .detail("ServersLeft", serversLeft)
// .detail("LastServersLeft", lastServersLeft) // .detail("LastServersLeft", lastServersLeft)
// .detail("AnyUndesired", anyUndesired) // .detail("AnyUndesired", anyUndesired)
// .detail("LastAnyUndesired", lastAnyUndesired) // .detail("LastAnyUndesired", lastAnyUndesired)
// .detail("AnyWrongConfiguration", anyWrongConfiguration) // .detail("AnyWrongConfiguration", anyWrongConfiguration)
// .detail("LastWrongConfiguration", lastWrongConfiguration) // .detail("LastWrongConfiguration", lastWrongConfiguration)
// .detail("Recheck", recheck) // .detail("ContainsWigglingServer", anyWigglingServer)
// .detail("BadTeam", badTeam) // .detail("Recheck", recheck)
// .detail("LastZeroHealthy", lastZeroHealthy) // .detail("BadTeam", badTeam)
// .detail("ZeroHealthyTeam", self->zeroHealthyTeams->get()); // .detail("LastZeroHealthy", lastZeroHealthy)
// .detail("ZeroHealthyTeam", self->zeroHealthyTeams->get());
lastReady = self->initialFailureReactionDelay.isReady(); lastReady = self->initialFailureReactionDelay.isReady();
lastZeroHealthy = self->zeroHealthyTeams->get(); lastZeroHealthy = self->zeroHealthyTeams->get();
@ -1103,9 +1104,8 @@ public:
if (worstStatus == DDTeamCollection::Status::WIGGLING && invalidWiggleServer(worstAddr, self, server)) { if (worstStatus == DDTeamCollection::Status::WIGGLING && invalidWiggleServer(worstAddr, self, server)) {
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId) TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
.detail("Address", worstAddr.toString()) .detail("Address", worstAddr.toString())
.detail("ProcessId", server->getLastKnownInterface().locality.processId()) .detail("ServerId", server->getId())
.detail("WigglingId", self->wigglingId.present()); .detail("WigglingId", self->wigglingId.present() ? self->wigglingId.get().toString() : "");
self->excludedServers.set(worstAddr, DDTeamCollection::Status::NONE);
worstStatus = DDTeamCollection::Status::NONE; worstStatus = DDTeamCollection::Status::NONE;
} }
otherChanges.push_back(self->excludedServers.onChange(worstAddr)); otherChanges.push_back(self->excludedServers.onChange(worstAddr));
@ -1127,10 +1127,9 @@ public:
if (testStatus == DDTeamCollection::Status::WIGGLING && if (testStatus == DDTeamCollection::Status::WIGGLING &&
invalidWiggleServer(testAddr, self, server)) { invalidWiggleServer(testAddr, self, server)) {
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId) TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
.detail("Address", testAddr.toString()) .detail("Address", worstAddr.toString())
.detail("ProcessId", server->getLastKnownInterface().locality.processId()) .detail("ServerId", server->getId())
.detail("ValidWigglingId", self->wigglingId.present()); .detail("WigglingId", self->wigglingId.present() ? self->wigglingId.get().toString() : "");
self->excludedServers.set(testAddr, DDTeamCollection::Status::NONE);
testStatus = DDTeamCollection::Status::NONE; testStatus = DDTeamCollection::Status::NONE;
} }
@ -2052,7 +2051,7 @@ public:
"PerpetualStorageWigglePause", "PerpetualStorageWigglePause",
self->distributorId) self->distributorId)
.detail("Primary", self->primary) .detail("Primary", self->primary)
.detail("ProcessId", id) .detail("ServerId", id)
.detail("BestTeamKeepStuckCount", self->bestTeamKeepStuckCount) .detail("BestTeamKeepStuckCount", self->bestTeamKeepStuckCount)
.detail("ExtraHealthyTeamCount", extraTeamCount) .detail("ExtraHealthyTeamCount", extraTeamCount)
.detail("HealthyTeamCount", self->healthyTeamCount); .detail("HealthyTeamCount", self->healthyTeamCount);
@ -2065,7 +2064,7 @@ public:
moveFinishFuture = fv; moveFinishFuture = fv;
TraceEvent("PerpetualStorageWiggleStart", self->distributorId) TraceEvent("PerpetualStorageWiggleStart", self->distributorId)
.detail("Primary", self->primary) .detail("Primary", self->primary)
.detail("ProcessId", id) .detail("ServerId", id)
.detail("ExtraHealthyTeamCount", extraTeamCount) .detail("ExtraHealthyTeamCount", extraTeamCount)
.detail("HealthyTeamCount", self->healthyTeamCount); .detail("HealthyTeamCount", self->healthyTeamCount);
} }
@ -2091,7 +2090,7 @@ public:
self->includeStorageServersForWiggle(); self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWiggleFinish", self->distributorId) TraceEvent("PerpetualStorageWiggleFinish", self->distributorId)
.detail("Primary", self->primary) .detail("Primary", self->primary)
.detail("ProcessId", self->wigglingId.get()); .detail("ServerId", self->wigglingId.get());
wait(self->eraseStorageWiggleMap(&metadataMap, self->wigglingId.get()) && wait(self->eraseStorageWiggleMap(&metadataMap, self->wigglingId.get()) &&
self->storageWiggler->finishWiggle()); self->storageWiggler->finishWiggle());
@ -2112,7 +2111,7 @@ public:
self->includeStorageServersForWiggle(); self->includeStorageServersForWiggle();
TraceEvent("PerpetualStorageWiggleExitingPause", self->distributorId) TraceEvent("PerpetualStorageWiggleExitingPause", self->distributorId)
.detail("Primary", self->primary) .detail("Primary", self->primary)
.detail("ProcessId", self->wigglingId.get()); .detail("ServerId", self->wigglingId.get());
self->wigglingId.reset(); self->wigglingId.reset();
} }

View File

@ -27,44 +27,29 @@
// Keep trying to become a leader by submitting itself to all coordinators. // Keep trying to become a leader by submitting itself to all coordinators.
// Monitor the health of all coordinators at the same time. // Monitor the health of all coordinators at the same time.
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
// to throw `coordinators_changed()` error
ACTOR Future<Void> submitCandidacy(Key key, ACTOR Future<Void> submitCandidacy(Key key,
LeaderElectionRegInterface coord, LeaderElectionRegInterface coord,
LeaderInfo myInfo, LeaderInfo myInfo,
UID prevChangeID, UID prevChangeID,
AsyncTrigger* nomineeChange, AsyncTrigger* nomineeChange,
Optional<LeaderInfo>* nominee, Optional<LeaderInfo>* nominee) {
Optional<Hostname> hostname = Optional<Hostname>()) {
loop { loop {
state Optional<LeaderInfo> li; state Optional<LeaderInfo> li;
if (coord.hostname.present()) {
if (coord.candidacy.getEndpoint().getPrimaryAddress().fromHostname) { wait(store(
state ErrorOr<Optional<LeaderInfo>> rep = wait(coord.candidacy.tryGetReply( li,
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID), retryGetReplyFromHostname(
TaskPriority::CoordinationReply)); CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
if (rep.isError()) { coord.hostname.get(),
// Connecting to nominee failed, most likely due to connection failed. WLTOKEN_LEADERELECTIONREG_CANDIDACY,
TraceEvent("SubmitCandadicyError") TaskPriority::CoordinationReply)));
.error(rep.getError())
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
.detail("OldAddr", coord.candidacy.getEndpoint().getPrimaryAddress().toString());
if (rep.getError().code() == error_code_request_maybe_delivered) {
// Delay to prevent tight resolving loop due to outdated DNS cache
wait(delay(FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL));
throw coordinators_changed();
} else {
throw rep.getError();
}
} else if (rep.present()) {
li = rep.get();
}
} else { } else {
Optional<LeaderInfo> tmp = wait(retryBrokenPromise( wait(store(
coord.candidacy, li,
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID), retryBrokenPromise(
TaskPriority::CoordinationReply)); coord.candidacy,
li = tmp; CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
TaskPriority::CoordinationReply)));
} }
wait(Future<Void>(Void())); // Make sure we weren't cancelled wait(Future<Void>(Void())); // Make sure we weren't cancelled
@ -104,20 +89,26 @@ Future<Void> buggifyDelayedAsyncVar(Reference<AsyncVar<T>>& var) {
ACTOR Future<Void> changeLeaderCoordinators(ServerCoordinators coordinators, Value forwardingInfo) { ACTOR Future<Void> changeLeaderCoordinators(ServerCoordinators coordinators, Value forwardingInfo) {
std::vector<Future<Void>> forwardRequests; std::vector<Future<Void>> forwardRequests;
forwardRequests.reserve(coordinators.leaderElectionServers.size()); forwardRequests.reserve(coordinators.leaderElectionServers.size());
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
forwardRequests.push_back(retryBrokenPromise(coordinators.leaderElectionServers[i].forward, if (coordinators.leaderElectionServers[i].hostname.present()) {
ForwardRequest(coordinators.clusterKey, forwardingInfo))); forwardRequests.push_back(retryGetReplyFromHostname(ForwardRequest(coordinators.clusterKey, forwardingInfo),
coordinators.leaderElectionServers[i].hostname.get(),
WLTOKEN_LEADERELECTIONREG_FORWARD));
} else {
forwardRequests.push_back(retryBrokenPromise(coordinators.leaderElectionServers[i].forward,
ForwardRequest(coordinators.clusterKey, forwardingInfo)));
}
}
int quorum_size = forwardRequests.size() / 2 + 1; int quorum_size = forwardRequests.size() / 2 + 1;
wait(quorum(forwardRequests, quorum_size)); wait(quorum(forwardRequests, quorum_size));
return Void(); return Void();
} }
ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> connRecord, ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators,
Value proposedSerializedInterface, Value proposedSerializedInterface,
Reference<AsyncVar<Value>> outSerializedLeader, Reference<AsyncVar<Value>> outSerializedLeader,
bool hasConnected, bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo) { Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo) {
state ServerCoordinators coordinators(connRecord);
state AsyncTrigger nomineeChange; state AsyncTrigger nomineeChange;
state std::vector<Optional<LeaderInfo>> nominees; state std::vector<Optional<LeaderInfo>> nominees;
state LeaderInfo myInfo; state LeaderInfo myInfo;
@ -134,6 +125,8 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
wait(delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY)); wait(delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
} }
nominees.resize(coordinators.leaderElectionServers.size());
myInfo.serializedInfo = proposedSerializedInterface; myInfo.serializedInfo = proposedSerializedInterface;
outSerializedLeader->set(Value()); outSerializedLeader->set(Value());
@ -141,9 +134,6 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
(SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) ? buggifyDelayedAsyncVar(outSerializedLeader) : Void(); (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) ? buggifyDelayedAsyncVar(outSerializedLeader) : Void();
while (!iAmLeader) { while (!iAmLeader) {
wait(connRecord->resolveHostnames());
coordinators = ServerCoordinators(connRecord);
nominees.resize(coordinators.leaderElectionServers.size());
state Future<Void> badCandidateTimeout; state Future<Void> badCandidateTimeout;
myInfo.changeID = deterministicRandom()->randomUniqueID(); myInfo.changeID = deterministicRandom()->randomUniqueID();
@ -153,19 +143,12 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
std::vector<Future<Void>> cand; std::vector<Future<Void>> cand;
cand.reserve(coordinators.leaderElectionServers.size()); cand.reserve(coordinators.leaderElectionServers.size());
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) { for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
Optional<Hostname> hostname;
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
coordinators.leaderElectionServers[i].candidacy.getEndpoint().getPrimaryAddress());
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
hostname = r->second;
}
cand.push_back(submitCandidacy(coordinators.clusterKey, cand.push_back(submitCandidacy(coordinators.clusterKey,
coordinators.leaderElectionServers[i], coordinators.leaderElectionServers[i],
myInfo, myInfo,
prevChangeID, prevChangeID,
&nomineeChange, &nomineeChange,
&nominees[i], &nominees[i]));
hostname));
} }
candidacies = waitForAll(cand); candidacies = waitForAll(cand);
@ -220,24 +203,15 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
} else } else
badCandidateTimeout = Future<Void>(); badCandidateTimeout = Future<Void>();
try { choose {
choose { when(wait(nomineeChange.onTrigger())) {}
when(wait(nomineeChange.onTrigger())) {} when(wait(badCandidateTimeout.isValid() ? badCandidateTimeout : Never())) {
when(wait(badCandidateTimeout.isValid() ? badCandidateTimeout : Never())) { TEST(true); // Bad candidate timeout
TEST(true); // Bad candidate timeout TraceEvent("LeaderBadCandidateTimeout", myInfo.changeID).log();
TraceEvent("LeaderBadCandidateTimeout", myInfo.changeID).log();
break;
}
when(wait(candidacies)) { ASSERT(false); }
when(wait(asyncPriorityInfo->onChange())) { break; }
}
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
connRecord->getConnectionString().resetToUnresolved();
break; break;
} else {
throw e;
} }
when(wait(candidacies)) { ASSERT(false); }
when(wait(asyncPriorityInfo->onChange())) { break; }
} }
} }
@ -258,10 +232,17 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
state std::vector<Future<Void>> true_heartbeats; state std::vector<Future<Void>> true_heartbeats;
state std::vector<Future<Void>> false_heartbeats; state std::vector<Future<Void>> false_heartbeats;
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) { for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
Future<LeaderHeartbeatReply> hb = Future<LeaderHeartbeatReply> hb;
retryBrokenPromise(coordinators.leaderElectionServers[i].leaderHeartbeat, if (coordinators.leaderElectionServers[i].hostname.present()) {
LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID), hb = retryGetReplyFromHostname(LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
TaskPriority::CoordinationReply); coordinators.leaderElectionServers[i].hostname.get(),
WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT,
TaskPriority::CoordinationReply);
} else {
hb = retryBrokenPromise(coordinators.leaderElectionServers[i].leaderHeartbeat,
LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
TaskPriority::CoordinationReply);
}
true_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ true })); true_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ true }));
false_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ false })); false_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ false }));
} }

View File

@ -37,7 +37,7 @@ class ServerCoordinators;
// eventually be set. If the return value is cancelled, the candidacy or leadership of the proposedInterface // eventually be set. If the return value is cancelled, the candidacy or leadership of the proposedInterface
// will eventually end. // will eventually end.
template <class LeaderInterface> template <class LeaderInterface>
Future<Void> tryBecomeLeader(Reference<IClusterConnectionRecord> const& connRecord, Future<Void> tryBecomeLeader(ServerCoordinators const& coordinators,
LeaderInterface const& proposedInterface, LeaderInterface const& proposedInterface,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader, Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
bool hasConnected, bool hasConnected,
@ -50,20 +50,20 @@ Future<Void> changeLeaderCoordinators(ServerCoordinators const& coordinators, Va
#pragma region Implementation #pragma region Implementation
#endif // __INTEL_COMPILER #endif // __INTEL_COMPILER
Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> const& connRecord, Future<Void> tryBecomeLeaderInternal(ServerCoordinators const& coordinators,
Value const& proposedSerializedInterface, Value const& proposedSerializedInterface,
Reference<AsyncVar<Value>> const& outSerializedLeader, Reference<AsyncVar<Value>> const& outSerializedLeader,
bool const& hasConnected, bool const& hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo); Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo);
template <class LeaderInterface> template <class LeaderInterface>
Future<Void> tryBecomeLeader(Reference<IClusterConnectionRecord> const& connRecord, Future<Void> tryBecomeLeader(ServerCoordinators const& coordinators,
LeaderInterface const& proposedInterface, LeaderInterface const& proposedInterface,
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader, Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
bool hasConnected, bool hasConnected,
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo) { Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo) {
auto serializedInfo = makeReference<AsyncVar<Value>>(); auto serializedInfo = makeReference<AsyncVar<Value>>();
Future<Void> m = tryBecomeLeaderInternal(connRecord, Future<Void> m = tryBecomeLeaderInternal(coordinators,
ObjectWriter::toValue(proposedInterface, IncludeVersion()), ObjectWriter::toValue(proposedInterface, IncludeVersion()),
serializedInfo, serializedInfo,
hasConnected, hasConnected,

View File

@ -99,8 +99,17 @@ class GetCommittedVersionQuorum {
// Now roll node forward to match the largest committed version of // Now roll node forward to match the largest committed version of
// the replies. // the replies.
state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(self->replies[target]));
try { try {
state std::vector<ConfigFollowerInterface> interfs = self->replies[target];
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& interf : interfs) {
if (interf.hostname.present()) {
fs.push_back(tryInitializeRequestStream(
&interf.getChanges, interf.hostname.get(), WLTOKEN_CONFIGFOLLOWER_GETCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(interfs));
state Version lastSeenVersion = std::max( state Version lastSeenVersion = std::max(
rollback.present() ? rollback.get() : nodeVersion.lastCommitted, self->largestCompactedResponse); rollback.present() ? rollback.get() : nodeVersion.lastCommitted, self->largestCompactedResponse);
ConfigFollowerGetChangesReply reply = ConfigFollowerGetChangesReply reply =
@ -108,9 +117,21 @@ class GetCommittedVersionQuorum {
&ConfigFollowerInterface::getChanges, &ConfigFollowerInterface::getChanges,
ConfigFollowerGetChangesRequest{ lastSeenVersion, target }), ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT)); SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
wait(timeoutError(cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }), if (cfi.hostname.present()) {
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT)); wait(timeoutError(
retryGetReplyFromHostname(
ConfigFollowerRollforwardRequest{
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations },
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
} else {
wait(timeoutError(
cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
}
} catch (Error& e) { } catch (Error& e) {
if (e.code() == error_code_transaction_too_old) { if (e.code() == error_code_transaction_too_old) {
// Seeing this trace is not necessarily a problem. There // Seeing this trace is not necessarily a problem. There
@ -129,9 +150,18 @@ class GetCommittedVersionQuorum {
ACTOR static Future<Void> getCommittedVersionActor(GetCommittedVersionQuorum* self, ConfigFollowerInterface cfi) { ACTOR static Future<Void> getCommittedVersionActor(GetCommittedVersionQuorum* self, ConfigFollowerInterface cfi) {
try { try {
ConfigFollowerGetCommittedVersionReply reply = state ConfigFollowerGetCommittedVersionReply reply;
wait(timeoutError(cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{}), if (cfi.hostname.present()) {
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT)); wait(timeoutError(store(reply,
retryGetReplyFromHostname(ConfigFollowerGetCommittedVersionRequest{},
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
} else {
wait(timeoutError(
store(reply, cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})),
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
}
++self->totalRepliesReceived; ++self->totalRepliesReceived;
self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted); self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted);
@ -279,7 +309,15 @@ class PaxosConfigConsumerImpl {
std::vector<Future<Void>> compactionRequests; std::vector<Future<Void>> compactionRequests;
compactionRequests.reserve(compactionRequests.size()); compactionRequests.reserve(compactionRequests.size());
for (const auto& cfi : self->cfis) { for (const auto& cfi : self->cfis) {
compactionRequests.push_back(cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion })); if (cfi.hostname.present()) {
compactionRequests.push_back(
retryGetReplyFromHostname(ConfigFollowerCompactRequest{ compactionVersion },
cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_COMPACT));
} else {
compactionRequests.push_back(
cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
}
} }
try { try {
wait(timeoutError(waitForAll(compactionRequests), 1.0)); wait(timeoutError(waitForAll(compactionRequests), 1.0));
@ -294,8 +332,18 @@ class PaxosConfigConsumerImpl {
self->resetCommittedVersionQuorum(); // TODO: This seems to fix a segfault, investigate more self->resetCommittedVersionQuorum(); // TODO: This seems to fix a segfault, investigate more
try { try {
state Version committedVersion = wait(getCommittedVersion(self)); state Version committedVersion = wait(getCommittedVersion(self));
state Reference<ConfigFollowerInfo> configNodes( state std::vector<ConfigFollowerInterface> readReplicas =
new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas())); self->getCommittedVersionQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(&readReplica.getSnapshotAndChanges,
readReplica.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETSNAPSHOTANDCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> configNodes(new ConfigFollowerInfo(readReplicas));
ConfigFollowerGetSnapshotAndChangesReply reply = ConfigFollowerGetSnapshotAndChangesReply reply =
wait(timeoutError(basicLoadBalance(configNodes, wait(timeoutError(basicLoadBalance(configNodes,
&ConfigFollowerInterface::getSnapshotAndChanges, &ConfigFollowerInterface::getSnapshotAndChanges,
@ -349,8 +397,18 @@ class PaxosConfigConsumerImpl {
// returned would be 1. // returned would be 1.
if (committedVersion > self->lastSeenVersion) { if (committedVersion > self->lastSeenVersion) {
ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1); ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1);
state Reference<ConfigFollowerInfo> configNodes( state std::vector<ConfigFollowerInterface> readReplicas =
new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas())); self->getCommittedVersionQuorum.getReadReplicas();
std::vector<Future<Void>> fs;
for (ConfigFollowerInterface& readReplica : readReplicas) {
if (readReplica.hostname.present()) {
fs.push_back(tryInitializeRequestStream(&readReplica.getChanges,
readReplica.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCHANGES));
}
}
wait(waitForAll(fs));
state Reference<ConfigFollowerInfo> configNodes(new ConfigFollowerInfo(readReplicas));
ConfigFollowerGetChangesReply reply = wait(timeoutError( ConfigFollowerGetChangesReply reply = wait(timeoutError(
basicLoadBalance(configNodes, basicLoadBalance(configNodes,
&ConfigFollowerInterface::getChanges, &ConfigFollowerInterface::getChanges,

View File

@ -73,6 +73,8 @@ struct ProxyStats {
LatencySample commitBatchingWindowSize; LatencySample commitBatchingWindowSize;
LatencySample computeLatency;
Future<Void> logger; Future<Void> logger;
int64_t maxComputeNS; int64_t maxComputeNS;
@ -126,6 +128,10 @@ struct ProxyStats {
id, id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE), SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
computeLatency("ComputeLatency",
id,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
maxComputeNS(0), minComputeNS(1e12), maxComputeNS(0), minComputeNS(1e12),
commitBatchQueuingDist(Histogram::getHistogram(LiteralStringRef("CommitProxy"), commitBatchQueuingDist(Histogram::getHistogram(LiteralStringRef("CommitProxy"),
LiteralStringRef("CommitBatchQueuing"), LiteralStringRef("CommitBatchQueuing"),

View File

@ -161,9 +161,8 @@ ACTOR Future<std::vector<WorkerInterface>> getCoordWorkers(Database cx,
if (!coordinators.present()) { if (!coordinators.present()) {
throw operation_failed(); throw operation_failed();
} }
state ClusterConnectionString ccs(coordinators.get().toString()); ClusterConnectionString ccs(coordinators.get().toString());
wait(ccs.resolveHostnames()); std::vector<NetworkAddress> coordinatorsAddr = wait(ccs.tryResolveHostnames());
std::vector<NetworkAddress> coordinatorsAddr = ccs.coordinators();
std::set<NetworkAddress> coordinatorsAddrSet; std::set<NetworkAddress> coordinatorsAddrSet;
for (const auto& addr : coordinatorsAddr) { for (const auto& addr : coordinatorsAddr) {
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr); TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);

View File

@ -44,15 +44,29 @@ class SimpleConfigConsumerImpl {
loop { loop {
state Version compactionVersion = self->lastSeenVersion; state Version compactionVersion = self->lastSeenVersion;
wait(delayJittered(self->compactionInterval.get())); wait(delayJittered(self->compactionInterval.get()));
wait(self->cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion })); if (self->cfi.hostname.present()) {
wait(retryGetReplyFromHostname(ConfigFollowerCompactRequest{ compactionVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_COMPACT));
} else {
wait(self->cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
}
++self->compactRequest; ++self->compactRequest;
broadcaster->compact(compactionVersion); broadcaster->compact(compactionVersion);
} }
} }
ACTOR static Future<Version> getCommittedVersion(SimpleConfigConsumerImpl* self) { ACTOR static Future<Version> getCommittedVersion(SimpleConfigConsumerImpl* self) {
ConfigFollowerGetCommittedVersionReply committedVersionReply = state ConfigFollowerGetCommittedVersionReply committedVersionReply;
wait(self->cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})); if (self->cfi.hostname.present()) {
wait(store(committedVersionReply,
retryGetReplyFromHostname(ConfigFollowerGetCommittedVersionRequest{},
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)));
} else {
wait(store(committedVersionReply,
self->cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})));
}
return committedVersionReply.lastCommitted; return committedVersionReply.lastCommitted;
} }
@ -63,8 +77,18 @@ class SimpleConfigConsumerImpl {
state Version committedVersion = wait(getCommittedVersion(self)); state Version committedVersion = wait(getCommittedVersion(self));
ASSERT_GE(committedVersion, self->lastSeenVersion); ASSERT_GE(committedVersion, self->lastSeenVersion);
if (committedVersion > self->lastSeenVersion) { if (committedVersion > self->lastSeenVersion) {
ConfigFollowerGetChangesReply reply = wait(self->cfi.getChanges.getReply( state ConfigFollowerGetChangesReply reply;
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion })); if (self->cfi.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETCHANGES)));
} else {
wait(store(reply,
self->cfi.getChanges.getReply(
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion })));
}
++self->successfulChangeRequest; ++self->successfulChangeRequest;
for (const auto& versionedMutation : reply.changes) { for (const auto& versionedMutation : reply.changes) {
TraceEvent te(SevDebug, "ConsumerFetchedMutation", self->id); TraceEvent te(SevDebug, "ConsumerFetchedMutation", self->id);
@ -96,8 +120,17 @@ class SimpleConfigConsumerImpl {
ACTOR static Future<Void> getSnapshotAndChanges(SimpleConfigConsumerImpl* self, ConfigBroadcaster* broadcaster) { ACTOR static Future<Void> getSnapshotAndChanges(SimpleConfigConsumerImpl* self, ConfigBroadcaster* broadcaster) {
state Version committedVersion = wait(getCommittedVersion(self)); state Version committedVersion = wait(getCommittedVersion(self));
ConfigFollowerGetSnapshotAndChangesReply reply = wait( state ConfigFollowerGetSnapshotAndChangesReply reply;
self->cfi.getSnapshotAndChanges.getReply(ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion })); if (self->cfi.hostname.present()) {
wait(store(reply,
retryGetReplyFromHostname(ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion },
self->cfi.hostname.get(),
WLTOKEN_CONFIGFOLLOWER_GETSNAPSHOTANDCHANGES)));
} else {
wait(store(reply,
self->cfi.getSnapshotAndChanges.getReply(
ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion })));
}
++self->snapshotRequest; ++self->snapshotRequest;
TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id) TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id)
.detail("SnapshotVersion", reply.snapshotVersion) .detail("SnapshotVersion", reply.snapshotVersion)

View File

@ -1980,8 +1980,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
TEST(useIPv6); // Use IPv6 TEST(useIPv6); // Use IPv6
TEST(!useIPv6); // Use IPv4 TEST(!useIPv6); // Use IPv4
// TODO(renxuan): Use hostname 25% of the time, unless it is disabled // Use hostname 25% of the time, unless it is disabled
bool useHostname = false; // !testConfig.disableHostname && deterministicRandom()->random01() < 0.25; bool useHostname = !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
TEST(useHostname); // Use hostname TEST(useHostname); // Use hostname
TEST(!useHostname); // Use IP address TEST(!useHostname); // Use IP address
NetworkAddressFromHostname fromHostname = NetworkAddressFromHostname fromHostname =

View File

@ -831,7 +831,8 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
} }
} }
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) { std::vector<NetworkAddress> addressVec = wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
for (const auto& coordinator : addressVec) {
roles.addCoordinatorRole(coordinator); roles.addCoordinatorRole(coordinator);
} }
@ -1689,8 +1690,7 @@ static JsonBuilderObject configurationFetcher(Optional<DatabaseConfiguration> co
} }
statusObj["excluded_servers"] = excludedServersArr; statusObj["excluded_servers"] = excludedServersArr;
} }
std::vector<ClientLeaderRegInterface> coordinatorLeaderServers = coordinators.clientLeaderServers; int count = coordinators.clientLeaderServers.size();
int count = coordinatorLeaderServers.size();
statusObj["coordinators_count"] = count; statusObj["coordinators_count"] = count;
} catch (Error&) { } catch (Error&) {
incomplete_reasons->insert("Could not retrieve all configuration status information."); incomplete_reasons->insert("Could not retrieve all configuration status information.");
@ -2505,7 +2505,8 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance,
static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration, static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration,
ServerCoordinators coordinators, ServerCoordinators coordinators,
std::vector<WorkerDetails>& workers, const std::vector<NetworkAddress>& coordinatorAddresses,
const std::vector<WorkerDetails>& workers,
int extraTlogEligibleZones, int extraTlogEligibleZones,
int minStorageReplicasRemaining, int minStorageReplicasRemaining,
int oldLogFaultTolerance, int oldLogFaultTolerance,
@ -2521,11 +2522,11 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
int maxCoordinatorFailures = (coordinators.clientLeaderServers.size() - 1) / 2; int maxCoordinatorFailures = (coordinators.clientLeaderServers.size() - 1) / 2;
std::map<NetworkAddress, StringRef> workerZones; std::map<NetworkAddress, StringRef> workerZones;
for (auto& worker : workers) { for (const auto& worker : workers) {
workerZones[worker.interf.address()] = worker.interf.locality.zoneId().orDefault(LiteralStringRef("")); workerZones[worker.interf.address()] = worker.interf.locality.zoneId().orDefault(LiteralStringRef(""));
} }
std::map<StringRef, int> coordinatorZoneCounts; std::map<StringRef, int> coordinatorZoneCounts;
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) { for (const auto& coordinator : coordinatorAddresses) {
auto zone = workerZones[coordinator]; auto zone = workerZones[coordinator];
coordinatorZoneCounts[zone] += 1; coordinatorZoneCounts[zone] += 1;
} }
@ -3061,6 +3062,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2)); state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
wait(success(primaryDCFO)); wait(success(primaryDCFO));
std::vector<NetworkAddress> coordinatorAddresses =
wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
int logFaultTolerance = 100; int logFaultTolerance = 100;
if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) { if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
statusObj["logs"] = tlogFetcher(&logFaultTolerance, db, address_workers); statusObj["logs"] = tlogFetcher(&logFaultTolerance, db, address_workers);
@ -3070,6 +3074,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
statusObj["fault_tolerance"] = statusObj["fault_tolerance"] =
faultToleranceStatusFetcher(configuration.get(), faultToleranceStatusFetcher(configuration.get(),
coordinators, coordinators,
coordinatorAddresses,
workers, workers,
extraTlogEligibleZones, extraTlogEligibleZones,
minStorageReplicasRemaining, minStorageReplicasRemaining,

View File

@ -859,9 +859,9 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
NetworkAddressList publicNetworkAddresses; NetworkAddressList publicNetworkAddresses;
NetworkAddressList listenNetworkAddresses; NetworkAddressList listenNetworkAddresses;
connectionRecord.resolveHostnamesBlocking(); std::vector<Hostname>& hostnames = connectionRecord.getConnectionString().hostnames;
auto& coordinators = connectionRecord.getConnectionString().coordinators(); const std::vector<NetworkAddress>& coords = connectionRecord.getConnectionString().coordinators();
ASSERT(coordinators.size() > 0); ASSERT(hostnames.size() + coords.size() > 0);
for (int ii = 0; ii < publicAddressStrs.size(); ++ii) { for (int ii = 0; ii < publicAddressStrs.size(); ++ii) {
const std::string& publicAddressStr = publicAddressStrs[ii]; const std::string& publicAddressStr = publicAddressStrs[ii];
@ -930,13 +930,26 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
listenNetworkAddresses.secondaryAddress = currentListenAddress; listenNetworkAddresses.secondaryAddress = currentListenAddress;
} }
bool hasSameCoord = std::all_of(coordinators.begin(), coordinators.end(), [&](const NetworkAddress& address) { bool matchCoordinatorsTls = std::all_of(coords.begin(), coords.end(), [&](const NetworkAddress& address) {
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) { if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
return address.isTLS() == currentPublicAddress.isTLS(); return address.isTLS() == currentPublicAddress.isTLS();
} }
return true; return true;
}); });
if (!hasSameCoord) { // If true, further check hostnames.
if (matchCoordinatorsTls) {
matchCoordinatorsTls = std::all_of(hostnames.begin(), hostnames.end(), [&](Hostname& hostname) {
Optional<NetworkAddress> resolvedAddress = hostname.resolveBlocking();
if (resolvedAddress.present()) {
NetworkAddress address = resolvedAddress.get();
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
return address.isTLS() == currentPublicAddress.isTLS();
}
}
return true;
});
}
if (!matchCoordinatorsTls) {
fprintf(stderr, fprintf(stderr,
"ERROR: TLS state of public address %s does not match in coordinator list.\n", "ERROR: TLS state of public address %s does not match in coordinator list.\n",
publicAddressStr.c_str()); publicAddressStr.c_str());

View File

@ -3455,7 +3455,8 @@ ACTOR Future<GetRangeReqAndResultRef> quickGetKeyValues(
tr.setVersion(version); tr.setVersion(version);
// TODO: is DefaultPromiseEndpoint the best priority for this? // TODO: is DefaultPromiseEndpoint the best priority for this?
tr.trState->taskID = TaskPriority::DefaultPromiseEndpoint; tr.trState->taskID = TaskPriority::DefaultPromiseEndpoint;
Future<RangeResult> rangeResultFuture = tr.getRange(prefixRange(prefix), Snapshot::True); Future<RangeResult> rangeResultFuture =
tr.getRange(prefixRange(prefix), GetRangeLimits::ROW_LIMIT_UNLIMITED, Snapshot::True);
// TODO: async in case it needs to read from other servers. // TODO: async in case it needs to read from other servers.
RangeResult rangeResult = wait(rangeResultFuture); RangeResult rangeResult = wait(rangeResultFuture);
a->dependsOn(rangeResult.arena()); a->dependsOn(rangeResult.arena());

View File

@ -2977,21 +2977,40 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
Reference<IClusterConnectionRecord> connRecord, Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> result, Reference<AsyncVar<Value>> result,
MonitorLeaderInfo info) { MonitorLeaderInfo info) {
state ClusterConnectionString ccf = info.intermediateConnRecord->getConnectionString(); ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
state std::vector<NetworkAddress> addrs = ccf.coordinators(); state int coordinatorsSize = cs.hostnames.size() + cs.coordinators().size();
state ElectionResultRequest request; state ElectionResultRequest request;
state int index = 0; state int index = 0;
state int successIndex = 0; state int successIndex = 0;
request.key = ccf.clusterKey(); state std::vector<LeaderElectionRegInterface> leaderElectionServers;
request.coordinators = ccf.coordinators();
deterministicRandom()->randomShuffle(addrs); leaderElectionServers.reserve(coordinatorsSize);
for (const auto& h : cs.hostnames) {
leaderElectionServers.push_back(LeaderElectionRegInterface(h));
}
for (const auto& c : cs.coordinators()) {
leaderElectionServers.push_back(LeaderElectionRegInterface(c));
}
deterministicRandom()->randomShuffle(leaderElectionServers);
request.key = cs.clusterKey();
request.hostnames = cs.hostnames;
request.coordinators = cs.coordinators();
loop { loop {
LeaderElectionRegInterface interf(addrs[index]); LeaderElectionRegInterface interf = leaderElectionServers[index];
bool usingHostname = interf.hostname.present();
request.reply = ReplyPromise<Optional<LeaderInfo>>(); request.reply = ReplyPromise<Optional<LeaderInfo>>();
ErrorOr<Optional<LeaderInfo>> leader = wait(interf.electionResult.tryGetReply(request)); state ErrorOr<Optional<LeaderInfo>> leader;
if (usingHostname) {
wait(store(
leader,
tryGetReplyFromHostname(request, interf.hostname.get(), WLTOKEN_LEADERELECTIONREG_ELECTIONRESULT)));
} else {
wait(store(leader, interf.electionResult.tryGetReply(request)));
}
if (leader.present()) { if (leader.present()) {
if (leader.get().present()) { if (leader.get().present()) {
if (leader.get().get().forward) { if (leader.get().get().forward) {
@ -3027,14 +3046,9 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
} }
successIndex = index; successIndex = index;
} else { } else {
if (leader.isError() && leader.getError().code() == error_code_coordinators_changed) { index = (index + 1) % coordinatorsSize;
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
throw coordinators_changed();
}
index = (index + 1) % addrs.size();
if (index == successIndex) { if (index == successIndex) {
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY)); wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
throw coordinators_changed();
} }
} }
} }
@ -3042,22 +3056,11 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord, ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord,
Reference<AsyncVar<Value>> outSerializedLeaderInfo) { Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
wait(connRecord->resolveHostnames());
state MonitorLeaderInfo info(connRecord); state MonitorLeaderInfo info(connRecord);
loop { loop {
try { MonitorLeaderInfo _info =
wait(info.intermediateConnRecord->resolveHostnames()); wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
MonitorLeaderInfo _info = info = _info;
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
info = _info;
} catch (Error& e) {
if (e.code() == error_code_coordinators_changed) {
TraceEvent("MonitorLeaderWithDelayedCandidacyCoordinatorsChanged").suppressFor(1.0);
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
} else {
throw e;
}
}
} }
} }
@ -3191,6 +3194,7 @@ ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
actors.push_back(serveProcess()); actors.push_back(serveProcess());
try { try {
ServerCoordinators coordinators(connRecord);
if (g_network->isSimulated()) { if (g_network->isSimulated()) {
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,"; whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
} }

View File

@ -2096,7 +2096,8 @@ struct ConsistencyCheckWorkload : TestWorkload {
return false; return false;
} }
state ClusterConnectionString old(currentKey.get().toString()); ClusterConnectionString old(currentKey.get().toString());
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
std::vector<ProcessData> workers = wait(::getWorkers(&tr)); std::vector<ProcessData> workers = wait(::getWorkers(&tr));
@ -2106,7 +2107,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
} }
std::set<Optional<Standalone<StringRef>>> checkDuplicates; std::set<Optional<Standalone<StringRef>>> checkDuplicates;
for (const auto& addr : old.coordinators()) { for (const auto& addr : oldCoordinators) {
auto findResult = addr_locality.find(addr); auto findResult = addr_locality.find(addr);
if (findResult != addr_locality.end()) { if (findResult != addr_locality.end()) {
if (checkDuplicates.count(findResult->second.zoneId())) { if (checkDuplicates.count(findResult->second.zoneId())) {

View File

@ -106,6 +106,7 @@ struct CycleWorkload : TestWorkload {
state Transaction tr(cx); state Transaction tr(cx);
if (deterministicRandom()->random01() >= self->traceParentProbability) { if (deterministicRandom()->random01() >= self->traceParentProbability) {
state Span span("CycleClient"_loc); state Span span("CycleClient"_loc);
// TraceEvent("CycleTracingTransaction", span.context).log();
TraceEvent("CycleTracingTransaction", span.context).log(); TraceEvent("CycleTracingTransaction", span.context).log();
tr.setOption(FDBTransactionOptions::SPAN_PARENT, tr.setOption(FDBTransactionOptions::SPAN_PARENT,
BinaryWriter::toValue(span.context, Unversioned())); BinaryWriter::toValue(span.context, Unversioned()));

View File

@ -132,7 +132,7 @@ struct DataLossRecoveryWorkload : TestWorkload {
} else { } else {
tr.clear(key); tr.clear(key);
} }
wait(timeoutError(tr.commit(), 30.0)); wait(tr.commit());
break; break;
} catch (Error& e) { } catch (Error& e) {
wait(tr.onError(e)); wait(tr.onError(e));

View File

@ -329,9 +329,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
for (int j = i; j < end; j++) { for (int j = i; j < end; j++) {
if (deterministicRandom()->random01() < self->initialKeyDensity) { if (deterministicRandom()->random01() < self->initialKeyDensity) {
Key key = self->getKeyForIndex(tenantNum, j); Key key = self->getKeyForIndex(tenantNum, j);
if (key.size() <= (key.startsWith(systemKeys.begin) if (key.size() <= getMaxWriteKeySize(key, false)) {
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
Value value = self->getRandomValue(); Value value = self->getRandomValue();
value = value.substr( value = value.substr(
0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT)); 0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
@ -1091,24 +1089,22 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
pos = littleEndian32(*(int32_t*)&value.end()[-4]); pos = littleEndian32(*(int32_t*)&value.end()[-4]);
} }
contract = { contract = { std::make_pair(error_code_key_too_large,
std::make_pair(error_code_key_too_large, key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin) : key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : ExceptionContract::Never),
: CLIENT_KNOBS->KEY_SIZE_LIMIT))), std::make_pair(error_code_value_too_large,
std::make_pair(error_code_value_too_large, ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)), std::make_pair(error_code_invalid_mutation_type,
std::make_pair( ExceptionContract::requiredIf(!isValidMutationType(op) ||
error_code_invalid_mutation_type, !isAtomicOp((MutationRef::Type)op))),
ExceptionContract::requiredIf(!isValidMutationType(op) || !isAtomicOp((MutationRef::Type)op))), std::make_pair(error_code_key_outside_legal_range,
std::make_pair(error_code_key_outside_legal_range, ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))), std::make_pair(error_code_client_invalid_operation,
std::make_pair( ExceptionContract::requiredIf((op == MutationRef::SetVersionstampedKey &&
error_code_client_invalid_operation, (pos < 0 || pos + 10 > key.size() - 4)) ||
ExceptionContract::requiredIf( (op == MutationRef::SetVersionstampedValue &&
(op == MutationRef::SetVersionstampedKey && (pos < 0 || pos + 10 > key.size() - 4)) || (pos < 0 || pos + 10 > value.size() - 4)))) };
(op == MutationRef::SetVersionstampedValue && (pos < 0 || pos + 10 > value.size() - 4))))
};
} }
void callback(Reference<ITransaction> tr) override { tr->atomicOp(key, value, (FDBMutationTypes::Option)op); } void callback(Reference<ITransaction> tr) override { tr->atomicOp(key, value, (FDBMutationTypes::Option)op); }
@ -1131,11 +1127,10 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
key = makeKey(); key = makeKey();
} }
value = makeValue(); value = makeValue();
contract = { std::make_pair( contract = { std::make_pair(error_code_key_too_large,
error_code_key_too_large, key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin) : key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : ExceptionContract::Never),
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
std::make_pair(error_code_value_too_large, std::make_pair(error_code_value_too_large,
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)), ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
std::make_pair(error_code_key_outside_legal_range, std::make_pair(error_code_key_outside_legal_range,
@ -1268,11 +1263,11 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
TestWatch(unsigned int id, FuzzApiCorrectnessWorkload* workload, Reference<ITransaction> tr) TestWatch(unsigned int id, FuzzApiCorrectnessWorkload* workload, Reference<ITransaction> tr)
: BaseTest(id, workload, "TestWatch") { : BaseTest(id, workload, "TestWatch") {
key = makeKey(); key = makeKey();
contract = { std::make_pair( printf("Watching: %d %s\n", key.size(), printable(key.substr(0, std::min(key.size(), 20))).c_str());
error_code_key_too_large, contract = { std::make_pair(error_code_key_too_large,
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin) key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
: CLIENT_KNOBS->KEY_SIZE_LIMIT))), : ExceptionContract::Never),
std::make_pair(error_code_watches_disabled, ExceptionContract::Possible), std::make_pair(error_code_watches_disabled, ExceptionContract::Possible),
std::make_pair(error_code_key_outside_legal_range, std::make_pair(error_code_key_outside_legal_range,
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))), ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),

View File

@ -541,7 +541,12 @@ struct RemoveServersSafelyWorkload : TestWorkload {
state AddressExclusion coordExcl; state AddressExclusion coordExcl;
// Exclude a coordinator under buggify, but only if fault tolerance is > 0 and kill set is non-empty already // Exclude a coordinator under buggify, but only if fault tolerance is > 0 and kill set is non-empty already
if (BUGGIFY && toKill.size()) { if (BUGGIFY && toKill.size()) {
std::vector<NetworkAddress> coordinators = wait(getCoordinators(cx)); Optional<ClusterConnectionString> csOptional = wait(getConnectionString(cx));
state std::vector<NetworkAddress> coordinators;
if (csOptional.present()) {
ClusterConnectionString cs = csOptional.get();
wait(store(coordinators, cs.tryResolveHostnames()));
}
if (coordinators.size() > 2) { if (coordinators.size() > 2) {
auto randomCoordinator = deterministicRandom()->randomChoice(coordinators); auto randomCoordinator = deterministicRandom()->randomChoice(coordinators);
coordExcl = AddressExclusion(randomCoordinator.ip, randomCoordinator.port); coordExcl = AddressExclusion(randomCoordinator.ip, randomCoordinator.port);

View File

@ -957,9 +957,9 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
boost::split( boost::split(
process_addresses, coordinator_processes_key.get().toString(), [](char c) { return c == ','; }); process_addresses, coordinator_processes_key.get().toString(), [](char c) { return c == ','; });
ASSERT(process_addresses.size() == cs.coordinators().size() + cs.hostnames.size()); ASSERT(process_addresses.size() == cs.coordinators().size() + cs.hostnames.size());
wait(cs.resolveHostnames());
// compare the coordinator process network addresses one by one // compare the coordinator process network addresses one by one
for (const auto& network_address : cs.coordinators()) { std::vector<NetworkAddress> coordinators = wait(cs.tryResolveHostnames());
for (const auto& network_address : coordinators) {
ASSERT(std::find(process_addresses.begin(), process_addresses.end(), network_address.toString()) != ASSERT(std::find(process_addresses.begin(), process_addresses.end(), network_address.toString()) !=
process_addresses.end()); process_addresses.end());
} }
@ -1077,19 +1077,20 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
Optional<Value> res = wait(tx->get(coordinatorsKey)); Optional<Value> res = wait(tx->get(coordinatorsKey));
ASSERT(res.present()); // Otherwise, database is in a bad state ASSERT(res.present()); // Otherwise, database is in a bad state
state ClusterConnectionString csNew(res.get().toString()); ClusterConnectionString csNew(res.get().toString());
wait(csNew.resolveHostnames()); // verify the cluster decription
ASSERT(csNew.coordinators().size() == old_coordinators_processes.size() + 1); ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
ASSERT(csNew.hostnames.size() + csNew.coordinators().size() ==
old_coordinators_processes.size() + 1);
std::vector<NetworkAddress> newCoordinators = wait(csNew.tryResolveHostnames());
// verify the coordinators' addresses // verify the coordinators' addresses
for (const auto& network_address : csNew.coordinators()) { for (const auto& network_address : newCoordinators) {
std::string address_str = network_address.toString(); std::string address_str = network_address.toString();
ASSERT(std::find(old_coordinators_processes.begin(), ASSERT(std::find(old_coordinators_processes.begin(),
old_coordinators_processes.end(), old_coordinators_processes.end(),
address_str) != old_coordinators_processes.end() || address_str) != old_coordinators_processes.end() ||
new_coordinator_process == address_str); new_coordinator_process == address_str);
} }
// verify the cluster decription
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
tx->reset(); tx->reset();
} catch (Error& e) { } catch (Error& e) {
wait(tx->onError(e)); wait(tx->onError(e));

View File

@ -30,7 +30,7 @@ void forceLinkMemcpyTests();
void forceLinkMemcpyPerfTests(); void forceLinkMemcpyPerfTests();
#if (!defined(TLS_DISABLED) && !defined(_WIN32)) #if (!defined(TLS_DISABLED) && !defined(_WIN32))
void forceLinkStreamCipherTests(); void forceLinkStreamCipherTests();
void forceLinkBLockCiherTests(); void forceLinkBlobCipherTests();
#endif #endif
void forceLinkParallelStreamTests(); void forceLinkParallelStreamTests();
void forceLinkSimExternalConnectionTests(); void forceLinkSimExternalConnectionTests();
@ -39,6 +39,8 @@ void forceLinkSimKmsConnectorTests();
void forceLinkIThreadPoolTests(); void forceLinkIThreadPoolTests();
void forceLinkTokenSignTests(); void forceLinkTokenSignTests();
void forceLinkVersionVectorTests(); void forceLinkVersionVectorTests();
void forceLinkRESTClientTests();
void forceLinkRESTUtilsTests();
struct UnitTestWorkload : TestWorkload { struct UnitTestWorkload : TestWorkload {
bool enabled; bool enabled;
@ -88,6 +90,8 @@ struct UnitTestWorkload : TestWorkload {
forceLinkIThreadPoolTests(); forceLinkIThreadPoolTests();
forceLinkTokenSignTests(); forceLinkTokenSignTests();
forceLinkVersionVectorTests(); forceLinkVersionVectorTests();
forceLinkRESTClientTests();
forceLinkRESTUtilsTests();
} }
std::string description() const override { return "UnitTests"; } std::string description() const override { return "UnitTests"; }

View File

@ -653,9 +653,7 @@ struct WriteDuringReadWorkload : TestWorkload {
for (int j = i; j < end; j++) { for (int j = i; j < end; j++) {
if (deterministicRandom()->random01() < self->initialKeyDensity) { if (deterministicRandom()->random01() < self->initialKeyDensity) {
Key key = self->getKeyForIndex(j); Key key = self->getKeyForIndex(j);
if (key.size() <= (key.startsWith(systemKeys.begin) if (key.size() <= getMaxWriteKeySize(key, false)) {
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
Value value = self->getRandomValue(); Value value = self->getRandomValue();
value = value =
value.substr(0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT)); value.substr(0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
@ -898,18 +896,10 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.clear(range); tr.clear(range);
if (!noConflict) { if (!noConflict) {
KeyRangeRef conflict( KeyRangeRef conflict(
range.begin.substr(0, range.begin.substr(
std::min<int>(range.begin.size(), 0, std::min<int>(range.begin.size(), getMaxClearKeySize(range.begin) + 1)),
(range.begin.startsWith(systemKeys.begin) range.end.substr(
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT 0, std::min<int>(range.end.size(), getMaxClearKeySize(range.end) + 1)));
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
self->addedConflicts.insert(conflict, true); self->addedConflicts.insert(conflict, true);
} }
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin), self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
@ -922,9 +912,7 @@ struct WriteDuringReadWorkload : TestWorkload {
if (noConflict) if (noConflict)
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE); tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.clear(key); tr.clear(key);
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin) if (!noConflict && key.size() <= getMaxClearKeySize(key)) {
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
self->addedConflicts.insert(key, true); self->addedConflicts.insert(key, true);
} }
self->memoryDatabase.erase(key); self->memoryDatabase.erase(key);
@ -936,18 +924,9 @@ struct WriteDuringReadWorkload : TestWorkload {
//TraceEvent("WDRAddWriteConflict").detail("Range", range); //TraceEvent("WDRAddWriteConflict").detail("Range", range);
tr.addWriteConflictRange(range); tr.addWriteConflictRange(range);
KeyRangeRef conflict( KeyRangeRef conflict(
range.begin.substr(0, range.begin.substr(
std::min<int>(range.begin.size(), 0, std::min<int>(range.begin.size(), getMaxKeySize(range.begin) + 1)),
(range.begin.startsWith(systemKeys.begin) range.end.substr(0, std::min<int>(range.end.size(), getMaxKeySize(range.end) + 1)));
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
self->addedConflicts.insert(conflict, true); self->addedConflicts.insert(conflict, true);
} else if (operationType == 8 && !disableDelay) { } else if (operationType == 8 && !disableDelay) {
double maxTime = 6.0; double maxTime = 6.0;
@ -991,18 +970,10 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.atomicOp(versionStampKey, value, MutationRef::SetVersionstampedKey); tr.atomicOp(versionStampKey, value, MutationRef::SetVersionstampedKey);
tr.clear(range); tr.clear(range);
KeyRangeRef conflict( KeyRangeRef conflict(
range.begin.substr(0, range.begin.substr(
std::min<int>(range.begin.size(), 0, std::min<int>(range.begin.size(), getMaxClearKeySize(range.begin) + 1)),
(range.begin.startsWith(systemKeys.begin) range.end.substr(
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT 0, std::min<int>(range.end.size(), getMaxClearKeySize(range.end) + 1)));
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)),
range.end.substr(0,
std::min<int>(range.end.size(),
(range.end.startsWith(systemKeys.begin)
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
1)));
self->addedConflicts.insert(conflict, true); self->addedConflicts.insert(conflict, true);
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin), self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
self->memoryDatabase.lower_bound(range.end)); self->memoryDatabase.lower_bound(range.end));
@ -1043,10 +1014,9 @@ struct WriteDuringReadWorkload : TestWorkload {
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE); tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.atomicOp(key, value, opType); tr.atomicOp(key, value, opType);
//TraceEvent("WDRAtomicOpSuccess").detail("Key", key).detail("Value", value.size()); //TraceEvent("WDRAtomicOpSuccess").detail("Key", key).detail("Value", value.size());
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin) if (!noConflict && key.size() <= getMaxWriteKeySize(key, false)) {
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))
self->addedConflicts.insert(key, true); self->addedConflicts.insert(key, true);
}
Optional<Value> existing = self->memoryGet(&self->memoryDatabase, key); Optional<Value> existing = self->memoryGet(&self->memoryDatabase, key);
self->memoryDatabase[key] = self->memoryDatabase[key] =
self->applyAtomicOp(existing.present() ? Optional<StringRef>(existing.get()) self->applyAtomicOp(existing.present() ? Optional<StringRef>(existing.get())
@ -1063,10 +1033,9 @@ struct WriteDuringReadWorkload : TestWorkload {
if (noConflict) if (noConflict)
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE); tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
tr.set(key, value); tr.set(key, value);
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin) if (!noConflict && key.size() <= getMaxWriteKeySize(key, false)) {
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
: CLIENT_KNOBS->KEY_SIZE_LIMIT))
self->addedConflicts.insert(key, true); self->addedConflicts.insert(key, true);
}
//TraceEvent("WDRSetSuccess").detail("Key", key).detail("Value", value.size()); //TraceEvent("WDRSetSuccess").detail("Key", key).detail("Value", value.size());
self->memoryDatabase[key] = value; self->memoryDatabase[key] = value;
} }

View File

@ -39,6 +39,9 @@
#include "flow/flow.h" #include "flow/flow.h"
#include "flow/genericactors.actor.h" #include "flow/genericactors.actor.h"
#if defined(HAVE_WOLFSSL)
#include <wolfssl/options.h>
#endif
#include <openssl/aes.h> #include <openssl/aes.h>
#include <openssl/engine.h> #include <openssl/engine.h>
#include <openssl/evp.h> #include <openssl/evp.h>

View File

@ -84,6 +84,10 @@ set(FLOW_SRCS
actorcompiler.h actorcompiler.h
crc32c.h crc32c.h
crc32c.cpp crc32c.cpp
ppc-asm.h
crc32.S
crc32_wrapper.h
crc32_wrapper.c
error_definitions.h error_definitions.h
${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h ${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h
flat_buffers.cpp flat_buffers.cpp
@ -172,6 +176,10 @@ if(NOT WITH_TLS)
else() else()
target_link_libraries(flow PUBLIC OpenSSL::SSL) target_link_libraries(flow PUBLIC OpenSSL::SSL)
target_link_libraries(flow_sampling PUBLIC OpenSSL::SSL) target_link_libraries(flow_sampling PUBLIC OpenSSL::SSL)
if(USE_WOLFSSL)
target_include_directories(flow SYSTEM BEFORE PUBLIC ${WOLFSSL_INCLUDE_DIR}/wolfssl)
target_include_directories(flow_sampling SYSTEM BEFORE PUBLIC ${WOLFSSL_INCLUDE_DIR}/wolfssl)
endif()
endif() endif()
target_link_libraries(flow PUBLIC Threads::Threads ${CMAKE_DL_LIBS}) target_link_libraries(flow PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
target_link_libraries(flow_sampling PUBLIC Threads::Threads ${CMAKE_DL_LIBS}) target_link_libraries(flow_sampling PUBLIC Threads::Threads ${CMAKE_DL_LIBS})

Some files were not shown because too many files have changed in this diff Show More