Merge branch 'main' of github.com:apple/foundationdb into jfu-list-tenants
This commit is contained in:
commit
d953b961b7
|
@ -10,3 +10,4 @@ set(SRCS
|
||||||
|
|
||||||
add_library(FDBLibTLS STATIC ${SRCS})
|
add_library(FDBLibTLS STATIC ${SRCS})
|
||||||
target_link_libraries(FDBLibTLS PUBLIC OpenSSL::SSL boost_target PRIVATE flow)
|
target_link_libraries(FDBLibTLS PUBLIC OpenSSL::SSL boost_target PRIVATE flow)
|
||||||
|
# NOTE(review): target_include_directories() takes directory paths, not target
# names. These arguments mirror the target_link_libraries() call for this
# target and are presumably interpreted as paths relative to this directory —
# confirm whether this call is needed at all.
target_include_directories(FDBLibTLS INTERFACE OpenSSL::SSL boost_target PRIVATE flow)
|
||||||
|
|
|
@ -22,6 +22,9 @@
|
||||||
#include "FDBLibTLS/FDBLibTLSSession.h"
|
#include "FDBLibTLS/FDBLibTLSSession.h"
|
||||||
#include "flow/Trace.h"
|
#include "flow/Trace.h"
|
||||||
|
|
||||||
|
#if defined(HAVE_WOLFSSL)
|
||||||
|
#include <wolfssl/options.h>
|
||||||
|
#endif
|
||||||
#include <openssl/bio.h>
|
#include <openssl/bio.h>
|
||||||
#include <openssl/err.h>
|
#include <openssl/err.h>
|
||||||
#include <openssl/evp.h>
|
#include <openssl/evp.h>
|
||||||
|
|
|
@ -23,6 +23,9 @@
|
||||||
#include "flow/flow.h"
|
#include "flow/flow.h"
|
||||||
#include "flow/Trace.h"
|
#include "flow/Trace.h"
|
||||||
|
|
||||||
|
#if defined(HAVE_WOLFSSL)
|
||||||
|
#include <wolfssl/options.h>
|
||||||
|
#endif
|
||||||
#include <openssl/bio.h>
|
#include <openssl/bio.h>
|
||||||
#include <openssl/err.h>
|
#include <openssl/err.h>
|
||||||
#include <openssl/pem.h>
|
#include <openssl/pem.h>
|
||||||
|
|
|
@ -20,6 +20,9 @@
|
||||||
|
|
||||||
#include "FDBLibTLS/FDBLibTLSVerify.h"
|
#include "FDBLibTLS/FDBLibTLSVerify.h"
|
||||||
|
|
||||||
|
#if defined(HAVE_WOLFSSL)
|
||||||
|
#include <wolfssl/options.h>
|
||||||
|
#endif
|
||||||
#include <openssl/objects.h>
|
#include <openssl/objects.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
|
@ -25,6 +25,9 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <boost/lexical_cast.hpp>
|
#include <boost/lexical_cast.hpp>
|
||||||
|
|
||||||
|
#if defined(HAVE_WOLFSSL)
|
||||||
|
#include <wolfssl/options.h>
|
||||||
|
#endif
|
||||||
#include <openssl/objects.h>
|
#include <openssl/objects.h>
|
||||||
|
|
||||||
#include "fdbrpc/ITLSPlugin.h"
|
#include "fdbrpc/ITLSPlugin.h"
|
||||||
|
|
|
@ -80,10 +80,23 @@ endif()
|
||||||
# The tests don't build on windows
|
# The tests don't build on windows
|
||||||
if(NOT WIN32)
|
if(NOT WIN32)
|
||||||
set(MAKO_SRCS
|
set(MAKO_SRCS
|
||||||
test/mako/mako.c
|
test/mako/async.hpp
|
||||||
test/mako/mako.h
|
test/mako/async.cpp
|
||||||
test/mako/utils.c
|
test/mako/blob_granules.hpp
|
||||||
test/mako/utils.h)
|
test/mako/blob_granules.cpp
|
||||||
|
test/mako/future.hpp
|
||||||
|
test/mako/limit.hpp
|
||||||
|
test/mako/logger.hpp
|
||||||
|
test/mako/mako.cpp
|
||||||
|
test/mako/mako.hpp
|
||||||
|
test/mako/operations.hpp
|
||||||
|
test/mako/operations.cpp
|
||||||
|
test/mako/process.hpp
|
||||||
|
test/mako/shm.hpp
|
||||||
|
test/mako/stats.hpp
|
||||||
|
test/mako/time.hpp
|
||||||
|
test/mako/utils.cpp
|
||||||
|
test/mako/utils.hpp)
|
||||||
add_subdirectory(test/unit/third_party)
|
add_subdirectory(test/unit/third_party)
|
||||||
find_package(Threads REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
set(UNIT_TEST_SRCS
|
set(UNIT_TEST_SRCS
|
||||||
|
@ -98,6 +111,11 @@ if(NOT WIN32)
|
||||||
test/unit/fdb_api.cpp
|
test/unit/fdb_api.cpp
|
||||||
test/unit/fdb_api.hpp)
|
test/unit/fdb_api.hpp)
|
||||||
|
|
||||||
|
add_library(fdb_cpp INTERFACE)
|
||||||
|
target_sources(fdb_cpp INTERFACE test/fdb_api.hpp)
|
||||||
|
target_include_directories(fdb_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||||
|
target_link_libraries(fdb_cpp INTERFACE fmt::fmt)
|
||||||
|
|
||||||
set(API_TESTER_SRCS
|
set(API_TESTER_SRCS
|
||||||
test/apitester/fdb_c_api_tester.cpp
|
test/apitester/fdb_c_api_tester.cpp
|
||||||
test/apitester/TesterApiWorkload.cpp
|
test/apitester/TesterApiWorkload.cpp
|
||||||
|
@ -179,7 +197,11 @@ endif()
|
||||||
|
|
||||||
# do not set RPATH for mako
|
# do not set RPATH for mako
|
||||||
set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
|
set_property(TARGET mako PROPERTY SKIP_BUILD_RPATH TRUE)
|
||||||
target_link_libraries(mako PRIVATE fdb_c fdbclient)
|
if (USE_SANITIZER)
|
||||||
|
target_link_libraries(mako PRIVATE fdb_c fdbclient fmt::fmt Threads::Threads fdb_cpp boost_asan)
|
||||||
|
else()
|
||||||
|
target_link_libraries(mako PRIVATE fdb_c fdbclient fmt::fmt Threads::Threads fdb_cpp boost_target)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(NOT OPEN_FOR_IDE)
|
if(NOT OPEN_FOR_IDE)
|
||||||
# Make sure that fdb_c.h is compatible with c90
|
# Make sure that fdb_c.h is compatible with c90
|
||||||
|
@ -254,6 +276,8 @@ endif()
|
||||||
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests
|
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests
|
||||||
--tmp-dir
|
--tmp-dir
|
||||||
@TMP_DIR@
|
@TMP_DIR@
|
||||||
|
--log-dir
|
||||||
|
@LOG_DIR@
|
||||||
)
|
)
|
||||||
|
|
||||||
add_fdbclient_test(
|
add_fdbclient_test(
|
||||||
|
@ -271,6 +295,10 @@ endif()
|
||||||
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/blobgranuletests
|
${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/blobgranuletests
|
||||||
--blob-granule-local-file-path
|
--blob-granule-local-file-path
|
||||||
@DATA_DIR@/fdbblob/
|
@DATA_DIR@/fdbblob/
|
||||||
|
--tmp-dir
|
||||||
|
@TMP_DIR@
|
||||||
|
--log-dir
|
||||||
|
@LOG_DIR@
|
||||||
)
|
)
|
||||||
|
|
||||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT USE_SANITIZER)
|
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT USE_SANITIZER)
|
||||||
|
|
|
@ -37,55 +37,71 @@ private:
|
||||||
enum OpType { OP_INSERT, OP_CLEAR, OP_CLEAR_RANGE, OP_READ, OP_GET_RANGES, OP_LAST = OP_GET_RANGES };
|
enum OpType { OP_INSERT, OP_CLEAR, OP_CLEAR_RANGE, OP_READ, OP_GET_RANGES, OP_LAST = OP_GET_RANGES };
|
||||||
std::vector<OpType> excludedOpTypes;
|
std::vector<OpType> excludedOpTypes;
|
||||||
|
|
||||||
|
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
|
||||||
|
// FIXME: should still guarantee a read succeeds eventually somehow
|
||||||
|
bool seenReadSuccess = false;
|
||||||
|
|
||||||
void randomReadOp(TTaskFct cont) {
|
void randomReadOp(TTaskFct cont) {
|
||||||
std::string begin = randomKeyName();
|
std::string begin = randomKeyName();
|
||||||
std::string end = randomKeyName();
|
std::string end = randomKeyName();
|
||||||
auto results = std::make_shared<std::vector<KeyValue>>();
|
auto results = std::make_shared<std::vector<KeyValue>>();
|
||||||
|
auto tooOld = std::make_shared<bool>(false);
|
||||||
if (begin > end) {
|
if (begin > end) {
|
||||||
std::swap(begin, end);
|
std::swap(begin, end);
|
||||||
}
|
}
|
||||||
execTransaction(
|
execTransaction(
|
||||||
[begin, end, results](auto ctx) {
|
[this, begin, end, results, tooOld](auto ctx) {
|
||||||
ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
|
ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
|
||||||
KeyValuesResult res = ctx->tx()->readBlobGranules(begin, end, ctx->getBGBasePath());
|
KeyValuesResult res = ctx->tx()->readBlobGranules(begin, end, ctx->getBGBasePath());
|
||||||
bool more;
|
bool more;
|
||||||
(*results) = res.getKeyValues(&more);
|
(*results) = res.getKeyValues(&more);
|
||||||
ASSERT(!more);
|
ASSERT(!more);
|
||||||
if (res.getError() != error_code_success) {
|
if (res.getError() == error_code_blob_granule_transaction_too_old) {
|
||||||
|
info("BlobGranuleCorrectness::randomReadOp bg too old\n");
|
||||||
|
ASSERT(!seenReadSuccess);
|
||||||
|
*tooOld = true;
|
||||||
|
ctx->done();
|
||||||
|
} else if (res.getError() != error_code_success) {
|
||||||
ctx->onError(res.getError());
|
ctx->onError(res.getError());
|
||||||
} else {
|
} else {
|
||||||
|
if (!seenReadSuccess) {
|
||||||
|
info("BlobGranuleCorrectness::randomReadOp first success\n");
|
||||||
|
}
|
||||||
|
seenReadSuccess = true;
|
||||||
ctx->done();
|
ctx->done();
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
[this, begin, end, results, cont]() {
|
[this, begin, end, results, tooOld, cont]() {
|
||||||
std::vector<KeyValue> expected = store.getRange(begin, end, store.size(), false);
|
if (!*tooOld) {
|
||||||
if (results->size() != expected.size()) {
|
std::vector<KeyValue> expected = store.getRange(begin, end, store.size(), false);
|
||||||
error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}",
|
if (results->size() != expected.size()) {
|
||||||
expected.size(),
|
error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}",
|
||||||
results->size()));
|
expected.size(),
|
||||||
}
|
results->size()));
|
||||||
ASSERT(results->size() == expected.size());
|
|
||||||
|
|
||||||
for (int i = 0; i < results->size(); i++) {
|
|
||||||
if ((*results)[i].key != expected[i].key) {
|
|
||||||
error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}",
|
|
||||||
i,
|
|
||||||
results->size(),
|
|
||||||
expected[i].key,
|
|
||||||
(*results)[i].key));
|
|
||||||
}
|
}
|
||||||
ASSERT((*results)[i].key == expected[i].key);
|
ASSERT(results->size() == expected.size());
|
||||||
|
|
||||||
if ((*results)[i].value != expected[i].value) {
|
for (int i = 0; i < results->size(); i++) {
|
||||||
error(
|
if ((*results)[i].key != expected[i].key) {
|
||||||
fmt::format("randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}",
|
error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}",
|
||||||
i,
|
i,
|
||||||
results->size(),
|
results->size(),
|
||||||
expected[i].key,
|
expected[i].key,
|
||||||
expected[i].value,
|
(*results)[i].key));
|
||||||
(*results)[i].value));
|
}
|
||||||
|
ASSERT((*results)[i].key == expected[i].key);
|
||||||
|
|
||||||
|
if ((*results)[i].value != expected[i].value) {
|
||||||
|
error(fmt::format(
|
||||||
|
"randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}",
|
||||||
|
i,
|
||||||
|
results->size(),
|
||||||
|
expected[i].key,
|
||||||
|
expected[i].value,
|
||||||
|
(*results)[i].value));
|
||||||
|
}
|
||||||
|
ASSERT((*results)[i].value == expected[i].value);
|
||||||
}
|
}
|
||||||
ASSERT((*results)[i].value == expected[i].value);
|
|
||||||
}
|
}
|
||||||
schedule(cont);
|
schedule(cont);
|
||||||
});
|
});
|
||||||
|
@ -110,9 +126,11 @@ private:
|
||||||
true);
|
true);
|
||||||
},
|
},
|
||||||
[this, begin, end, results, cont]() {
|
[this, begin, end, results, cont]() {
|
||||||
ASSERT(results->size() > 0);
|
if (seenReadSuccess) {
|
||||||
ASSERT(results->front().key <= begin);
|
ASSERT(results->size() > 0);
|
||||||
ASSERT(results->back().value >= end);
|
ASSERT(results->front().key <= begin);
|
||||||
|
ASSERT(results->back().value >= end);
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < results->size(); i++) {
|
for (int i = 0; i < results->size(); i++) {
|
||||||
// no empty or inverted ranges
|
// no empty or inverted ranges
|
||||||
|
|
|
@ -20,12 +20,19 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import subprocess
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
from subprocess import Popen, TimeoutExpired
|
from subprocess import Popen, TimeoutExpired
|
||||||
import logging
|
import logging
|
||||||
import signal
|
import signal
|
||||||
|
from pathlib import Path
|
||||||
|
import glob
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
|
||||||
|
|
||||||
|
def random_string(len):
    """Return a random string of `len` characters drawn from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    return ''.join(random.choice(alphabet) for _ in range(len))
|
||||||
|
|
||||||
|
|
||||||
def get_logger():
|
def get_logger():
|
||||||
|
@ -48,6 +55,14 @@ def initialize_logger_level(logging_level):
|
||||||
logger.setLevel(logging.ERROR)
|
logger.setLevel(logging.ERROR)
|
||||||
|
|
||||||
|
|
||||||
|
def dump_client_logs(log_dir):
    """Print the contents of every regular file directly inside `log_dir`.

    Each file is framed by a header and a footer line so individual logs can be
    told apart in the combined output. Files are visited in sorted order so the
    output is deterministic.

    Sub-directories are skipped and undecodable bytes are replaced rather than
    raising, so dumping logs cannot raise on its own and hide the failure that
    triggered the dump.
    """
    for log_file in sorted(glob.glob(os.path.join(log_dir, "*"))):
        # The "*" pattern also matches directories; open() would raise on them.
        if not os.path.isfile(log_file):
            continue
        print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(log_file))
        # errors="replace": client logs may contain bytes that are not valid text.
        with open(log_file, "r", errors="replace") as f:
            print(f.read())
        print(">>>>>>>>>>>>>>>>>>>> End of {}:".format(log_file))
|
||||||
|
|
||||||
|
|
||||||
def run_tester(args, test_file):
|
def run_tester(args, test_file):
|
||||||
cmd = [args.tester_binary,
|
cmd = [args.tester_binary,
|
||||||
"--cluster-file", args.cluster_file,
|
"--cluster-file", args.cluster_file,
|
||||||
|
@ -56,6 +71,12 @@ def run_tester(args, test_file):
|
||||||
cmd += ["--external-client-library", args.external_client_library]
|
cmd += ["--external-client-library", args.external_client_library]
|
||||||
if args.tmp_dir is not None:
|
if args.tmp_dir is not None:
|
||||||
cmd += ["--tmp-dir", args.tmp_dir]
|
cmd += ["--tmp-dir", args.tmp_dir]
|
||||||
|
log_dir = None
|
||||||
|
if args.log_dir is not None:
|
||||||
|
log_dir = Path(args.log_dir).joinpath(random_string(8))
|
||||||
|
# parents=True: the base --log-dir may not exist yet when the first test runs.
log_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
cmd += ['--log', "--log-dir", str(log_dir)]
|
||||||
|
|
||||||
if args.blob_granule_local_file_path is not None:
|
if args.blob_granule_local_file_path is not None:
|
||||||
cmd += ["--blob-granule-local-file-path",
|
cmd += ["--blob-granule-local-file-path",
|
||||||
args.blob_granule_local_file_path]
|
args.blob_granule_local_file_path]
|
||||||
|
@ -63,6 +84,7 @@ def run_tester(args, test_file):
|
||||||
get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd))
|
get_logger().info('\nRunning tester \'%s\'...' % ' '.join(cmd))
|
||||||
proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
|
proc = Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
|
||||||
timed_out = False
|
timed_out = False
|
||||||
|
ret_code = 1
|
||||||
try:
|
try:
|
||||||
ret_code = proc.wait(args.timeout)
|
ret_code = proc.wait(args.timeout)
|
||||||
except TimeoutExpired:
|
except TimeoutExpired:
|
||||||
|
@ -72,15 +94,16 @@ def run_tester(args, test_file):
|
||||||
raise Exception('Unable to run tester (%s)' % e)
|
raise Exception('Unable to run tester (%s)' % e)
|
||||||
|
|
||||||
if ret_code != 0:
|
if ret_code != 0:
|
||||||
if ret_code < 0:
|
if timed_out:
|
||||||
|
reason = 'timed out after %d seconds' % args.timeout
|
||||||
|
elif ret_code < 0:
|
||||||
reason = signal.Signals(-ret_code).name
|
reason = signal.Signals(-ret_code).name
|
||||||
else:
|
else:
|
||||||
reason = 'exit code: %d' % ret_code
|
reason = 'exit code: %d' % ret_code
|
||||||
if timed_out:
|
|
||||||
reason = 'timed out after %d seconds' % args.timeout
|
|
||||||
ret_code = 1
|
|
||||||
get_logger().error('\n\'%s\' did not complete succesfully (%s)' %
|
get_logger().error('\n\'%s\' did not complete succesfully (%s)' %
|
||||||
(cmd[0], reason))
|
(cmd[0], reason))
|
||||||
|
if (log_dir is not None):
|
||||||
|
dump_client_logs(log_dir)
|
||||||
|
|
||||||
get_logger().info('')
|
get_logger().info('')
|
||||||
return ret_code
|
return ret_code
|
||||||
|
@ -115,6 +138,8 @@ def parse_args(argv):
|
||||||
help='Path to a directory with test definitions. (default: ./)')
|
help='Path to a directory with test definitions. (default: ./)')
|
||||||
parser.add_argument('--timeout', type=int, default=300,
|
parser.add_argument('--timeout', type=int, default=300,
|
||||||
help='The timeout in seconds for running each individual test. (default 300)')
|
help='The timeout in seconds for running each individual test. (default 300)')
|
||||||
|
parser.add_argument('--log-dir', type=str, default=None,
|
||||||
|
help='The directory for storing logs (default: None)')
|
||||||
parser.add_argument('--logging-level', type=str, default='INFO',
|
parser.add_argument('--logging-level', type=str, default='INFO',
|
||||||
choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'], help='Specifies the level of detail in the tester output (default=\'INFO\').')
|
choices=['ERROR', 'WARNING', 'INFO', 'DEBUG'], help='Specifies the level of detail in the tester output (default=\'INFO\').')
|
||||||
parser.add_argument('--tmp-dir', type=str, default=None,
|
parser.add_argument('--tmp-dir', type=str, default=None,
|
||||||
|
|
|
@ -0,0 +1,561 @@
|
||||||
|
/*
|
||||||
|
* fdb_api.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef FDB_API_HPP
|
||||||
|
#define FDB_API_HPP
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef FDB_API_VERSION
|
||||||
|
#define FDB_API_VERSION 720
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
#include <fmt/format.h>
|
||||||
|
|
||||||
|
// introduce the option enums
|
||||||
|
#include <fdb_c_options.g.h>
|
||||||
|
|
||||||
|
namespace fdb {
|
||||||
|
|
||||||
|
// hide C API to discourage mixing C/C++ API
|
||||||
|
namespace native {
|
||||||
|
#include <foundationdb/fdb_c.h>
|
||||||
|
}
|
||||||
|
|
||||||
|
using ByteString = std::basic_string<uint8_t>;
|
||||||
|
using BytesRef = std::basic_string_view<uint8_t>;
|
||||||
|
using CharsRef = std::string_view;
|
||||||
|
using KeyRef = BytesRef;
|
||||||
|
using ValueRef = BytesRef;
|
||||||
|
|
||||||
|
// Reinterpret a pointer to chars as a pointer to unsigned bytes (no copy).
inline uint8_t const* toBytePtr(char const* ptr) noexcept {
	auto const* bytes = reinterpret_cast<uint8_t const*>(ptr);
	return bytes;
}
|
||||||
|
|
||||||
|
// get bytestring view from charstring: e.g. std::basic_string{_view}<char>
|
||||||
|
template <template <class...> class StringLike, class Char>
|
||||||
|
BytesRef toBytesRef(const StringLike<Char>& s) noexcept {
|
||||||
|
static_assert(sizeof(Char) == 1);
|
||||||
|
return BytesRef(reinterpret_cast<uint8_t const*>(s.data()), s.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
// get charstring view from bytestring: e.g. std::basic_string{_view}<uint8_t>
|
||||||
|
template <template <class...> class StringLike, class Char>
|
||||||
|
CharsRef toCharsRef(const StringLike<Char>& s) noexcept {
|
||||||
|
static_assert(sizeof(Char) == 1);
|
||||||
|
return CharsRef(reinterpret_cast<char const*>(s.data()), s.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
[[maybe_unused]] constexpr const bool OverflowCheck = false;
|
||||||
|
|
||||||
|
inline int intSize(BytesRef b) {
|
||||||
|
if constexpr (OverflowCheck) {
|
||||||
|
if (b.size() > static_cast<size_t>(std::numeric_limits<int>::max()))
|
||||||
|
throw std::overflow_error("byte strlen goes beyond int bounds");
|
||||||
|
}
|
||||||
|
return static_cast<int>(b.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
class Error {
|
||||||
|
public:
|
||||||
|
using CodeType = native::fdb_error_t;
|
||||||
|
|
||||||
|
Error() noexcept : err(0) {}
|
||||||
|
|
||||||
|
explicit Error(CodeType err) noexcept : err(err) {}
|
||||||
|
|
||||||
|
char const* what() noexcept { return native::fdb_get_error(err); }
|
||||||
|
|
||||||
|
explicit operator bool() const noexcept { return err != 0; }
|
||||||
|
|
||||||
|
bool is(CodeType other) const noexcept { return err == other; }
|
||||||
|
|
||||||
|
CodeType code() const noexcept { return err; }
|
||||||
|
|
||||||
|
bool retryable() const noexcept { return native::fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err) != 0; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
CodeType err;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Traits of value types held by ready futures.
|
||||||
|
Holds type and value extraction function. */
|
||||||
|
namespace future_var {
|
||||||
|
struct None {
|
||||||
|
struct Type {};
|
||||||
|
static Error extract(native::FDBFuture*, Type&) noexcept { return Error(0); }
|
||||||
|
};
|
||||||
|
struct Int64 {
|
||||||
|
using Type = int64_t;
|
||||||
|
static Error extract(native::FDBFuture* f, Type& out) noexcept {
|
||||||
|
return Error(native::fdb_future_get_int64(f, &out));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct Key {
|
||||||
|
using Type = std::pair<uint8_t const*, int>;
|
||||||
|
static Error extract(native::FDBFuture* f, Type& out) noexcept {
|
||||||
|
auto& [out_key, out_key_length] = out;
|
||||||
|
return Error(native::fdb_future_get_key(f, &out_key, &out_key_length));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct Value {
|
||||||
|
using Type = std::tuple<bool, uint8_t const*, int>;
|
||||||
|
static Error extract(native::FDBFuture* f, Type& out) noexcept {
|
||||||
|
auto& [out_present, out_value, out_value_length] = out;
|
||||||
|
auto out_present_native = native::fdb_bool_t{};
|
||||||
|
auto err = native::fdb_future_get_value(f, &out_present_native, &out_value, &out_value_length);
|
||||||
|
out_present = (out_present_native != 0);
|
||||||
|
return Error(err);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct StringArray {
|
||||||
|
using Type = std::pair<const char**, int>;
|
||||||
|
static Error extract(native::FDBFuture* f, Type& out) noexcept {
|
||||||
|
auto& [out_strings, out_count] = out;
|
||||||
|
return Error(native::fdb_future_get_string_array(f, &out_strings, &out_count));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct KeyValueArray {
|
||||||
|
using Type = std::tuple<native::FDBKeyValue const*, int, bool>;
|
||||||
|
static Error extract(native::FDBFuture* f, Type& out) noexcept {
|
||||||
|
auto& [out_kv, out_count, out_more] = out;
|
||||||
|
auto out_more_native = native::fdb_bool_t{};
|
||||||
|
auto err = native::fdb_future_get_keyvalue_array(f, &out_kv, &out_count, &out_more_native);
|
||||||
|
out_more = (out_more_native != 0);
|
||||||
|
return Error(err);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace future_var
|
||||||
|
|
||||||
|
// Throw std::runtime_error whose message is `preamble` followed by err.what().
[[noreturn]] inline void throwError(std::string_view preamble, Error err) {
	std::string msg{ preamble };
	msg += err.what();
	throw std::runtime_error(msg);
}
|
||||||
|
|
||||||
|
inline int maxApiVersion() {
|
||||||
|
return native::fdb_get_max_api_version();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Error selectApiVersionNothrow(int version) {
|
||||||
|
return Error(native::fdb_select_api_version(version));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void selectApiVersion(int version) {
|
||||||
|
if (auto err = selectApiVersionNothrow(version)) {
|
||||||
|
throwError(fmt::format("ERROR: fdb_select_api_version({}): ", version), err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace network {
|
||||||
|
|
||||||
|
inline Error setOptionNothrow(FDBNetworkOption option, BytesRef str) noexcept {
|
||||||
|
return Error(native::fdb_network_set_option(option, str.data(), intSize(str)));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Error setOptionNothrow(FDBNetworkOption option, int64_t value) noexcept {
|
||||||
|
return Error(native::fdb_network_set_option(
|
||||||
|
option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void setOption(FDBNetworkOption option, BytesRef str) {
|
||||||
|
if (auto err = setOptionNothrow(option, str)) {
|
||||||
|
throwError(fmt::format("ERROR: fdb_network_set_option({}): ",
|
||||||
|
static_cast<std::underlying_type_t<FDBNetworkOption>>(option)),
|
||||||
|
err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void setOption(FDBNetworkOption option, int64_t value) {
|
||||||
|
if (auto err = setOptionNothrow(option, value)) {
|
||||||
|
throwError(fmt::format("ERROR: fdb_network_set_option({}, {}): ",
|
||||||
|
static_cast<std::underlying_type_t<FDBNetworkOption>>(option),
|
||||||
|
value),
|
||||||
|
err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Error setupNothrow() noexcept {
|
||||||
|
return Error(native::fdb_setup_network());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void setup() {
|
||||||
|
if (auto err = setupNothrow())
|
||||||
|
throwError("ERROR: fdb_network_setup(): ", err);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Error run() {
|
||||||
|
return Error(native::fdb_run_network());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Error stop() {
|
||||||
|
return Error(native::fdb_stop_network());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace network
|
||||||
|
|
||||||
|
class Transaction;
|
||||||
|
class Database;
|
||||||
|
|
||||||
|
class Result {
|
||||||
|
friend class Transaction;
|
||||||
|
std::shared_ptr<native::FDBResult> r;
|
||||||
|
|
||||||
|
Result(native::FDBResult* result) {
|
||||||
|
if (result)
|
||||||
|
r = std::shared_ptr<native::FDBResult>(result, &native::fdb_result_destroy);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
using KeyValueArray = future_var::KeyValueArray::Type;
|
||||||
|
|
||||||
|
Error getKeyValueArrayNothrow(KeyValueArray& out) const noexcept {
|
||||||
|
auto out_more_native = native::fdb_bool_t{};
|
||||||
|
auto& [out_kv, out_count, out_more] = out;
|
||||||
|
auto err_raw = native::fdb_result_get_keyvalue_array(r.get(), &out_kv, &out_count, &out_more_native);
|
||||||
|
out_more = out_more_native != 0;
|
||||||
|
return Error(err_raw);
|
||||||
|
}
|
||||||
|
|
||||||
|
KeyValueArray getKeyValueArray() const {
|
||||||
|
auto ret = KeyValueArray{};
|
||||||
|
if (auto err = getKeyValueArrayNothrow(ret))
|
||||||
|
throwError("ERROR: result_get_keyvalue_array(): ", err);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class Future {
|
||||||
|
protected:
|
||||||
|
friend class Transaction;
|
||||||
|
std::shared_ptr<native::FDBFuture> f;
|
||||||
|
|
||||||
|
Future(native::FDBFuture* future) {
|
||||||
|
if (future)
|
||||||
|
f = std::shared_ptr<native::FDBFuture>(future, &native::fdb_future_destroy);
|
||||||
|
}
|
||||||
|
|
||||||
|
// wrap any capturing lambda as callback passable to fdb_future_set_callback().
|
||||||
|
// destroy after invocation.
|
||||||
|
template <class Fn>
|
||||||
|
static void callback(native::FDBFuture*, void* param) {
|
||||||
|
auto fp = static_cast<Fn*>(param);
|
||||||
|
try {
|
||||||
|
(*fp)();
|
||||||
|
} catch (const std::exception& e) {
|
||||||
|
fmt::print(stderr, "ERROR: Exception thrown in user callback: {}", e.what());
|
||||||
|
}
|
||||||
|
delete fp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// set as callback user-defined completion handler of signature void(Future)
|
||||||
|
template <class FutureType, class UserFunc>
|
||||||
|
void then(UserFunc&& fn) {
|
||||||
|
auto cb = [fut = FutureType(*this), fn = std::forward<UserFunc>(fn)]() { fn(fut); };
|
||||||
|
using cb_type = std::decay_t<decltype(cb)>;
|
||||||
|
auto fp = new cb_type(std::move(cb));
|
||||||
|
if (auto err = Error(native::fdb_future_set_callback(f.get(), &callback<cb_type>, fp))) {
|
||||||
|
throwError("ERROR: future_set_callback: ", err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
Future() noexcept : Future(nullptr) {}
|
||||||
|
Future(const Future&) noexcept = default;
|
||||||
|
Future& operator=(const Future&) noexcept = default;
|
||||||
|
|
||||||
|
bool valid() const noexcept { return f != nullptr; }
|
||||||
|
|
||||||
|
explicit operator bool() const noexcept { return valid(); }
|
||||||
|
|
||||||
|
bool ready() const noexcept {
|
||||||
|
assert(valid());
|
||||||
|
return native::fdb_future_is_ready(f.get()) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Error blockUntilReady() const noexcept {
|
||||||
|
assert(valid());
|
||||||
|
return Error(native::fdb_future_block_until_ready(f.get()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Error error() const noexcept {
|
||||||
|
assert(valid());
|
||||||
|
return Error(native::fdb_future_get_error(f.get()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void cancel() noexcept { native::fdb_future_cancel(f.get()); }
|
||||||
|
|
||||||
|
template <class VarTraits>
|
||||||
|
typename VarTraits::Type get() const {
|
||||||
|
assert(valid());
|
||||||
|
assert(!error());
|
||||||
|
auto out = typename VarTraits::Type{};
|
||||||
|
if (auto err = VarTraits::extract(f.get(), out)) {
|
||||||
|
throwError("future_get: ", err);
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class VarTraits>
|
||||||
|
Error getNothrow(typename VarTraits::Type& var) const noexcept {
|
||||||
|
assert(valid());
|
||||||
|
assert(!error());
|
||||||
|
auto out = typename VarTraits::Type{};
|
||||||
|
return VarTraits::extract(f.get(), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class UserFunc>
|
||||||
|
void then(UserFunc&& fn) {
|
||||||
|
then<Future>(std::forward<UserFunc>(fn));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename VarTraits>
|
||||||
|
class TypedFuture : public Future {
|
||||||
|
friend class Future;
|
||||||
|
friend class Transaction;
|
||||||
|
using SelfType = TypedFuture<VarTraits>;
|
||||||
|
using Future::Future;
|
||||||
|
// hide type-unsafe inherited functions
|
||||||
|
using Future::get;
|
||||||
|
using Future::getNothrow;
|
||||||
|
using Future::then;
|
||||||
|
TypedFuture(const Future& f) noexcept : Future(f) {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
using ContainedType = typename VarTraits::Type;
|
||||||
|
|
||||||
|
Future eraseType() const noexcept { return static_cast<Future const&>(*this); }
|
||||||
|
|
||||||
|
ContainedType get() const { return get<VarTraits>(); }
|
||||||
|
|
||||||
|
Error getNothrow(ContainedType& out) const noexcept { return getNothrow<VarTraits>(out); }
|
||||||
|
|
||||||
|
template <class UserFunc>
|
||||||
|
void then(UserFunc&& fn) {
|
||||||
|
Future::then<SelfType>(std::forward<UserFunc>(fn));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct KeySelector {
|
||||||
|
const uint8_t* key;
|
||||||
|
int keyLength;
|
||||||
|
bool orEqual;
|
||||||
|
int offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace key_select {
|
||||||
|
|
||||||
|
inline KeySelector firstGreaterThan(KeyRef key, int offset = 0) {
|
||||||
|
return KeySelector{ FDB_KEYSEL_FIRST_GREATER_THAN(key.data(), intSize(key)) + offset };
|
||||||
|
}
|
||||||
|
|
||||||
|
inline KeySelector firstGreaterOrEqual(KeyRef key, int offset = 0) {
|
||||||
|
return KeySelector{ FDB_KEYSEL_FIRST_GREATER_OR_EQUAL(key.data(), intSize(key)) + offset };
|
||||||
|
}
|
||||||
|
|
||||||
|
inline KeySelector lastLessThan(KeyRef key, int offset = 0) {
|
||||||
|
return KeySelector{ FDB_KEYSEL_LAST_LESS_THAN(key.data(), intSize(key)) + offset };
|
||||||
|
}
|
||||||
|
|
||||||
|
inline KeySelector lastLessOrEqual(KeyRef key, int offset = 0) {
|
||||||
|
return KeySelector{ FDB_KEYSEL_LAST_LESS_OR_EQUAL(key.data(), intSize(key)) + offset };
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace key_select
|
||||||
|
|
||||||
|
class Transaction {
|
||||||
|
friend class Database;
|
||||||
|
std::shared_ptr<native::FDBTransaction> tr;
|
||||||
|
|
||||||
|
explicit Transaction(native::FDBTransaction* tr_raw) {
|
||||||
|
if (tr_raw)
|
||||||
|
tr = std::shared_ptr<native::FDBTransaction>(tr_raw, &native::fdb_transaction_destroy);
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
Transaction() noexcept : Transaction(nullptr) {}
|
||||||
|
Transaction(const Transaction&) noexcept = default;
|
||||||
|
Transaction& operator=(const Transaction&) noexcept = default;
|
||||||
|
|
||||||
|
bool valid() const noexcept { return tr != nullptr; }
|
||||||
|
|
||||||
|
explicit operator bool() const noexcept { return valid(); }
|
||||||
|
|
||||||
|
Error setOptionNothrow(FDBTransactionOption option, int64_t value) noexcept {
|
||||||
|
return Error(native::fdb_transaction_set_option(
|
||||||
|
tr.get(), option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
|
||||||
|
}
|
||||||
|
|
||||||
|
Error setOptionNothrow(FDBTransactionOption option, BytesRef str) noexcept {
|
||||||
|
return Error(native::fdb_transaction_set_option(tr.get(), option, str.data(), intSize(str)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void setOption(FDBTransactionOption option, int64_t value) {
|
||||||
|
if (auto err = setOptionNothrow(option, value)) {
|
||||||
|
throwError(fmt::format("transaction_set_option({}, {}) returned error: ",
|
||||||
|
static_cast<std::underlying_type_t<FDBTransactionOption>>(option),
|
||||||
|
value),
|
||||||
|
err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void setOption(FDBTransactionOption option, BytesRef str) {
|
||||||
|
if (auto err = setOptionNothrow(option, str)) {
|
||||||
|
throwError(fmt::format("transaction_set_option({}) returned error: ",
|
||||||
|
static_cast<std::underlying_type_t<FDBTransactionOption>>(option)),
|
||||||
|
err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TypedFuture<future_var::Int64> getReadVersion() { return native::fdb_transaction_get_read_version(tr.get()); }
|
||||||
|
|
||||||
|
Error getCommittedVersionNothrow(int64_t& out) {
|
||||||
|
return Error(native::fdb_transaction_get_committed_version(tr.get(), &out));
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t getCommittedVersion() {
|
||||||
|
auto out = int64_t{};
|
||||||
|
if (auto err = getCommittedVersionNothrow(out)) {
|
||||||
|
throwError("get_committed_version: ", err);
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
TypedFuture<future_var::Key> getKey(KeySelector sel, bool snapshot) {
|
||||||
|
return native::fdb_transaction_get_key(tr.get(), sel.key, sel.keyLength, sel.orEqual, sel.offset, snapshot);
|
||||||
|
}
|
||||||
|
|
||||||
|
TypedFuture<future_var::Value> get(KeyRef key, bool snapshot) {
|
||||||
|
return native::fdb_transaction_get(tr.get(), key.data(), intSize(key), snapshot);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Usage: tx.getRange(key_select::firstGreaterOrEqual(firstKey), key_select::lastLessThan(lastKey), ...)
|
||||||
|
// gets key-value pairs in key range [begin, end)
|
||||||
|
TypedFuture<future_var::KeyValueArray> getRange(KeySelector first,
|
||||||
|
KeySelector last,
|
||||||
|
int limit,
|
||||||
|
int target_bytes,
|
||||||
|
FDBStreamingMode mode,
|
||||||
|
int iteration,
|
||||||
|
bool snapshot,
|
||||||
|
bool reverse) {
|
||||||
|
return native::fdb_transaction_get_range(tr.get(),
|
||||||
|
first.key,
|
||||||
|
first.keyLength,
|
||||||
|
first.orEqual,
|
||||||
|
first.offset,
|
||||||
|
last.key,
|
||||||
|
last.keyLength,
|
||||||
|
last.orEqual,
|
||||||
|
last.offset,
|
||||||
|
limit,
|
||||||
|
target_bytes,
|
||||||
|
mode,
|
||||||
|
iteration,
|
||||||
|
snapshot,
|
||||||
|
reverse);
|
||||||
|
}
|
||||||
|
|
||||||
|
Result readBlobGranules(KeyRef begin,
|
||||||
|
KeyRef end,
|
||||||
|
int64_t begin_version,
|
||||||
|
int64_t read_version,
|
||||||
|
native::FDBReadBlobGranuleContext context) {
|
||||||
|
return Result(native::fdb_transaction_read_blob_granules(
|
||||||
|
tr.get(), begin.data(), intSize(begin), end.data(), intSize(end), begin_version, read_version, context));
|
||||||
|
}
|
||||||
|
|
||||||
|
TypedFuture<future_var::None> commit() { return native::fdb_transaction_commit(tr.get()); }
|
||||||
|
|
||||||
|
TypedFuture<future_var::None> onError(Error err) { return native::fdb_transaction_on_error(tr.get(), err.code()); }
|
||||||
|
|
||||||
|
void reset() { return native::fdb_transaction_reset(tr.get()); }
|
||||||
|
|
||||||
|
void set(KeyRef key, ValueRef value) {
|
||||||
|
native::fdb_transaction_set(tr.get(), key.data(), intSize(key), value.data(), intSize(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear(KeyRef key) { native::fdb_transaction_clear(tr.get(), key.data(), intSize(key)); }
|
||||||
|
|
||||||
|
void clearRange(KeyRef begin, KeyRef end) {
|
||||||
|
native::fdb_transaction_clear_range(tr.get(), begin.data(), intSize(begin), end.data(), intSize(end));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class Database {
|
||||||
|
std::shared_ptr<native::FDBDatabase> db;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Database(const Database&) noexcept = default;
|
||||||
|
Database& operator=(const Database&) noexcept = default;
|
||||||
|
Database(const std::string& cluster_file_path) : db(nullptr) {
|
||||||
|
auto db_raw = static_cast<native::FDBDatabase*>(nullptr);
|
||||||
|
if (auto err = Error(native::fdb_create_database(cluster_file_path.c_str(), &db_raw)))
|
||||||
|
throwError(fmt::format("Failed to create database with '{}': ", cluster_file_path), err);
|
||||||
|
db = std::shared_ptr<native::FDBDatabase>(db_raw, &native::fdb_database_destroy);
|
||||||
|
}
|
||||||
|
Database() noexcept : db(nullptr) {}
|
||||||
|
|
||||||
|
Error setOptionNothrow(FDBDatabaseOption option, int64_t value) noexcept {
|
||||||
|
return Error(native::fdb_database_set_option(
|
||||||
|
db.get(), option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
|
||||||
|
}
|
||||||
|
|
||||||
|
Error setOptionNothrow(FDBDatabaseOption option, BytesRef str) noexcept {
|
||||||
|
return Error(native::fdb_database_set_option(db.get(), option, str.data(), intSize(str)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void setOption(FDBDatabaseOption option, int64_t value) {
|
||||||
|
if (auto err = setOptionNothrow(option, value)) {
|
||||||
|
throwError(fmt::format("database_set_option({}, {}) returned error: ",
|
||||||
|
static_cast<std::underlying_type_t<FDBDatabaseOption>>(option),
|
||||||
|
value),
|
||||||
|
err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void setOption(FDBDatabaseOption option, BytesRef str) {
|
||||||
|
if (auto err = setOptionNothrow(option, str)) {
|
||||||
|
throwError(fmt::format("database_set_option({}) returned error: ",
|
||||||
|
static_cast<std::underlying_type_t<FDBDatabaseOption>>(option)),
|
||||||
|
err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Transaction createTransaction() {
|
||||||
|
if (!db)
|
||||||
|
throw std::runtime_error("create_transaction from null database");
|
||||||
|
auto tx_native = static_cast<native::FDBTransaction*>(nullptr);
|
||||||
|
auto err = Error(native::fdb_database_create_transaction(db.get(), &tx_native));
|
||||||
|
if (err)
|
||||||
|
throwError("Failed to create transaction: ", err);
|
||||||
|
return Transaction(tx_native);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace fdb
|
||||||
|
|
||||||
|
#endif /*FDB_API_HPP*/
|
|
@ -0,0 +1,288 @@
|
||||||
|
/*
|
||||||
|
* async.cpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <boost/asio.hpp>
|
||||||
|
#include "async.hpp"
|
||||||
|
#include "future.hpp"
|
||||||
|
#include "logger.hpp"
|
||||||
|
#include "operations.hpp"
|
||||||
|
#include "stats.hpp"
|
||||||
|
#include "time.hpp"
|
||||||
|
#include "utils.hpp"
|
||||||
|
|
||||||
|
extern thread_local mako::Logger logr;
|
||||||
|
|
||||||
|
using namespace fdb;
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
// Queues the next runOneTick() call on the io_context.
void ResumableStateForPopulate::postNextTick() {
    // The captured shared_ptr keeps this state object alive until the posted tick has run.
    auto next_tick = [this, keep_alive = shared_from_this()]() { runOneTick(); };
    boost::asio::post(io_context, next_tick);
}
|
||||||
|
|
||||||
|
void ResumableStateForPopulate::runOneTick() {
|
||||||
|
const auto num_commit_every = args.txnspec.ops[OP_INSERT][OP_COUNT];
|
||||||
|
for (auto i = key_checkpoint; i <= key_end; i++) {
|
||||||
|
genKey(keystr.data(), KEY_PREFIX, args, i);
|
||||||
|
randomString(valstr.data(), args.value_length);
|
||||||
|
tx.set(keystr, valstr);
|
||||||
|
stats.incrOpCount(OP_INSERT);
|
||||||
|
if (i == key_end || (i - key_begin + 1) % num_commit_every == 0) {
|
||||||
|
watch_commit.start();
|
||||||
|
tx.commit().then([this, state = shared_from_this(), i](Future f) {
|
||||||
|
if (auto err = f.error()) {
|
||||||
|
logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
|
||||||
|
"ERROR",
|
||||||
|
"commit for populate returned '{}'",
|
||||||
|
err.what());
|
||||||
|
tx.onError(err).then([this, state = shared_from_this()](Future f) {
|
||||||
|
const auto f_rc = handleForOnError(tx, f, "ON_ERROR_FOR_POPULATE");
|
||||||
|
if (f_rc == FutureRC::ABORT) {
|
||||||
|
signalEnd();
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
postNextTick();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// successfully committed
|
||||||
|
watch_commit.stop();
|
||||||
|
watch_tx.setStop(watch_commit.getStop());
|
||||||
|
if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
|
||||||
|
const auto commit_latency = watch_commit.diff();
|
||||||
|
const auto tx_duration = watch_tx.diff();
|
||||||
|
stats.addLatency(OP_COMMIT, commit_latency);
|
||||||
|
stats.addLatency(OP_TRANSACTION, tx_duration);
|
||||||
|
sample_bins[OP_COMMIT].put(commit_latency);
|
||||||
|
sample_bins[OP_TRANSACTION].put(tx_duration);
|
||||||
|
}
|
||||||
|
stats.incrOpCount(OP_COMMIT);
|
||||||
|
stats.incrOpCount(OP_TRANSACTION);
|
||||||
|
tx.reset();
|
||||||
|
watch_tx.startFromStop();
|
||||||
|
key_checkpoint = i + 1;
|
||||||
|
if (i != key_end) {
|
||||||
|
postNextTick();
|
||||||
|
} else {
|
||||||
|
logr.debug("Populated {} rows [{}, {}]: {:6.3f} sec",
|
||||||
|
key_end - key_begin + 1,
|
||||||
|
key_begin,
|
||||||
|
key_end,
|
||||||
|
toDoubleSeconds(watch_total.stop().diff()));
|
||||||
|
signalEnd();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Queues the next runOneTick() call on the io_context.
void ResumableStateForRunWorkload::postNextTick() {
    // The captured shared_ptr keeps this state object alive until the posted tick has run.
    auto next_tick = [this, keep_alive = shared_from_this()]() { runOneTick(); };
    boost::asio::post(io_context, next_tick);
}
|
||||||
|
|
||||||
|
void ResumableStateForRunWorkload::runOneTick() {
|
||||||
|
assert(iter != OpEnd);
|
||||||
|
if (iter.step == 0 /* first step */)
|
||||||
|
prepareKeys(iter.op, key1, key2, args);
|
||||||
|
watch_step.start();
|
||||||
|
if (iter.step == 0)
|
||||||
|
watch_op = Stopwatch(watch_step.getStart());
|
||||||
|
auto f = Future{};
|
||||||
|
// to minimize context switch overhead, repeat immediately completed ops
|
||||||
|
// in a loop, not an async continuation.
|
||||||
|
repeat_immediate_steps:
|
||||||
|
f = opTable[iter.op].stepFunction(iter.step)(tx, args, key1, key2, val);
|
||||||
|
if (!f) {
|
||||||
|
// immediately completed client-side ops: e.g. set, setrange, clear, clearrange, ...
|
||||||
|
updateStepStats();
|
||||||
|
iter = getOpNext(args, iter);
|
||||||
|
if (iter == OpEnd)
|
||||||
|
onTransactionSuccess();
|
||||||
|
else
|
||||||
|
goto repeat_immediate_steps;
|
||||||
|
} else {
|
||||||
|
// step is blocking. register a continuation and return
|
||||||
|
f.then([this, state = shared_from_this()](Future f) {
|
||||||
|
if (auto postStepFn = opTable[iter.op].postStepFunction(iter.step))
|
||||||
|
postStepFn(f, tx, args, key1, key2, val);
|
||||||
|
if (iter.stepKind() != StepKind::ON_ERROR) {
|
||||||
|
if (auto err = f.error()) {
|
||||||
|
logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
|
||||||
|
"ERROR",
|
||||||
|
"{}:{} returned '{}'",
|
||||||
|
iter.opName(),
|
||||||
|
iter.step,
|
||||||
|
err.what());
|
||||||
|
tx.onError(err).then([this, state = shared_from_this()](Future f) {
|
||||||
|
const auto rc = handleForOnError(tx, f, fmt::format("{}:{}", iter.opName(), iter.step));
|
||||||
|
if (rc == FutureRC::RETRY) {
|
||||||
|
stats.incrErrorCount(iter.op);
|
||||||
|
} else if (rc == FutureRC::CONFLICT) {
|
||||||
|
stats.incrConflictCount();
|
||||||
|
} else if (rc == FutureRC::ABORT) {
|
||||||
|
tx.reset();
|
||||||
|
signalEnd();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// restart this iteration from beginning
|
||||||
|
iter = getOpBegin(args);
|
||||||
|
needs_commit = false;
|
||||||
|
postNextTick();
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// async step succeeded
|
||||||
|
updateStepStats();
|
||||||
|
iter = getOpNext(args, iter);
|
||||||
|
if (iter == OpEnd) {
|
||||||
|
onTransactionSuccess();
|
||||||
|
} else {
|
||||||
|
postNextTick();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// blob granules op error
|
||||||
|
auto rc = handleForOnError(tx, f, "BG_ON_ERROR");
|
||||||
|
if (rc == FutureRC::RETRY) {
|
||||||
|
stats.incrErrorCount(iter.op);
|
||||||
|
} else if (rc == FutureRC::CONFLICT) {
|
||||||
|
stats.incrConflictCount();
|
||||||
|
} else if (rc == FutureRC::ABORT) {
|
||||||
|
tx.reset();
|
||||||
|
stopcount.fetch_add(1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
iter = getOpBegin(args);
|
||||||
|
needs_commit = false;
|
||||||
|
// restart this iteration from beginning
|
||||||
|
postNextTick();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ResumableStateForRunWorkload::updateStepStats() {
|
||||||
|
logr.debug("Step {}:{} succeeded", iter.opName(), iter.step);
|
||||||
|
// step successful
|
||||||
|
watch_step.stop();
|
||||||
|
const auto do_sample = stats.getOpCount(OP_TRANSACTION) % args.sampling == 0;
|
||||||
|
if (iter.stepKind() == StepKind::COMMIT) {
|
||||||
|
// reset transaction boundary
|
||||||
|
const auto step_latency = watch_step.diff();
|
||||||
|
if (do_sample) {
|
||||||
|
stats.addLatency(OP_COMMIT, step_latency);
|
||||||
|
sample_bins[OP_COMMIT].put(step_latency);
|
||||||
|
}
|
||||||
|
tx.reset();
|
||||||
|
stats.incrOpCount(OP_COMMIT);
|
||||||
|
needs_commit = false;
|
||||||
|
}
|
||||||
|
// op completed successfully
|
||||||
|
if (iter.step + 1 == opTable[iter.op].steps()) {
|
||||||
|
if (opTable[iter.op].needsCommit())
|
||||||
|
needs_commit = true;
|
||||||
|
watch_op.setStop(watch_step.getStop());
|
||||||
|
if (do_sample) {
|
||||||
|
const auto op_latency = watch_op.diff();
|
||||||
|
stats.addLatency(iter.op, op_latency);
|
||||||
|
sample_bins[iter.op].put(op_latency);
|
||||||
|
}
|
||||||
|
stats.incrOpCount(iter.op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ResumableStateForRunWorkload::onTransactionSuccess() {
|
||||||
|
if (needs_commit || args.commit_get) {
|
||||||
|
// task completed, need to commit before finish
|
||||||
|
watch_commit.start();
|
||||||
|
tx.commit().then([this, state = shared_from_this()](Future f) {
|
||||||
|
if (auto err = f.error()) {
|
||||||
|
// commit had errors
|
||||||
|
logr.printWithLogLevel(err.retryable() ? VERBOSE_WARN : VERBOSE_NONE,
|
||||||
|
"ERROR",
|
||||||
|
"Post-iteration commit returned error: {}",
|
||||||
|
err.what());
|
||||||
|
tx.onError(err).then([this, state = shared_from_this()](Future f) {
|
||||||
|
const auto rc = handleForOnError(tx, f, "ON_ERROR");
|
||||||
|
if (rc == FutureRC::CONFLICT)
|
||||||
|
stats.incrConflictCount();
|
||||||
|
else
|
||||||
|
stats.incrErrorCount(OP_COMMIT);
|
||||||
|
if (rc == FutureRC::ABORT) {
|
||||||
|
signalEnd();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (ended()) {
|
||||||
|
signalEnd();
|
||||||
|
} else {
|
||||||
|
iter = getOpBegin(args);
|
||||||
|
needs_commit = false;
|
||||||
|
postNextTick();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// commit successful
|
||||||
|
watch_commit.stop();
|
||||||
|
watch_tx.setStop(watch_commit.getStop());
|
||||||
|
if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
|
||||||
|
const auto commit_latency = watch_commit.diff();
|
||||||
|
const auto tx_duration = watch_tx.diff();
|
||||||
|
stats.addLatency(OP_COMMIT, commit_latency);
|
||||||
|
stats.addLatency(OP_TRANSACTION, commit_latency);
|
||||||
|
sample_bins[OP_COMMIT].put(commit_latency);
|
||||||
|
sample_bins[OP_TRANSACTION].put(tx_duration);
|
||||||
|
}
|
||||||
|
stats.incrOpCount(OP_COMMIT);
|
||||||
|
stats.incrOpCount(OP_TRANSACTION);
|
||||||
|
tx.reset();
|
||||||
|
watch_tx.startFromStop();
|
||||||
|
if (ended()) {
|
||||||
|
signalEnd();
|
||||||
|
} else {
|
||||||
|
// start next iteration
|
||||||
|
iter = getOpBegin(args);
|
||||||
|
postNextTick();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// transaction completed but no need to commit
|
||||||
|
watch_tx.stop();
|
||||||
|
if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
|
||||||
|
const auto tx_duration = watch_tx.diff();
|
||||||
|
stats.addLatency(OP_TRANSACTION, tx_duration);
|
||||||
|
sample_bins[OP_TRANSACTION].put(tx_duration);
|
||||||
|
}
|
||||||
|
stats.incrOpCount(OP_TRANSACTION);
|
||||||
|
watch_tx.startFromStop();
|
||||||
|
tx.reset();
|
||||||
|
if (ended()) {
|
||||||
|
signalEnd();
|
||||||
|
} else {
|
||||||
|
iter = getOpBegin(args);
|
||||||
|
// start next iteration
|
||||||
|
postNextTick();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mako
|
|
@ -0,0 +1,127 @@
|
||||||
|
/*
|
||||||
|
* async.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_ASYNC_HPP
|
||||||
|
#define MAKO_ASYNC_HPP
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <memory>
|
||||||
|
#include <boost/asio.hpp>
|
||||||
|
#include "logger.hpp"
|
||||||
|
#include "mako.hpp"
|
||||||
|
#include "shm.hpp"
|
||||||
|
#include "stats.hpp"
|
||||||
|
#include "time.hpp"
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
// as we don't have coroutines yet, we need to store in heap the complete state of execution,
|
||||||
|
// such that we can resume exactly where we were from last database op.
|
||||||
|
struct ResumableStateForPopulate : std::enable_shared_from_this<ResumableStateForPopulate> {
|
||||||
|
Logger logr;
|
||||||
|
fdb::Database db;
|
||||||
|
fdb::Transaction tx;
|
||||||
|
boost::asio::io_context& io_context;
|
||||||
|
Arguments const& args;
|
||||||
|
ThreadStatistics& stats;
|
||||||
|
std::atomic<int>& stopcount;
|
||||||
|
LatencySampleBinArray sample_bins;
|
||||||
|
int key_begin;
|
||||||
|
int key_end;
|
||||||
|
int key_checkpoint;
|
||||||
|
fdb::ByteString keystr;
|
||||||
|
fdb::ByteString valstr;
|
||||||
|
Stopwatch watch_tx;
|
||||||
|
Stopwatch watch_commit;
|
||||||
|
Stopwatch watch_total;
|
||||||
|
|
||||||
|
ResumableStateForPopulate(Logger logr,
|
||||||
|
fdb::Database db,
|
||||||
|
fdb::Transaction tx,
|
||||||
|
boost::asio::io_context& io_context,
|
||||||
|
Arguments const& args,
|
||||||
|
ThreadStatistics& stats,
|
||||||
|
std::atomic<int>& stopcount,
|
||||||
|
int key_begin,
|
||||||
|
int key_end)
|
||||||
|
: logr(logr), db(db), tx(tx), io_context(io_context), args(args), stats(stats), stopcount(stopcount),
|
||||||
|
key_begin(key_begin), key_end(key_end), key_checkpoint(key_begin) {
|
||||||
|
keystr.resize(args.key_length);
|
||||||
|
valstr.resize(args.value_length);
|
||||||
|
}
|
||||||
|
void runOneTick();
|
||||||
|
void postNextTick();
|
||||||
|
void signalEnd() { stopcount.fetch_add(1); }
|
||||||
|
};
|
||||||
|
|
||||||
|
using PopulateStateHandle = std::shared_ptr<ResumableStateForPopulate>;
|
||||||
|
|
||||||
|
struct ResumableStateForRunWorkload : std::enable_shared_from_this<ResumableStateForRunWorkload> {
|
||||||
|
Logger logr;
|
||||||
|
fdb::Database db;
|
||||||
|
fdb::Transaction tx;
|
||||||
|
boost::asio::io_context& io_context;
|
||||||
|
Arguments const& args;
|
||||||
|
ThreadStatistics& stats;
|
||||||
|
std::atomic<int>& stopcount;
|
||||||
|
std::atomic<int> const& signal;
|
||||||
|
int max_iters;
|
||||||
|
OpIterator iter;
|
||||||
|
LatencySampleBinArray sample_bins;
|
||||||
|
fdb::ByteString key1;
|
||||||
|
fdb::ByteString key2;
|
||||||
|
fdb::ByteString val;
|
||||||
|
Stopwatch watch_step;
|
||||||
|
Stopwatch watch_op;
|
||||||
|
Stopwatch watch_commit;
|
||||||
|
Stopwatch watch_tx;
|
||||||
|
bool needs_commit;
|
||||||
|
|
||||||
|
ResumableStateForRunWorkload(Logger logr,
|
||||||
|
fdb::Database db,
|
||||||
|
fdb::Transaction tx,
|
||||||
|
boost::asio::io_context& io_context,
|
||||||
|
Arguments const& args,
|
||||||
|
ThreadStatistics& stats,
|
||||||
|
std::atomic<int>& stopcount,
|
||||||
|
std::atomic<int> const& signal,
|
||||||
|
int max_iters,
|
||||||
|
OpIterator iter)
|
||||||
|
: logr(logr), db(db), tx(tx), io_context(io_context), args(args), stats(stats), stopcount(stopcount),
|
||||||
|
signal(signal), max_iters(max_iters), iter(iter), needs_commit(false) {
|
||||||
|
key1.resize(args.key_length);
|
||||||
|
key2.resize(args.key_length);
|
||||||
|
val.resize(args.value_length);
|
||||||
|
}
|
||||||
|
void signalEnd() noexcept { stopcount.fetch_add(1); }
|
||||||
|
bool ended() noexcept {
|
||||||
|
return (max_iters != -1 && max_iters >= stats.getOpCount(OP_TRANSACTION)) || signal.load() == SIGNAL_RED;
|
||||||
|
}
|
||||||
|
void postNextTick();
|
||||||
|
void runOneTick();
|
||||||
|
void updateStepStats();
|
||||||
|
void onTransactionSuccess();
|
||||||
|
};
|
||||||
|
|
||||||
|
using RunWorkloadStateHandle = std::shared_ptr<ResumableStateForRunWorkload>;
|
||||||
|
|
||||||
|
} // namespace mako
|
||||||
|
|
||||||
|
#endif /*MAKO_ASYNC_HPP*/
|
|
@ -0,0 +1,116 @@
|
||||||
|
/*
|
||||||
|
* blob_granules.cpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "blob_granules.hpp"
|
||||||
|
#include "limit.hpp"
|
||||||
|
#include "logger.hpp"
|
||||||
|
#include <cstdio>
|
||||||
|
#include <fdb_api.hpp>
|
||||||
|
|
||||||
|
extern thread_local mako::Logger logr;
|
||||||
|
|
||||||
|
namespace mako::blob_granules::local_file {
|
||||||
|
|
||||||
|
int64_t startLoad(const char* filename,
|
||||||
|
int filenameLength,
|
||||||
|
int64_t offset,
|
||||||
|
int64_t length,
|
||||||
|
int64_t fullFileLength,
|
||||||
|
void* userContext) {
|
||||||
|
FILE* fp;
|
||||||
|
char full_fname[PATH_MAX]{
|
||||||
|
0,
|
||||||
|
};
|
||||||
|
int loadId;
|
||||||
|
uint8_t* data;
|
||||||
|
size_t readSize;
|
||||||
|
|
||||||
|
auto context = static_cast<UserContext*>(userContext);
|
||||||
|
|
||||||
|
loadId = context->nextId;
|
||||||
|
if (context->dataById[loadId] != 0) {
|
||||||
|
logr.error("too many granule file loads at once: {}", MAX_BG_IDS);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
context->nextId = (context->nextId + 1) % MAX_BG_IDS;
|
||||||
|
|
||||||
|
int ret = snprintf(full_fname, PATH_MAX, "%s%s", context->bgFilePath, filename);
|
||||||
|
if (ret < 0 || ret >= PATH_MAX) {
|
||||||
|
logr.error("BG filename too long: {}{}", context->bgFilePath, filename);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fp = fopen(full_fname, "r");
|
||||||
|
if (!fp) {
|
||||||
|
logr.error("BG could not open file: {}", full_fname);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// don't seek if offset == 0
|
||||||
|
if (offset && fseek(fp, offset, SEEK_SET)) {
|
||||||
|
// if fseek was non-zero, it failed
|
||||||
|
logr.error("BG could not seek to %{} in file {}", offset, full_fname);
|
||||||
|
fclose(fp);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
data = new uint8_t[length];
|
||||||
|
readSize = fread(data, sizeof(uint8_t), length, fp);
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
if (readSize != length) {
|
||||||
|
logr.error("BG could not read {} bytes from file: {}", length, full_fname);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
context->dataById[loadId] = data;
|
||||||
|
return loadId;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint8_t* getLoad(int64_t loadId, void* userContext) {
|
||||||
|
auto context = static_cast<UserContext*>(userContext);
|
||||||
|
if (context->dataById[loadId] == 0) {
|
||||||
|
logr.error("BG loadId invalid for get_load: {}", loadId);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return context->dataById[loadId];
|
||||||
|
}
|
||||||
|
|
||||||
|
void freeLoad(int64_t loadId, void* userContext) {
|
||||||
|
auto context = static_cast<UserContext*>(userContext);
|
||||||
|
if (context->dataById[loadId] == 0) {
|
||||||
|
logr.error("BG loadId invalid for free_load: {}", loadId);
|
||||||
|
}
|
||||||
|
delete[] context->dataById[loadId];
|
||||||
|
context->dataById[loadId] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the native read-blob-granule context that routes the file-load
// callbacks to the local-file implementations in this file, with `ctx` as the
// user context passed back to them.
fdb::native::FDBReadBlobGranuleContext createApiContext(UserContext& ctx, bool materialize_files) {
    fdb::native::FDBReadBlobGranuleContext api_ctx{};
    api_ctx.userContext = &ctx;

    // callbacks
    api_ctx.start_load_f = &startLoad;
    api_ctx.get_load_f = &getLoad;
    api_ctx.free_load_f = &freeLoad;

    // settings
    api_ctx.debugNoMaterialize = !materialize_files;
    api_ctx.granuleParallelism = 2; // TODO make knob or setting for changing this?
    return api_ctx;
}
|
||||||
|
|
||||||
|
} // namespace mako::blob_granules::local_file
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* blob_granules.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_BLOB_GRANULES_HPP
|
||||||
|
#define MAKO_BLOB_GRANULES_HPP
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
#include <fdb_api.hpp>
|
||||||
|
|
||||||
|
namespace mako::blob_granules::local_file {
|
||||||
|
|
||||||
|
constexpr const int MAX_BG_IDS = 1000;
|
||||||
|
|
||||||
|
// TODO: could always abstract this into something more generically usable by something other than mako.
|
||||||
|
// But outside of testing there are likely few use cases for local granules
|
||||||
|
struct UserContext {
|
||||||
|
char const* bgFilePath;
|
||||||
|
int nextId;
|
||||||
|
std::unique_ptr<uint8_t*[]> dataByIdMem;
|
||||||
|
uint8_t** dataById;
|
||||||
|
|
||||||
|
UserContext(char const* filePath)
|
||||||
|
: bgFilePath(filePath), nextId(0), dataByIdMem(new uint8_t*[MAX_BG_IDS]()), dataById(dataByIdMem.get()) {}
|
||||||
|
|
||||||
|
void clear() { dataByIdMem.reset(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
fdb::native::FDBReadBlobGranuleContext createApiContext(UserContext& ctx, bool materialize_files);
|
||||||
|
|
||||||
|
} // namespace mako::blob_granules::local_file
|
||||||
|
|
||||||
|
#endif /*MAKO_BLOB_GRANULES_HPP*/
|
|
@ -0,0 +1,89 @@
|
||||||
|
/*
|
||||||
|
* future.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_FUTURE_HPP
|
||||||
|
#define MAKO_FUTURE_HPP
|
||||||
|
|
||||||
|
#include <fdb_api.hpp>
|
||||||
|
#include <cassert>
|
||||||
|
#include <string_view>
|
||||||
|
#include "logger.hpp"
|
||||||
|
#include "macro.hpp"
|
||||||
|
|
||||||
|
extern thread_local mako::Logger logr;
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
enum class FutureRC { OK, RETRY, CONFLICT, ABORT };
|
||||||
|
|
||||||
|
template <class FutureType>
|
||||||
|
force_inline FutureRC handleForOnError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
|
||||||
|
if (auto err = f.error()) {
|
||||||
|
if (err.is(1020 /*not_committed*/)) {
|
||||||
|
return FutureRC::CONFLICT;
|
||||||
|
} else if (err.retryable()) {
|
||||||
|
logr.warn("Retryable error '{}' found at on_error(), step: {}", err.what(), step);
|
||||||
|
return FutureRC::RETRY;
|
||||||
|
} else {
|
||||||
|
logr.error("Unretryable error '{}' found at on_error(), step: {}", err.what(), step);
|
||||||
|
tx.reset();
|
||||||
|
return FutureRC::ABORT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return FutureRC::RETRY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class FutureType>
|
||||||
|
force_inline FutureRC waitAndHandleForOnError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
|
||||||
|
assert(f);
|
||||||
|
if (auto err = f.blockUntilReady()) {
|
||||||
|
logr.error("'{}' found while waiting for on_error() future, step: {}", err.what(), step);
|
||||||
|
return FutureRC::ABORT;
|
||||||
|
}
|
||||||
|
return handleForOnError(tx, f, step);
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait on any non-immediate tx-related step to complete. Follow up with on_error().
|
||||||
|
template <class FutureType>
|
||||||
|
force_inline FutureRC waitAndHandleError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
|
||||||
|
assert(f);
|
||||||
|
auto err = fdb::Error{};
|
||||||
|
if ((err = f.blockUntilReady())) {
|
||||||
|
const auto retry = err.retryable();
|
||||||
|
logr.error("{} error '{}' found during step: {}", (retry ? "Retryable" : "Unretryable"), err.what(), step);
|
||||||
|
return retry ? FutureRC::RETRY : FutureRC::ABORT;
|
||||||
|
}
|
||||||
|
err = f.error();
|
||||||
|
if (!err)
|
||||||
|
return FutureRC::OK;
|
||||||
|
if (err.retryable()) {
|
||||||
|
logr.warn("step {} returned '{}'", step, err.what());
|
||||||
|
} else {
|
||||||
|
logr.error("step {} returned '{}'", step, err.what());
|
||||||
|
}
|
||||||
|
// implicit backoff
|
||||||
|
auto follow_up = tx.onError(err);
|
||||||
|
return waitAndHandleForOnError(tx, f, step);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mako
|
||||||
|
|
||||||
|
#endif /*MAKO_FUTURE_HPP*/
|
|
@ -0,0 +1,32 @@
|
||||||
|
/*
|
||||||
|
* limit.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIMIT_HPP
|
||||||
|
#define LIMIT_HPP
|
||||||
|
|
||||||
|
#if defined(__linux__)
|
||||||
|
#include <linux/limits.h>
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#include <sys/syslimits.h>
|
||||||
|
#else
|
||||||
|
#include <limits.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,117 @@
|
||||||
|
/*
|
||||||
|
* logger.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_LOGGER_HPP
|
||||||
|
#define MAKO_LOGGER_HPP
|
||||||
|
#include <fmt/format.h>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <iterator>
|
||||||
|
#include <string_view>
|
||||||
|
#include "process.hpp"
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
// Verbosity levels, from least to most chatty.
constexpr int VERBOSE_NONE = 0; // will still print errors
constexpr int VERBOSE_DEFAULT = 1; // will print info and work stats
constexpr int VERBOSE_WARN = 2; // will print expected errors
constexpr int VERBOSE_DEBUG = 3; // will print everything
|
||||||
|
|
||||||
|
template <ProcKind P>
|
||||||
|
using ProcKindConstant = std::integral_constant<ProcKind, P>;
|
||||||
|
|
||||||
|
using MainProcess = ProcKindConstant<ProcKind::MAIN>;
|
||||||
|
using StatsProcess = ProcKindConstant<ProcKind::STATS>;
|
||||||
|
using WorkerProcess = ProcKindConstant<ProcKind::WORKER>;
|
||||||
|
|
||||||
|
class Logger {
|
||||||
|
ProcKind proc;
|
||||||
|
int verbosity{ VERBOSE_DEFAULT };
|
||||||
|
int process_id{ -1 };
|
||||||
|
int thread_id{ -1 };
|
||||||
|
|
||||||
|
void putHeader(fmt::memory_buffer& buf, std::string_view category) {
|
||||||
|
if (proc == ProcKind::MAIN) {
|
||||||
|
fmt::format_to(std::back_inserter(buf), "[MAIN] {}: ", category);
|
||||||
|
} else if (proc == ProcKind::STATS) {
|
||||||
|
fmt::format_to(std::back_inserter(buf), "[STATS] {}: ", category);
|
||||||
|
} else {
|
||||||
|
if (thread_id == -1) {
|
||||||
|
fmt::format_to(std::back_inserter(buf), "[WORKER{:3d}] {}: ", process_id + 1, category);
|
||||||
|
} else {
|
||||||
|
fmt::format_to(
|
||||||
|
std::back_inserter(buf), "[WORKER{:3d}:{:3d}] {}: ", process_id + 1, thread_id + 1, category);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
Logger(MainProcess, int verbosity) noexcept : proc(MainProcess::value), verbosity(verbosity) {}
|
||||||
|
|
||||||
|
Logger(StatsProcess, int verbosity) noexcept : proc(StatsProcess::value), verbosity(verbosity) {}
|
||||||
|
|
||||||
|
Logger(WorkerProcess, int verbosity, int process_id, int thread_id = -1) noexcept
|
||||||
|
: proc(WorkerProcess::value), verbosity(verbosity), process_id(process_id), thread_id(thread_id) {}
|
||||||
|
|
||||||
|
Logger(const Logger&) noexcept = default;
|
||||||
|
Logger& operator=(const Logger&) noexcept = default;
|
||||||
|
|
||||||
|
void setVerbosity(int value) noexcept {
|
||||||
|
assert(value >= VERBOSE_NONE && value <= VERBOSE_DEBUG);
|
||||||
|
verbosity = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
void printWithLogLevel(int log_level, std::string_view header, Args&&... args) {
|
||||||
|
assert(log_level >= VERBOSE_NONE && log_level <= VERBOSE_DEBUG);
|
||||||
|
if (log_level <= verbosity) {
|
||||||
|
const auto fp = log_level == VERBOSE_NONE ? stderr : stdout;
|
||||||
|
// 500B inline buffer
|
||||||
|
auto buf = fmt::memory_buffer{};
|
||||||
|
putHeader(buf, header);
|
||||||
|
fmt::format_to(std::back_inserter(buf), std::forward<Args>(args)...);
|
||||||
|
fmt::print(fp, "{}\n", std::string_view(buf.data(), buf.size()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
void error(Args&&... args) {
|
||||||
|
printWithLogLevel(VERBOSE_NONE, "ERROR", std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
void info(Args&&... args) {
|
||||||
|
printWithLogLevel(VERBOSE_DEFAULT, "INFO", std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
void warn(Args&&... args) {
|
||||||
|
printWithLogLevel(VERBOSE_WARN, "WARNING", std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
void debug(Args&&... args) {
|
||||||
|
printWithLogLevel(VERBOSE_DEBUG, "DEBUG", std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mako
|
||||||
|
|
||||||
|
#endif /*MAKO_LOGGER_HPP*/
|
|
@ -0,0 +1,32 @@
|
||||||
|
/*
|
||||||
|
* macro.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_MACRO_HPP
|
||||||
|
#define MAKO_MACRO_HPP
|
||||||
|
|
||||||
|
// force_inline: compiler-specific spelling of an "always inline" function specifier.
// Compilation fails on compilers for which no spelling is known.
#ifdef __GNUG__
#define force_inline inline __attribute__((__always_inline__))
#elif defined(_MSC_VER)
#define force_inline __forceinline
#else
#error Missing force inline
#endif
|
||||||
|
|
||||||
|
#endif /*MAKO_MACRO_HPP*/
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,209 +0,0 @@
|
||||||
#ifndef MAKO_H
|
|
||||||
#define MAKO_H
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#ifndef FDB_API_VERSION
|
|
||||||
#define FDB_API_VERSION 720
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <foundationdb/fdb_c.h>
|
|
||||||
#include <pthread.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#if defined(__linux__)
|
|
||||||
#include <linux/limits.h>
|
|
||||||
#elif defined(__APPLE__)
|
|
||||||
#include <sys/syslimits.h>
|
|
||||||
#else
|
|
||||||
#include <limits.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define VERBOSE_NONE 0
|
|
||||||
#define VERBOSE_DEFAULT 1
|
|
||||||
#define VERBOSE_ANNOYING 2
|
|
||||||
#define VERBOSE_DEBUG 3
|
|
||||||
|
|
||||||
#define MODE_INVALID -1
|
|
||||||
#define MODE_CLEAN 0
|
|
||||||
#define MODE_BUILD 1
|
|
||||||
#define MODE_RUN 2
|
|
||||||
|
|
||||||
#define FDB_SUCCESS 0
|
|
||||||
#define FDB_ERROR_RETRY -1
|
|
||||||
#define FDB_ERROR_ABORT -2
|
|
||||||
#define FDB_ERROR_CONFLICT -3
|
|
||||||
|
|
||||||
#define LAT_BLOCK_SIZE 511 /* size of each block to get detailed latency for each operation */
|
|
||||||
|
|
||||||
/* transaction specification */
|
|
||||||
enum Operations {
|
|
||||||
OP_GETREADVERSION,
|
|
||||||
OP_GET,
|
|
||||||
OP_GETRANGE,
|
|
||||||
OP_SGET,
|
|
||||||
OP_SGETRANGE,
|
|
||||||
OP_UPDATE,
|
|
||||||
OP_INSERT,
|
|
||||||
OP_INSERTRANGE,
|
|
||||||
OP_OVERWRITE,
|
|
||||||
OP_CLEAR,
|
|
||||||
OP_SETCLEAR,
|
|
||||||
OP_CLEARRANGE,
|
|
||||||
OP_SETCLEARRANGE,
|
|
||||||
OP_COMMIT,
|
|
||||||
OP_TRANSACTION, /* pseudo-operation - cumulative time for the operation + commit */
|
|
||||||
OP_READ_BG,
|
|
||||||
MAX_OP /* must be the last item */
|
|
||||||
};
|
|
||||||
|
|
||||||
#define OP_COUNT 0
|
|
||||||
#define OP_RANGE 1
|
|
||||||
#define OP_REVERSE 2
|
|
||||||
|
|
||||||
/* for long arguments */
|
|
||||||
enum Arguments {
|
|
||||||
ARG_KEYLEN,
|
|
||||||
ARG_VALLEN,
|
|
||||||
ARG_TPS,
|
|
||||||
ARG_COMMITGET,
|
|
||||||
ARG_SAMPLING,
|
|
||||||
ARG_VERSION,
|
|
||||||
ARG_KNOBS,
|
|
||||||
ARG_FLATBUFFERS,
|
|
||||||
ARG_LOGGROUP,
|
|
||||||
ARG_PREFIXPADDING,
|
|
||||||
ARG_TRACE,
|
|
||||||
ARG_TRACEPATH,
|
|
||||||
ARG_TRACEFORMAT,
|
|
||||||
ARG_TPSMAX,
|
|
||||||
ARG_TPSMIN,
|
|
||||||
ARG_TPSINTERVAL,
|
|
||||||
ARG_TPSCHANGE,
|
|
||||||
ARG_TXNTRACE,
|
|
||||||
ARG_TXNTAGGING,
|
|
||||||
ARG_TXNTAGGINGPREFIX,
|
|
||||||
ARG_STREAMING_MODE,
|
|
||||||
ARG_DISABLE_RYW,
|
|
||||||
ARG_CLIENT_THREADS_PER_VERSION,
|
|
||||||
ARG_JSON_REPORT,
|
|
||||||
ARG_BG_FILE_PATH // if blob granule files are stored locally, mako will read and materialize them if this is set
|
|
||||||
};
|
|
||||||
|
|
||||||
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
|
|
||||||
|
|
||||||
#define KEYPREFIX "mako"
|
|
||||||
#define KEYPREFIXLEN 4
|
|
||||||
|
|
||||||
#define TEMP_DATA_STORE "/tmp/makoTemp"
|
|
||||||
|
|
||||||
/* we set mako_txnspec_t and mako_args_t only once in the master process,
|
|
||||||
* and won't be touched by child processes.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
/* for each operation, it stores "count", "range" and "reverse" */
|
|
||||||
int ops[MAX_OP][3];
|
|
||||||
} mako_txnspec_t;
|
|
||||||
|
|
||||||
#define LOGGROUP_MAX 256
|
|
||||||
#define KNOB_MAX 256
|
|
||||||
#define TAGPREFIXLENGTH_MAX 8
|
|
||||||
#define NUM_CLUSTERS_MAX 3
|
|
||||||
#define NUM_DATABASES_MAX 10
|
|
||||||
#define MAX_BG_IDS 1000
|
|
||||||
|
|
||||||
/* benchmark parameters */
|
|
||||||
typedef struct {
|
|
||||||
int api_version;
|
|
||||||
int json;
|
|
||||||
int num_processes;
|
|
||||||
int num_threads;
|
|
||||||
int mode;
|
|
||||||
int rows; /* is 2 billion enough? */
|
|
||||||
int seconds;
|
|
||||||
int iteration;
|
|
||||||
int tpsmax;
|
|
||||||
int tpsmin;
|
|
||||||
int tpsinterval;
|
|
||||||
int tpschange;
|
|
||||||
int sampling;
|
|
||||||
int key_length;
|
|
||||||
int value_length;
|
|
||||||
int zipf;
|
|
||||||
int commit_get;
|
|
||||||
int verbose;
|
|
||||||
mako_txnspec_t txnspec;
|
|
||||||
char cluster_files[NUM_CLUSTERS_MAX][PATH_MAX];
|
|
||||||
int num_fdb_clusters;
|
|
||||||
int num_databases;
|
|
||||||
char log_group[LOGGROUP_MAX];
|
|
||||||
int prefixpadding;
|
|
||||||
int trace;
|
|
||||||
char tracepath[PATH_MAX];
|
|
||||||
int traceformat; /* 0 - XML, 1 - JSON */
|
|
||||||
char knobs[KNOB_MAX];
|
|
||||||
uint8_t flatbuffers;
|
|
||||||
int txntrace;
|
|
||||||
int txntagging;
|
|
||||||
char txntagging_prefix[TAGPREFIXLENGTH_MAX];
|
|
||||||
FDBStreamingMode streaming_mode;
|
|
||||||
int client_threads_per_version;
|
|
||||||
int disable_ryw;
|
|
||||||
char json_output_path[PATH_MAX];
|
|
||||||
bool bg_materialize_files;
|
|
||||||
char bg_file_path[PATH_MAX];
|
|
||||||
} mako_args_t;
|
|
||||||
|
|
||||||
/* shared memory */
|
|
||||||
#define SIGNAL_RED 0
|
|
||||||
#define SIGNAL_GREEN 1
|
|
||||||
#define SIGNAL_OFF 2
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int signal;
|
|
||||||
int readycount;
|
|
||||||
double throttle_factor;
|
|
||||||
int stopcount;
|
|
||||||
} mako_shmhdr_t;
|
|
||||||
|
|
||||||
/* memory block allocated to each operation when collecting detailed latency */
|
|
||||||
typedef struct {
|
|
||||||
uint64_t data[LAT_BLOCK_SIZE];
|
|
||||||
void* next_block;
|
|
||||||
} lat_block_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint64_t xacts;
|
|
||||||
uint64_t conflicts;
|
|
||||||
uint64_t ops[MAX_OP];
|
|
||||||
uint64_t errors[MAX_OP];
|
|
||||||
uint64_t latency_samples[MAX_OP];
|
|
||||||
uint64_t latency_us_total[MAX_OP];
|
|
||||||
uint64_t latency_us_min[MAX_OP];
|
|
||||||
uint64_t latency_us_max[MAX_OP];
|
|
||||||
} mako_stats_t;
|
|
||||||
|
|
||||||
/* per-process information */
|
|
||||||
typedef struct {
|
|
||||||
int worker_id;
|
|
||||||
pid_t parent_id;
|
|
||||||
mako_args_t* args;
|
|
||||||
mako_shmhdr_t* shm;
|
|
||||||
FDBDatabase* databases[NUM_DATABASES_MAX];
|
|
||||||
} process_info_t;
|
|
||||||
|
|
||||||
/* args for threads */
|
|
||||||
typedef struct {
|
|
||||||
int thread_id;
|
|
||||||
int database_index; // index of the database to do work to
|
|
||||||
int elem_size[MAX_OP]; /* stores the multiple of LAT_BLOCK_SIZE to check the memory allocation of each operation */
|
|
||||||
bool is_memory_allocated[MAX_OP]; /* flag specified for each operation, whether the memory was allocated to that
|
|
||||||
specific operation */
|
|
||||||
lat_block_t* block[MAX_OP];
|
|
||||||
process_info_t* process;
|
|
||||||
} thread_args_t;
|
|
||||||
|
|
||||||
/* process type */
|
|
||||||
typedef enum { proc_master = 0, proc_worker, proc_stats } proc_type_t;
|
|
||||||
|
|
||||||
#endif /* MAKO_H */
|
|
|
@ -0,0 +1,168 @@
|
||||||
|
/*
|
||||||
|
* mako.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_HPP
|
||||||
|
#define MAKO_HPP
|
||||||
|
|
||||||
|
#ifndef FDB_API_VERSION
|
||||||
|
#define FDB_API_VERSION 720
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <atomic>
|
||||||
|
#include <cassert>
|
||||||
|
#include <chrono>
|
||||||
|
#include <list>
|
||||||
|
#include <vector>
|
||||||
|
#include <string_view>
|
||||||
|
#include <fdb_api.hpp>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include "limit.hpp"
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
// Values for Arguments::mode.
constexpr int MODE_INVALID = -1;
constexpr int MODE_CLEAN = 0;
constexpr int MODE_BUILD = 1;
constexpr int MODE_RUN = 2;
|
||||||
|
|
||||||
|
/* identifiers for long arguments; values are consecutive, starting at 0 */
enum ArgKind {
    ARG_KEYLEN = 0,
    ARG_VALLEN,
    ARG_TPS,
    ARG_ASYNC,
    ARG_COMMITGET,
    ARG_SAMPLING,
    ARG_VERSION,
    ARG_KNOBS,
    ARG_FLATBUFFERS,
    ARG_LOGGROUP,
    ARG_PREFIXPADDING,
    ARG_TRACE,
    ARG_TRACEPATH,
    ARG_TRACEFORMAT,
    ARG_TPSMAX,
    ARG_TPSMIN,
    ARG_TPSINTERVAL,
    ARG_TPSCHANGE,
    ARG_TXNTRACE,
    ARG_TXNTAGGING,
    ARG_TXNTAGGINGPREFIX,
    ARG_STREAMING_MODE,
    ARG_DISABLE_RYW,
    ARG_CLIENT_THREADS_PER_VERSION,
    ARG_JSON_REPORT,
    // if blob granule files are stored locally, mako will read and materialize them if this is set
    ARG_BG_FILE_PATH,
};
|
||||||
|
|
||||||
|
// Indices into the second dimension of an `ops[...][3]` table.
constexpr int OP_COUNT = 0;
constexpr int OP_RANGE = 1;
constexpr int OP_REVERSE = 2;
|
||||||
|
|
||||||
|
/* transaction specification: one enumerator per operation, consecutive from 0 */
enum OpKind {
    OP_GETREADVERSION = 0,
    OP_GET,
    OP_GETRANGE,
    OP_SGET,
    OP_SGETRANGE,
    OP_UPDATE,
    OP_INSERT,
    OP_INSERTRANGE,
    OP_OVERWRITE,
    OP_CLEAR,
    OP_SETCLEAR,
    OP_CLEARRANGE,
    OP_SETCLEARRANGE,
    OP_COMMIT,
    /* pseudo-operation - time it takes to run one iteration of ops sequence */
    OP_TRANSACTION,
    OP_READ_BG,
    /* number of operations; must be the last item */
    MAX_OP
};
|
||||||
|
|
||||||
|
// Selectable kinds of TPS change; values are consecutive, starting at 0.
enum TPSChangeTypes {
    TPS_SIN,
    TPS_SQUARE,
    TPS_PULSE
};
|
||||||
|
|
||||||
|
/* we set WorkloadSpec and Arguments only once in the master process,
|
||||||
|
* and won't be touched by child processes.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Workload description: a small table of integers per operation.
struct WorkloadSpec {
|
||||||
|
/* for each operation (first index, an OpKind value), stores "count", "range" and "reverse"
 * (second index: OP_COUNT, OP_RANGE, OP_REVERSE) */
|
||||||
|
int ops[MAX_OP][3];
|
||||||
|
};
|
||||||
|
|
||||||
|
// Capacities of the fixed-size members of Arguments.
constexpr int LOGGROUP_MAX = 256;
constexpr int KNOB_MAX = 256;
constexpr int TAGPREFIXLENGTH_MAX = 8;
constexpr int NUM_CLUSTERS_MAX = 3;
constexpr int NUM_DATABASES_MAX = 10;

// String constants.
constexpr std::string_view KEY_PREFIX = "mako";
constexpr std::string_view TEMP_DATA_STORE = "/tmp/makoTemp";
|
||||||
|
|
||||||
|
/* benchmark parameters */
|
||||||
|
struct Arguments {
|
||||||
|
int api_version;
|
||||||
|
int json;
|
||||||
|
int num_processes;
|
||||||
|
int num_threads;
|
||||||
|
int async_xacts;
|
||||||
|
int mode;
|
||||||
|
int rows; /* is 2 billion enough? */
|
||||||
|
int row_digits;
|
||||||
|
int seconds;
|
||||||
|
int iteration;
|
||||||
|
int tpsmax;
|
||||||
|
int tpsmin;
|
||||||
|
int tpsinterval;
|
||||||
|
int tpschange;
|
||||||
|
int sampling;
|
||||||
|
int key_length;
|
||||||
|
int value_length;
|
||||||
|
int zipf;
|
||||||
|
int commit_get;
|
||||||
|
int verbose;
|
||||||
|
WorkloadSpec txnspec;
|
||||||
|
char cluster_files[NUM_CLUSTERS_MAX][PATH_MAX];
|
||||||
|
int num_fdb_clusters;
|
||||||
|
int num_databases;
|
||||||
|
char log_group[LOGGROUP_MAX];
|
||||||
|
int prefixpadding;
|
||||||
|
int trace;
|
||||||
|
char tracepath[PATH_MAX];
|
||||||
|
int traceformat; /* 0 - XML, 1 - JSON */
|
||||||
|
char knobs[KNOB_MAX];
|
||||||
|
uint8_t flatbuffers;
|
||||||
|
int txntrace;
|
||||||
|
int txntagging;
|
||||||
|
char txntagging_prefix[TAGPREFIXLENGTH_MAX];
|
||||||
|
FDBStreamingMode streaming_mode;
|
||||||
|
int64_t client_threads_per_version;
|
||||||
|
int disable_ryw;
|
||||||
|
char json_output_path[PATH_MAX];
|
||||||
|
bool bg_materialize_files;
|
||||||
|
char bg_file_path[PATH_MAX];
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mako
|
||||||
|
|
||||||
|
#endif /* MAKO_HPP */
|
|
@ -53,6 +53,13 @@ Arguments
|
||||||
|
|
||||||
- | ``-t | --threads <threads>``
|
- | ``-t | --threads <threads>``
|
||||||
| Number of threads per worker process (Default: 1)
|
| Number of threads per worker process (Default: 1)
|
||||||
|
| With ``--async_xacts <xacts>`` == 0 (Default), each of the ``<threads>`` operates on a transaction object with blocking API calls
|
||||||
|
| Otherwise, all of the ``<threads>`` run an asynchronous job scheduler, serving ``<xacts>`` transactions
|
||||||
|
|
||||||
|
- | ``--async_xacts <xacts>``
|
||||||
|
| Number of transactions per worker process to run asynchronously (Default: 0)
|
||||||
|
| ``<xacts>`` > 0 switches the execution mode to non-blocking (see ``-t | --threads``), with the exception of the blob granules API
|
||||||
|
| Note: throttling options, e.g. ``--tpsmax``, ``--tpsmin``, ``--tpschange``, ``--tpsinterval``, are ignored in asynchronous mode
|
||||||
|
|
||||||
- | ``-r | --rows <rows>``
|
- | ``-r | --rows <rows>``
|
||||||
| Number of rows initially populated (Default: 100000)
|
| Number of rows initially populated (Default: 100000)
|
||||||
|
|
|
@ -0,0 +1,275 @@
|
||||||
|
/*
|
||||||
|
* operations.cpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "blob_granules.hpp"
|
||||||
|
#include "operations.hpp"
|
||||||
|
#include "mako.hpp"
|
||||||
|
#include "logger.hpp"
|
||||||
|
#include "utils.hpp"
|
||||||
|
#include <array>
|
||||||
|
|
||||||
|
extern thread_local mako::Logger logr;
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
using namespace fdb;
|
||||||
|
|
||||||
|
const std::array<Operation, MAX_OP> opTable{
|
||||||
|
{ { "GRV",
|
||||||
|
{ { StepKind::READ,
|
||||||
|
[](Transaction& tx, Arguments const&, ByteString&, ByteString&, ByteString&) {
|
||||||
|
return tx.getReadVersion().eraseType();
|
||||||
|
},
|
||||||
|
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString&) {
|
||||||
|
if (f && !f.error()) {
|
||||||
|
f.get<future_var::Int64>();
|
||||||
|
}
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
false },
|
||||||
|
{ "GET",
|
||||||
|
{ { StepKind::READ,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
|
||||||
|
return tx.get(key, false /*snapshot*/).eraseType();
|
||||||
|
},
|
||||||
|
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
|
||||||
|
if (f && !f.error()) {
|
||||||
|
f.get<future_var::Value>();
|
||||||
|
}
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
false },
|
||||||
|
{ "GETRANGE",
|
||||||
|
{ { StepKind::READ,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
|
||||||
|
return tx
|
||||||
|
.getRange(key_select::firstGreaterOrEqual(begin),
|
||||||
|
key_select::lastLessOrEqual(end, 1),
|
||||||
|
0 /*limit*/,
|
||||||
|
0 /*target_bytes*/,
|
||||||
|
args.streaming_mode,
|
||||||
|
0 /*iteration*/,
|
||||||
|
false /*snapshot*/,
|
||||||
|
args.txnspec.ops[OP_GETRANGE][OP_REVERSE])
|
||||||
|
.eraseType();
|
||||||
|
},
|
||||||
|
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
|
||||||
|
if (f && !f.error()) {
|
||||||
|
f.get<future_var::KeyValueArray>();
|
||||||
|
}
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
false },
|
||||||
|
{ "SGET",
|
||||||
|
{ { StepKind::READ,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
|
||||||
|
return tx.get(key, true /*snapshot*/).eraseType();
|
||||||
|
},
|
||||||
|
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
|
||||||
|
if (f && !f.error()) {
|
||||||
|
f.get<future_var::Value>();
|
||||||
|
}
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
false },
|
||||||
|
{ "SGETRANGE",
|
||||||
|
{ {
|
||||||
|
|
||||||
|
StepKind::READ,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
|
||||||
|
return tx
|
||||||
|
.getRange(key_select::firstGreaterOrEqual(begin),
|
||||||
|
key_select::lastLessOrEqual(end, 1),
|
||||||
|
0 /*limit*/,
|
||||||
|
0 /*target_bytes*/,
|
||||||
|
args.streaming_mode,
|
||||||
|
0 /*iteration*/,
|
||||||
|
true /*snapshot*/,
|
||||||
|
args.txnspec.ops[OP_GETRANGE][OP_REVERSE])
|
||||||
|
.eraseType();
|
||||||
|
},
|
||||||
|
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
|
||||||
|
if (f && !f.error()) {
|
||||||
|
f.get<future_var::KeyValueArray>();
|
||||||
|
}
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
false },
|
||||||
|
{ "UPDATE",
|
||||||
|
{ { StepKind::READ,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
|
||||||
|
return tx.get(key, false /*snapshot*/).eraseType();
|
||||||
|
},
|
||||||
|
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
|
||||||
|
if (f && !f.error()) {
|
||||||
|
f.get<future_var::Value>();
|
||||||
|
}
|
||||||
|
} },
|
||||||
|
{ StepKind::IMM,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
|
||||||
|
randomString(value.data(), args.value_length);
|
||||||
|
tx.set(key, value);
|
||||||
|
return Future();
|
||||||
|
} } },
|
||||||
|
2,
|
||||||
|
true },
|
||||||
|
{ "INSERT",
|
||||||
|
{ { StepKind::IMM,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
|
||||||
|
// key[0..args.key_length] := concat(key_prefix, random_string)
|
||||||
|
randomString(key.data() + intSize(KEY_PREFIX), args.key_length - intSize(KEY_PREFIX));
|
||||||
|
randomString(value.data(), args.value_length);
|
||||||
|
tx.set(key, value);
|
||||||
|
return Future();
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
true },
|
||||||
|
{ "INSERTRANGE",
|
||||||
|
{ { StepKind::IMM,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
|
||||||
|
randomString(value.data(), args.value_length);
|
||||||
|
|
||||||
|
// key[0..args.key_length] := concat(prefix, random_string, num[0..range_digits])
|
||||||
|
const auto range = args.txnspec.ops[OP_INSERTRANGE][OP_RANGE];
|
||||||
|
assert(range > 0);
|
||||||
|
const auto range_digits = digits(range);
|
||||||
|
const auto random_len = args.key_length - intSize(KEY_PREFIX) - range_digits;
|
||||||
|
randomString(&key[intSize(KEY_PREFIX)], random_len);
|
||||||
|
for (auto i = 0; i < range; i++) {
|
||||||
|
numericWithFill(&key[args.key_length - range_digits], range_digits, i);
|
||||||
|
tx.set(key, value);
|
||||||
|
}
|
||||||
|
return Future();
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
true },
|
||||||
|
{ "OVERWRITE",
|
||||||
|
{ { StepKind::IMM,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
|
||||||
|
randomString(value.data(), args.value_length);
|
||||||
|
tx.set(key, value);
|
||||||
|
return Future();
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
true },
|
||||||
|
{ "CLEAR",
|
||||||
|
{ { StepKind::IMM,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
|
||||||
|
tx.clear(key);
|
||||||
|
return Future();
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
true },
|
||||||
|
{ "SETCLEAR",
|
||||||
|
{ { StepKind::COMMIT,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString& value) {
|
||||||
|
randomString(&key[KEY_PREFIX.size()], args.key_length - intSize(KEY_PREFIX));
|
||||||
|
randomString(value.data(), args.value_length);
|
||||||
|
tx.set(key, value);
|
||||||
|
return tx.commit().eraseType();
|
||||||
|
} },
|
||||||
|
{ StepKind::IMM,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key, ByteString&, ByteString&) {
|
||||||
|
tx.reset(); // assuming commit from step 0 worked.
|
||||||
|
tx.clear(key); // key should forward unchanged from step 0
|
||||||
|
return Future();
|
||||||
|
} } },
|
||||||
|
2,
|
||||||
|
true },
|
||||||
|
{ "CLEARRANGE",
|
||||||
|
{ { StepKind::IMM,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
|
||||||
|
tx.clearRange(begin, end);
|
||||||
|
return Future();
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
true },
|
||||||
|
{ "SETCLEARRANGE",
|
||||||
|
{ { StepKind::COMMIT,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& key_begin, ByteString& key, ByteString& value) {
|
||||||
|
randomString(value.data(), args.value_length);
|
||||||
|
|
||||||
|
// key[0..args.key_length] := concat(prefix, random_string, num[0..range_digits])
|
||||||
|
const auto range = args.txnspec.ops[OP_SETCLEARRANGE][OP_RANGE];
|
||||||
|
assert(range > 0);
|
||||||
|
const auto range_digits = digits(range);
|
||||||
|
const auto random_len = args.key_length - intSize(KEY_PREFIX) - range_digits;
|
||||||
|
randomString(&key[KEY_PREFIX.size()], random_len);
|
||||||
|
for (auto i = 0; i < range; i++) {
|
||||||
|
numericWithFill(&key[args.key_length - range_digits], range_digits, i);
|
||||||
|
tx.set(key, value);
|
||||||
|
if (i == 0)
|
||||||
|
key_begin.assign(key);
|
||||||
|
}
|
||||||
|
return tx.commit().eraseType();
|
||||||
|
} },
|
||||||
|
{ StepKind::IMM,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
|
||||||
|
tx.reset();
|
||||||
|
tx.clearRange(begin, end);
|
||||||
|
return Future();
|
||||||
|
} } },
|
||||||
|
2,
|
||||||
|
true },
|
||||||
|
{ "COMMIT", { { StepKind::NONE, nullptr } }, 0, false },
|
||||||
|
{ "TRANSACTION", { { StepKind::NONE, nullptr } }, 0, false },
|
||||||
|
{ "READBLOBGRANULE",
|
||||||
|
{ { StepKind::ON_ERROR,
|
||||||
|
[](Transaction& tx, Arguments const& args, ByteString& begin, ByteString& end, ByteString&) {
|
||||||
|
auto err = Error{};
|
||||||
|
|
||||||
|
err = tx.setOptionNothrow(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, BytesRef());
|
||||||
|
if (err) {
|
||||||
|
// Issuing read/writes before disabling RYW results in error.
|
||||||
|
// Possible malformed workload?
|
||||||
|
// As workloads execute in sequence, retrying would likely repeat this error.
|
||||||
|
fmt::print(stderr, "ERROR: TR_OPTION_READ_YOUR_WRITES_DISABLE: {}", err.what());
|
||||||
|
return Future();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate a separate context per call to avoid multiple threads accessing
|
||||||
|
auto user_context = blob_granules::local_file::UserContext(args.bg_file_path);
|
||||||
|
|
||||||
|
auto api_context = blob_granules::local_file::createApiContext(user_context, args.bg_materialize_files);
|
||||||
|
|
||||||
|
auto r = tx.readBlobGranules(begin,
|
||||||
|
end,
|
||||||
|
0 /* beginVersion*/,
|
||||||
|
-2, /* endVersion. -2 (latestVersion) is use txn read version */
|
||||||
|
api_context);
|
||||||
|
|
||||||
|
user_context.clear();
|
||||||
|
|
||||||
|
auto out = Result::KeyValueArray{};
|
||||||
|
err = r.getKeyValueArrayNothrow(out);
|
||||||
|
if (!err || err.is(2037 /*blob_granule_not_materialized*/))
|
||||||
|
return Future();
|
||||||
|
const auto level = (err.is(1020 /*not_committed*/) || err.is(1021 /*commit_unknown_result*/) ||
|
||||||
|
err.is(1213 /*tag_throttled*/))
|
||||||
|
? VERBOSE_WARN
|
||||||
|
: VERBOSE_NONE;
|
||||||
|
logr.printWithLogLevel(level, "ERROR", "get_keyvalue_array() after readBlobGranules(): {}", err.what());
|
||||||
|
return tx.onError(err).eraseType();
|
||||||
|
} } },
|
||||||
|
1,
|
||||||
|
false } }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mako
|
|
@ -0,0 +1,140 @@
|
||||||
|
/*
|
||||||
|
* operations.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_OPERATIONS_HPP
|
||||||
|
#define MAKO_OPERATIONS_HPP
|
||||||
|
|
||||||
|
#include <fdb_api.hpp>
|
||||||
|
#include <array>
|
||||||
|
#include <cassert>
|
||||||
|
#include <string_view>
|
||||||
|
#include <tuple>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include "macro.hpp"
|
||||||
|
#include "mako.hpp"
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
// Classifies one step of an operation; determines how the future returned by the step function is handled
|
||||||
|
enum class StepKind {
|
||||||
|
NONE, ///< not part of the table: OP_TRANSACTION, OP_COMMIT
|
||||||
|
IMM, ///< non-future ops that return immediately: e.g. set, clear_range
|
||||||
|
READ, ///< blockable reads: get(), get_range(), get_read_version, ...
|
||||||
|
COMMIT, ///< step returns the future of tx.commit()
|
||||||
|
ON_ERROR ///< future is a result of tx.on_error()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Ops that don't have concrete steps to execute and exist for measurement only (OP_COMMIT, OP_TRANSACTION)
|
||||||
|
force_inline bool isAbstractOp(int op) noexcept {
|
||||||
|
return op == OP_COMMIT || op == OP_TRANSACTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signature of a step body: takes the transaction, the run arguments and three mutable
// byte strings (key1, key2, value) and returns an fdb::Future.
using StepFunction = fdb::Future (*)(fdb::Transaction& tx,
|
||||||
|
Arguments const&,
|
||||||
|
fdb::ByteString& /*key1*/,
|
||||||
|
fdb::ByteString& /*key2*/,
|
||||||
|
fdb::ByteString& /*value*/);
|
||||||
|
|
||||||
|
// Signature of an optional per-step hook: same arguments as StepFunction plus a leading
// fdb::Future& (presumably the future the step returned -- confirm against the caller).
using PostStepFunction = void (*)(fdb::Future&,
|
||||||
|
fdb::Transaction& tx,
|
||||||
|
Arguments const&,
|
||||||
|
fdb::ByteString& /*key1*/,
|
||||||
|
fdb::ByteString& /*key2*/,
|
||||||
|
fdb::ByteString& /*value*/);
|
||||||
|
|
||||||
|
// One step of an operation: its kind, the function that runs it, and an optional post-step hook.
struct Step {
|
||||||
|
StepKind kind; // how the step's future is to be handled
|
||||||
|
StepFunction step_func_; // step body; nullptr in the StepKind::NONE table entries
|
||||||
|
PostStepFunction post_step_func_{ nullptr }; // optional hook; defaults to none
|
||||||
|
};
|
||||||
|
|
||||||
|
// Static description of one op code: its name, up to two steps, and whether it needs a commit.
struct Operation {
|
||||||
|
std::string_view name_; // op name, e.g. "CLEARRANGE"
|
||||||
|
Step steps_[2]; // storage for at most two steps
|
||||||
|
int num_steps_; // number of entries of steps_ that are in use
|
||||||
|
bool needs_commit_; // reported by needsCommit()
|
||||||
|
|
||||||
|
std::string_view name() const noexcept { return name_; }
|
||||||
|
|
||||||
|
StepKind stepKind(int step) const noexcept {
|
||||||
|
assert(step < steps());
|
||||||
|
return steps_[step].kind;
|
||||||
|
}
|
||||||
|
|
||||||
|
StepFunction stepFunction(int step) const noexcept { return steps_[step].step_func_; } // NOTE(review): no bounds assert, unlike stepKind()
|
||||||
|
|
||||||
|
PostStepFunction postStepFunction(int step) const noexcept { return steps_[step].post_step_func_; } // NOTE(review): no bounds assert either
|
||||||
|
// how many steps in this op?
|
||||||
|
int steps() const noexcept { return num_steps_; }
|
||||||
|
// does the op need to commit some time after its final step?
|
||||||
|
bool needsCommit() const noexcept { return needs_commit_; }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Table of all operations, one entry per op code; defined in another translation unit.
extern const std::array<Operation, MAX_OP> opTable;
|
||||||
|
|
||||||
|
// Returns the table name for ops_code, or "" when ops_code is outside [0, MAX_OP).
// NOTE(review): returns string_view::data(), so it relies on every table name being null-terminated.
force_inline char const* getOpName(int ops_code) {
|
||||||
|
if (ops_code >= 0 && ops_code < MAX_OP)
|
||||||
|
return opTable[ops_code].name().data();
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Position in a transaction's op sequence, advanced by getOpNext().
struct OpIterator {
|
||||||
|
int op, count, step; // op code, repetition index of that op, step index within the op
|
||||||
|
|
||||||
|
bool operator==(const OpIterator& other) const noexcept {
|
||||||
|
return op == other.op && count == other.count && step == other.step;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator!=(const OpIterator& other) const noexcept { return !(*this == other); }
|
||||||
|
|
||||||
|
StepKind stepKind() const noexcept { return opTable[op].stepKind(step); } // indexes opTable directly; op must be a valid op code (not OpEnd)
|
||||||
|
|
||||||
|
char const* opName() const noexcept { return getOpName(op); } // "" when op is out of range
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sentinel returned by getOpBegin()/getOpNext() when there is no (further) step to run.
constexpr const OpIterator OpEnd = OpIterator{ MAX_OP, -1, -1 };
|
||||||
|
|
||||||
|
// Returns an iterator at step 0, repetition 0 of the first non-abstract op whose OP_COUNT is nonzero, or OpEnd if there is none.
force_inline OpIterator getOpBegin(Arguments const& args) noexcept {
|
||||||
|
for (auto op = 0; op < MAX_OP; op++) {
|
||||||
|
if (isAbstractOp(op) || args.txnspec.ops[op][OP_COUNT] == 0)
|
||||||
|
continue;
|
||||||
|
return OpIterator{ op, 0, 0 };
|
||||||
|
}
|
||||||
|
return OpEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the position that follows `current`: the next step of the same op, else the next
// repetition of that op, else step 0 of the next eligible op; OpEnd when nothing is left.
force_inline OpIterator getOpNext(Arguments const& args, OpIterator current) noexcept {
|
||||||
|
auto& [op, count, step] = current; // references into the by-value copy; the caller's iterator is not modified
|
||||||
|
assert(op < MAX_OP && !isAbstractOp(op));
|
||||||
|
if (opTable[op].steps() > step + 1)
|
||||||
|
return OpIterator{ op, count, step + 1 };
|
||||||
|
count++; // all steps done: move to the next repetition of this op
|
||||||
|
for (; op < MAX_OP; op++, count = 0) { // first pass re-checks the current op; later ops start at repetition 0
|
||||||
|
if (isAbstractOp(op) || args.txnspec.ops[op][OP_COUNT] <= count)
|
||||||
|
continue;
|
||||||
|
return OpIterator{ op, count, 0 };
|
||||||
|
}
|
||||||
|
return OpEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mako
|
||||||
|
|
||||||
|
#endif /* MAKO_OPERATIONS_HPP */
|
|
@ -0,0 +1,26 @@
|
||||||
|
/*
|
||||||
|
* process.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_PROCESS_HPP
|
||||||
|
#define MAKO_PROCESS_HPP
|
||||||
|
|
||||||
|
// Kind of process -- presumably the role a mako process runs as (main / worker / stats); confirm against users.
enum class ProcKind { MAIN, WORKER, STATS };
|
||||||
|
|
||||||
|
#endif /*MAKO_PROCESS_HPP*/
|
|
@ -0,0 +1,108 @@
|
||||||
|
/*
|
||||||
|
* shm.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_SHM_HPP
|
||||||
|
#define MAKO_SHM_HPP
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdint>
|
||||||
|
#include "stats.hpp"
|
||||||
|
|
||||||
|
/* shared memory: values for Header::signal (which is initialized to SIGNAL_OFF) */
|
||||||
|
constexpr const int SIGNAL_RED = 0;
|
||||||
|
constexpr const int SIGNAL_GREEN = 1;
|
||||||
|
constexpr const int SIGNAL_OFF = 2;
|
||||||
|
|
||||||
|
// controlled, safer access to shared memory
|
||||||
|
namespace mako::shared_memory {
|
||||||
|
|
||||||
|
// Control block placed at the start of the shared memory region (see LayoutHelper).
// NOTE(review): ATOMIC_VAR_INIT is deprecated since C++20; plain brace/equals initialization is equivalent.
struct Header {
|
||||||
|
std::atomic<int> signal = ATOMIC_VAR_INIT(SIGNAL_OFF); // one of the SIGNAL_* values
|
||||||
|
std::atomic<int> readycount = ATOMIC_VAR_INIT(0); // presumably counts ready workers -- confirm against users
|
||||||
|
std::atomic<double> throttle_factor = ATOMIC_VAR_INIT(1.0); // starts at 1.0
|
||||||
|
std::atomic<int> stopcount = ATOMIC_VAR_INIT(0); // presumably counts stopped workers -- confirm against users
|
||||||
|
};
|
||||||
|
|
||||||
|
// Models the beginning of the shared region: the Header followed by the first ThreadStatistics slot.
// Used for sizing (storageSize) and to locate the start of the stats array (Access::statsSlot).
struct LayoutHelper {
|
||||||
|
Header hdr;
|
||||||
|
ThreadStatistics stats; // slot 0 of the stats array; remaining slots follow contiguously
|
||||||
|
};
|
||||||
|
|
||||||
|
// Bytes needed for one Header plus num_processes * num_threads ThreadStatistics slots.
// LayoutHelper already contains the first slot, hence the "- 1".
inline size_t storageSize(int num_processes, int num_threads) noexcept {
|
||||||
|
assert(num_processes >= 1 && num_threads >= 1);
|
||||||
|
return sizeof(LayoutHelper) + sizeof(ThreadStatistics) * ((num_processes * num_threads) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-owning view over a shared memory region laid out as LayoutHelper: a Header followed by
// num_processes * num_threads ThreadStatistics slots. Does not allocate or free the region.
class Access {
|
||||||
|
void* base; // start of the region; nullptr for a default-constructed Access
|
||||||
|
int num_processes;
|
||||||
|
int num_threads;
|
||||||
|
|
||||||
|
static inline ThreadStatistics& statsSlot(void* shm_base, // slots are ordered process-major: index = process_idx * num_threads + thread_idx
|
||||||
|
int num_threads,
|
||||||
|
int process_idx,
|
||||||
|
int thread_idx) noexcept {
|
||||||
|
return (&static_cast<LayoutHelper*>(shm_base)->stats)[process_idx * num_threads + thread_idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
Access(void* shm, int num_processes, int num_threads) noexcept
|
||||||
|
: base(shm), num_processes(num_processes), num_threads(num_threads) {}
|
||||||
|
|
||||||
|
Access() noexcept : Access(nullptr, 0, 0) {}
|
||||||
|
|
||||||
|
Access(const Access&) noexcept = default;
|
||||||
|
|
||||||
|
Access& operator=(const Access&) noexcept = default;
|
||||||
|
|
||||||
|
size_t size() const noexcept { return storageSize(num_processes, num_threads); } // storageSize asserts both counts >= 1, so not valid on a default-constructed Access
|
||||||
|
|
||||||
|
void initMemory() noexcept { // placement-constructs the Header and every stats slot in the region
|
||||||
|
new (&header()) Header{};
|
||||||
|
for (auto i = 0; i < num_processes; i++)
|
||||||
|
for (auto j = 0; j < num_threads; j++)
|
||||||
|
new (&statsSlot(i, j)) ThreadStatistics();
|
||||||
|
}
|
||||||
|
|
||||||
|
Header const& headerConst() const noexcept { return *static_cast<Header const*>(base); } // Header is the first member of LayoutHelper, i.e. at the start of the region
|
||||||
|
|
||||||
|
Header& header() const noexcept { return *static_cast<Header*>(base); }
|
||||||
|
|
||||||
|
ThreadStatistics const* statsConstArray() const noexcept { // pointer to slot (0, 0), the start of the stats array
|
||||||
|
return &statsSlot(base, num_threads, 0 /*process_id*/, 0 /*thread_id*/);
|
||||||
|
}
|
||||||
|
|
||||||
|
ThreadStatistics* statsArray() const noexcept {
|
||||||
|
return &statsSlot(base, num_threads, 0 /*process_id*/, 0 /*thread_id*/);
|
||||||
|
}
|
||||||
|
|
||||||
|
ThreadStatistics const& statsConstSlot(int process_idx, int thread_idx) const noexcept { // no bounds checking on the indices
|
||||||
|
return statsSlot(base, num_threads, process_idx, thread_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
ThreadStatistics& statsSlot(int process_idx, int thread_idx) const noexcept { // no bounds checking on the indices
|
||||||
|
return statsSlot(base, num_threads, process_idx, thread_idx);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mako::shared_memory
|
||||||
|
|
||||||
|
#endif /* MAKO_SHM_HPP */
|
|
@ -0,0 +1,177 @@
|
||||||
|
/*
|
||||||
|
* stats.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_STATS_HPP
|
||||||
|
#define MAKO_STATS_HPP
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <list>
|
||||||
|
#include <new>
|
||||||
|
#include <utility>
|
||||||
|
#include "operations.hpp"
|
||||||
|
#include "time.hpp"
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
/* rough cap on the number of samples to avoid OOM hindering benchmark */
|
||||||
|
constexpr const size_t SAMPLE_CAP = 2000000;
|
||||||
|
|
||||||
|
/* number of samples held by one LatencySampleBlock */
|
||||||
|
constexpr const size_t LAT_BLOCK_SIZE = 4093;
|
||||||
|
|
||||||
|
/* hard cap on the number of sample blocks = 488 (integer division of SAMPLE_CAP by LAT_BLOCK_SIZE) */
|
||||||
|
constexpr const size_t MAX_LAT_BLOCKS = SAMPLE_CAP / LAT_BLOCK_SIZE;
|
||||||
|
|
||||||
|
/* fixed-capacity block of LAT_BLOCK_SIZE latency samples, stored as integer microseconds */
|
||||||
|
class LatencySampleBlock {
|
||||||
|
uint64_t samples[LAT_BLOCK_SIZE]{
|
||||||
|
0,
|
||||||
|
};
|
||||||
|
uint64_t index{ 0 }; // number of samples stored so far
|
||||||
|
|
||||||
|
public:
|
||||||
|
LatencySampleBlock() noexcept = default;
|
||||||
|
bool full() const noexcept { return index >= LAT_BLOCK_SIZE; }
|
||||||
|
void put(timediff_t td) { // appends one sample; the block must not be full (asserted)
|
||||||
|
assert(!full());
|
||||||
|
samples[index++] = toIntegerMicroseconds(td);
|
||||||
|
}
|
||||||
|
// return {data block, number of samples}
|
||||||
|
std::pair<uint64_t const*, size_t> data() const noexcept { return { samples, index }; }
|
||||||
|
};
|
||||||
|
|
||||||
|
/* collect sampled latencies until MAX_LAT_BLOCKS blocks are full or a block allocation fails */
|
||||||
|
class LatencySampleBin {
|
||||||
|
std::list<LatencySampleBlock> blocks;
|
||||||
|
bool noMoreAlloc{ false }; // set once an allocation has thrown std::bad_alloc; no further allocation is attempted by put()
|
||||||
|
|
||||||
|
bool tryAlloc() { // appends one empty block; returns false (and latches noMoreAlloc) on std::bad_alloc
|
||||||
|
try {
|
||||||
|
blocks.emplace_back();
|
||||||
|
} catch (const std::bad_alloc&) {
|
||||||
|
noMoreAlloc = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
void reserveOneBlock() { // allocates the first block if there is none yet
|
||||||
|
if (blocks.empty())
|
||||||
|
tryAlloc();
|
||||||
|
}
|
||||||
|
|
||||||
|
void put(timediff_t td) { // records one sample; silently drops it when the block cap is reached or allocation failed
|
||||||
|
if (blocks.empty() || blocks.back().full()) {
|
||||||
|
if (blocks.size() >= MAX_LAT_BLOCKS || noMoreAlloc || !tryAlloc())
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
blocks.back().put(td);
|
||||||
|
}
|
||||||
|
|
||||||
|
// iterate & apply for each block user function void(uint64_t const*, size_t)
|
||||||
|
template <typename Func>
|
||||||
|
void forEachBlock(Func&& fn) const {
|
||||||
|
for (const auto& block : blocks) {
|
||||||
|
auto [ptr, cnt] = block.data();
|
||||||
|
fn(ptr, cnt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Per-op counters and latency aggregates (in microseconds), all indexed by op code in [0, MAX_OP).
// None of the accessors or mutators bounds-check the op index.
class alignas(64) ThreadStatistics {
|
||||||
|
uint64_t conflicts;
|
||||||
|
uint64_t total_errors;
|
||||||
|
uint64_t ops[MAX_OP];
|
||||||
|
uint64_t errors[MAX_OP];
|
||||||
|
uint64_t latency_samples[MAX_OP];
|
||||||
|
uint64_t latency_us_total[MAX_OP];
|
||||||
|
uint64_t latency_us_min[MAX_OP];
|
||||||
|
uint64_t latency_us_max[MAX_OP];
|
||||||
|
|
||||||
|
public:
|
||||||
|
ThreadStatistics() noexcept {
|
||||||
|
memset(this, 0, sizeof(ThreadStatistics)); // zero every counter
|
||||||
|
memset(latency_us_min, 0xff, sizeof(latency_us_min)); // all-ones bytes = max uint64_t, so any first sample lowers the minimum
|
||||||
|
}
|
||||||
|
|
||||||
|
ThreadStatistics(const ThreadStatistics& other) noexcept = default;
|
||||||
|
ThreadStatistics& operator=(const ThreadStatistics& other) noexcept = default;
|
||||||
|
|
||||||
|
uint64_t getConflictCount() const noexcept { return conflicts; }
|
||||||
|
|
||||||
|
uint64_t getOpCount(int op) const noexcept { return ops[op]; }
|
||||||
|
|
||||||
|
uint64_t getErrorCount(int op) const noexcept { return errors[op]; }
|
||||||
|
|
||||||
|
uint64_t getTotalErrorCount() const noexcept { return total_errors; }
|
||||||
|
|
||||||
|
uint64_t getLatencySampleCount(int op) const noexcept { return latency_samples[op]; }
|
||||||
|
|
||||||
|
uint64_t getLatencyUsTotal(int op) const noexcept { return latency_us_total[op]; }
|
||||||
|
|
||||||
|
uint64_t getLatencyUsMin(int op) const noexcept { return latency_us_min[op]; }
|
||||||
|
|
||||||
|
uint64_t getLatencyUsMax(int op) const noexcept { return latency_us_max[op]; }
|
||||||
|
|
||||||
|
// with 'this' as final aggregation, factor in 'other'
|
||||||
|
void combine(const ThreadStatistics& other) {
|
||||||
|
conflicts += other.conflicts;
|
||||||
|
for (auto op = 0; op < MAX_OP; op++) {
|
||||||
|
ops[op] += other.ops[op];
|
||||||
|
errors[op] += other.errors[op];
|
||||||
|
total_errors += other.errors[op]; // sums per-op errors instead of other.total_errors; equal when counts only come from incrErrorCount()
|
||||||
|
latency_samples[op] += other.latency_samples[op];
|
||||||
|
latency_us_total[op] += other.latency_us_total[op];
|
||||||
|
if (latency_us_min[op] > other.latency_us_min[op])
|
||||||
|
latency_us_min[op] = other.latency_us_min[op];
|
||||||
|
if (latency_us_max[op] < other.latency_us_max[op])
|
||||||
|
latency_us_max[op] = other.latency_us_max[op];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void incrConflictCount() noexcept { conflicts++; }
|
||||||
|
|
||||||
|
// non-commit write operations aren't measured for time.
|
||||||
|
void incrOpCount(int op) noexcept { ops[op]++; }
|
||||||
|
|
||||||
|
void incrErrorCount(int op) noexcept { // bumps both the per-op and the overall error counter
|
||||||
|
total_errors++;
|
||||||
|
errors[op]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void addLatency(int op, timediff_t diff) noexcept { // folds one latency sample into count / total / min / max for op
|
||||||
|
const auto latency_us = toIntegerMicroseconds(diff);
|
||||||
|
latency_samples[op]++;
|
||||||
|
latency_us_total[op] += latency_us;
|
||||||
|
if (latency_us_min[op] > latency_us)
|
||||||
|
latency_us_min[op] = latency_us;
|
||||||
|
if (latency_us_max[op] < latency_us)
|
||||||
|
latency_us_max[op] = latency_us;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// One LatencySampleBin per op slot (MAX_OP entries).
using LatencySampleBinArray = std::array<LatencySampleBin, MAX_OP>;
|
||||||
|
|
||||||
|
} // namespace mako
|
||||||
|
|
||||||
|
#endif /* MAKO_STATS_HPP */
|
|
@ -0,0 +1,77 @@
|
||||||
|
/*
|
||||||
|
* time.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MAKO_TIME_HPP
|
||||||
|
#define MAKO_TIME_HPP
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
/* time measurement helpers: all timing uses std::chrono::steady_clock */
|
||||||
|
using std::chrono::steady_clock;
|
||||||
|
using timepoint_t = decltype(steady_clock::now()); // time point type of steady_clock
|
||||||
|
using timediff_t = decltype(std::declval<timepoint_t>() - std::declval<timepoint_t>()); // difference of two time points. NOTE(review): std::declval is declared in <utility>, which this header does not include directly
|
||||||
|
|
||||||
|
// Converts a chrono duration to seconds as a double (fractional part preserved).
template <typename Duration>
|
||||||
|
double toDoubleSeconds(Duration duration) {
|
||||||
|
return std::chrono::duration_cast<std::chrono::duration<double>>(duration).count();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts a chrono duration to whole seconds as uint64_t (duration_cast truncates toward zero).
// NOTE(review): the unsigned target type means a negative duration would not be represented as negative.
template <typename Duration>
|
||||||
|
uint64_t toIntegerSeconds(Duration duration) {
|
||||||
|
return std::chrono::duration_cast<std::chrono::duration<uint64_t>>(duration).count();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts a chrono duration to whole microseconds as uint64_t (duration_cast truncates toward zero).
// NOTE(review): the unsigned target type means a negative duration would not be represented as negative.
template <typename Duration>
|
||||||
|
uint64_t toIntegerMicroseconds(Duration duration) {
|
||||||
|
return std::chrono::duration_cast<std::chrono::duration<uint64_t, std::micro>>(duration).count();
|
||||||
|
}
|
||||||
|
|
||||||
|
// timing helpers: StartAtCtor is a tag type selecting the Stopwatch constructor that starts timing immediately
|
||||||
|
struct StartAtCtor {};
|
||||||
|
|
||||||
|
// Records a start (p1) and a stop (p2) time point on steady_clock; diff() is p2 - p1.
class Stopwatch {
|
||||||
|
timepoint_t p1, p2;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Stopwatch() noexcept : p1(), p2() {}
|
||||||
|
Stopwatch(StartAtCtor) noexcept { start(); } // p1 = now; p2 is left default-initialized
|
||||||
|
Stopwatch(timepoint_t start_time) noexcept : p1(start_time), p2() {}
|
||||||
|
Stopwatch(const Stopwatch&) noexcept = default;
|
||||||
|
Stopwatch& operator=(const Stopwatch&) noexcept = default;
|
||||||
|
timepoint_t getStart() const noexcept { return p1; }
|
||||||
|
timepoint_t getStop() const noexcept { return p2; }
|
||||||
|
void start() noexcept { p1 = steady_clock::now(); }
|
||||||
|
Stopwatch& stop() noexcept { // records now as the stop point; returns *this so calls can be chained, e.g. stop().diff()
|
||||||
|
p2 = steady_clock::now();
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
Stopwatch& setStop(timepoint_t p_stop) noexcept { // like stop(), but with a caller-supplied time point
|
||||||
|
p2 = p_stop;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
void startFromStop() noexcept { p1 = p2; } // begin the next interval where the previous one ended
|
||||||
|
auto diff() const noexcept { return p2 - p1; } // only meaningful after a stop point has been recorded
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace mako
|
||||||
|
|
||||||
|
#endif /* MAKO_TIME_HPP */
|
|
@ -1,136 +0,0 @@
|
||||||
#include "utils.h"
|
|
||||||
#include "mako.h"
|
|
||||||
#include <math.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
/* uniform-distribution random */
|
|
||||||
int urand(int low, int high) {
|
|
||||||
double r = rand() / (1.0 + RAND_MAX);
|
|
||||||
int range = high - low + 1;
|
|
||||||
return (int)((r * range) + low);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* random string */
|
|
||||||
/* len is the buffer size, must include null */
|
|
||||||
void randstr(char* str, int len) {
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < len - 1; i++) {
|
|
||||||
str[i] = '!' + urand(0, 'z' - '!'); /* generage a char from '!' to 'z' */
|
|
||||||
}
|
|
||||||
str[len - 1] = '\0';
|
|
||||||
}
|
|
||||||
|
|
||||||
/* random numeric string */
|
|
||||||
/* len is the buffer size, must include null */
|
|
||||||
void randnumstr(char* str, int len) {
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < len - 1; i++) {
|
|
||||||
str[i] = '0' + urand(0, 9); /* generage a char from '!' to 'z' */
|
|
||||||
}
|
|
||||||
str[len - 1] = '\0';
|
|
||||||
}
|
|
||||||
|
|
||||||
/* return the first key to be inserted */
|
|
||||||
int insert_begin(int rows, int p_idx, int t_idx, int total_p, int total_t) {
|
|
||||||
double interval = (double)rows / total_p / total_t;
|
|
||||||
return (int)(round(interval * ((p_idx * total_t) + t_idx)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* return the last key to be inserted */
|
|
||||||
int insert_end(int rows, int p_idx, int t_idx, int total_p, int total_t) {
|
|
||||||
double interval = (double)rows / total_p / total_t;
|
|
||||||
return (int)(round(interval * ((p_idx * total_t) + t_idx + 1) - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* devide val equally among threads */
|
|
||||||
int compute_thread_portion(int val, int p_idx, int t_idx, int total_p, int total_t) {
|
|
||||||
int interval = val / total_p / total_t;
|
|
||||||
int remaining = val - (interval * total_p * total_t);
|
|
||||||
if ((p_idx * total_t + t_idx) < remaining) {
|
|
||||||
return interval + 1;
|
|
||||||
} else if (interval == 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
/* else */
|
|
||||||
return interval;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* number of digits */
|
|
||||||
int digits(int num) {
|
|
||||||
int digits = 0;
|
|
||||||
while (num > 0) {
|
|
||||||
num /= 10;
|
|
||||||
digits++;
|
|
||||||
}
|
|
||||||
return digits;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* generate a key for a given key number */
|
|
||||||
/* prefix is "mako" by default, prefixpadding = 1 means 'x' will be in front rather than trailing the keyname */
|
|
||||||
/* len is the buffer size, key length + null */
|
|
||||||
void genkey(char* str, char* prefix, int prefixlen, int prefixpadding, int num, int rows, int len) {
|
|
||||||
const int rowdigit = digits(rows);
|
|
||||||
const int prefixoffset = prefixpadding ? len - (prefixlen + rowdigit) - 1 : 0;
|
|
||||||
char* prefixstr = (char*)alloca(sizeof(char) * (prefixlen + rowdigit + 1));
|
|
||||||
snprintf(prefixstr, prefixlen + rowdigit + 1, "%s%0.*d", prefix, rowdigit, num);
|
|
||||||
memset(str, 'x', len);
|
|
||||||
memcpy(str + prefixoffset, prefixstr, prefixlen + rowdigit);
|
|
||||||
str[len - 1] = '\0';
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This is another sorting algorithm used to calculate latency parameters */
|
|
||||||
/* We moved from radix sort to quick sort to avoid extra space used in radix sort */
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
uint64_t get_max(uint64_t arr[], int n) {
|
|
||||||
uint64_t mx = arr[0];
|
|
||||||
for (int i = 1; i < n; i++) {
|
|
||||||
if (arr[i] > mx) {
|
|
||||||
mx = arr[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return mx;
|
|
||||||
}
|
|
||||||
|
|
||||||
void bucket_data(uint64_t arr[], int n, uint64_t exp) {
|
|
||||||
// uint64_t output[n];
|
|
||||||
int i, count[10] = { 0 };
|
|
||||||
uint64_t* output = (uint64_t*)malloc(sizeof(uint64_t) * n);
|
|
||||||
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
count[(arr[i] / exp) % 10]++;
|
|
||||||
}
|
|
||||||
for (i = 1; i < 10; i++) {
|
|
||||||
count[i] += count[i - 1];
|
|
||||||
}
|
|
||||||
for (i = n - 1; i >= 0; i--) {
|
|
||||||
output[count[(arr[i] / exp) % 10] - 1] = arr[i];
|
|
||||||
count[(arr[i] / exp) % 10]--;
|
|
||||||
}
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
arr[i] = output[i];
|
|
||||||
}
|
|
||||||
free(output);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The main function is to sort arr[] of size n using Radix Sort
|
|
||||||
void radix_sort(uint64_t* arr, int n) {
|
|
||||||
// Find the maximum number to know number of digits
|
|
||||||
uint64_t m = get_max(arr, n);
|
|
||||||
for (uint64_t exp = 1; m / exp > 0; exp *= 10) bucket_data(arr, n, exp);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int compare(const void* a, const void* b) {
|
|
||||||
const uint64_t* da = (const uint64_t*)a;
|
|
||||||
const uint64_t* db = (const uint64_t*)b;
|
|
||||||
|
|
||||||
return (*da > *db) - (*da < *db);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The main function is to sort arr[] of size n using Quick Sort
|
|
||||||
void quick_sort(uint64_t* arr, int n) {
|
|
||||||
qsort(arr, n, sizeof(uint64_t), compare);
|
|
||||||
}
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* utils.cpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "utils.hpp"
|
||||||
|
#include "mako.hpp"
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
/* return the share of val assigned to thread t_idx of process p_idx, out of total_p * total_t threads */
|
||||||
|
/* divide val equally among threads; threads with the lowest global index get one extra when val does not divide evenly; -1 means this thread gets nothing */
|
||||||
|
int computeThreadPortion(int val, int p_idx, int t_idx, int total_p, int total_t) {
|
||||||
|
int interval = val / total_p / total_t;
|
||||||
|
int remaining = val - (interval * total_p * total_t);
|
||||||
|
if ((p_idx * total_t + t_idx) < remaining) {
|
||||||
|
return interval + 1;
|
||||||
|
} else if (interval == 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
/* else */
|
||||||
|
return interval;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* number of decimal digits in num; returns 0 when num <= 0 */
|
||||||
|
int digits(int num) {
|
||||||
|
int digits = 0;
|
||||||
|
while (num > 0) {
|
||||||
|
num /= 10;
|
||||||
|
digits++;
|
||||||
|
}
|
||||||
|
return digits;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mako
|
|
@ -1,65 +0,0 @@
|
||||||
#ifndef UTILS_H
|
|
||||||
#define UTILS_H
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
/* uniform-distribution random */
|
|
||||||
/* return a uniform random number between low and high, both inclusive */
|
|
||||||
int urand(int low, int high);
|
|
||||||
|
|
||||||
/* write a random string of the length of (len-1) to memory pointed by str
|
|
||||||
* with a null-termination character at str[len-1].
|
|
||||||
*/
|
|
||||||
void randstr(char* str, int len);
|
|
||||||
|
|
||||||
/* write a random numeric string of the length of (len-1) to memory pointed by str
|
|
||||||
* with a null-termination character at str[len-1].
|
|
||||||
*/
|
|
||||||
void randnumstr(char* str, int len);
|
|
||||||
|
|
||||||
/* given the total number of rows to be inserted,
|
|
||||||
* the worker process index p_idx and the thread index t_idx (both 0-based),
|
|
||||||
* and the total number of processes, total_p, and threads, total_t,
|
|
||||||
* returns the first row number assigned to this partition.
|
|
||||||
*/
|
|
||||||
int insert_begin(int rows, int p_idx, int t_idx, int total_p, int total_t);
|
|
||||||
|
|
||||||
/* similar to insert_begin, insert_end returns the last row numer */
|
|
||||||
int insert_end(int rows, int p_idx, int t_idx, int total_p, int total_t);
|
|
||||||
|
|
||||||
/* devide a value equally among threads */
|
|
||||||
int compute_thread_portion(int val, int p_idx, int t_idx, int total_p, int total_t);
|
|
||||||
|
|
||||||
/* similar to insert_begin/end, compute_thread_tps computes
|
|
||||||
* the per-thread target TPS for given configuration.
|
|
||||||
*/
|
|
||||||
#define compute_thread_tps(val, p_idx, t_idx, total_p, total_t) \
|
|
||||||
compute_thread_portion(val, p_idx, t_idx, total_p, total_t)
|
|
||||||
|
|
||||||
/* similar to compute_thread_tps,
|
|
||||||
* compute_thread_iters computs the number of iterations.
|
|
||||||
*/
|
|
||||||
#define compute_thread_iters(val, p_idx, t_idx, total_p, total_t) \
|
|
||||||
compute_thread_portion(val, p_idx, t_idx, total_p, total_t)
|
|
||||||
|
|
||||||
/* get the number of digits */
|
|
||||||
int digits(int num);
|
|
||||||
|
|
||||||
/* generate a key for a given key number */
|
|
||||||
/* prefix is "mako" by default, prefixpadding = 1 means 'x' will be in front rather than trailing the keyname */
|
|
||||||
/* len is the buffer size, key length + null */
|
|
||||||
void genkey(char* str, char* prefix, int prefixlen, int prefixpadding, int num, int rows, int len);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
// The main function is to sort arr[] of size n using Radix Sort
|
|
||||||
void radix_sort(uint64_t arr[], int n);
|
|
||||||
void bucket_data(uint64_t arr[], int n, uint64_t exp);
|
|
||||||
uint64_t get_max(uint64_t arr[], int n);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// The main function is to sort arr[] of size n using Quick Sort
|
|
||||||
void quick_sort(uint64_t arr[], int n);
|
|
||||||
int compare(const void* a, const void* b);
|
|
||||||
|
|
||||||
#endif /* UTILS_H */
|
|
|
@ -0,0 +1,195 @@
|
||||||
|
/*
|
||||||
|
* utils.hpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef UTILS_HPP
|
||||||
|
#define UTILS_HPP
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "macro.hpp"
|
||||||
|
#include "mako.hpp"
|
||||||
|
#include "fdbclient/zipf.h"
|
||||||
|
#include <cassert>
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <string_view>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
namespace mako {
|
||||||
|
|
||||||
|
/* uniform-distribution random */
|
||||||
|
/* return a uniform random number between low and high, both inclusive */
|
||||||
|
force_inline int urand(int low, int high) {
|
||||||
|
double r = rand() / (1.0 + RAND_MAX);
|
||||||
|
int range = high - low + 1;
|
||||||
|
return (int)((r * range) + low);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* pick the next key number: zipfian_next() when args.zipf is set,
 * otherwise urand over [0, args.rows - 1]
 */
force_inline int nextKey(Arguments const& args) {
	if (!args.zipf) {
		return urand(0, args.rows - 1);
	}
	return zipfian_next();
}
|
||||||
|
|
||||||
|
/* size of a string view, narrowed to int */
force_inline int intSize(std::string_view sv) {
	const auto length = sv.size();
	return static_cast<int>(length);
}
|
||||||
|
|
||||||
|
/* random string */
|
||||||
|
template <typename Char>
|
||||||
|
force_inline void randomString(Char* str, int len) {
|
||||||
|
assert(len >= 0);
|
||||||
|
for (auto i = 0; i < len; i++) {
|
||||||
|
str[i] = ('!' + urand(0, 'z' - '!')); /* generate a char from '!' to 'z' */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* given the total number of rows to be inserted,
|
||||||
|
* the worker process index p_idx and the thread index t_idx (both 0-based),
|
||||||
|
* and the total number of processes, total_p, and threads, total_t,
|
||||||
|
* returns the first row number assigned to this partition.
|
||||||
|
*/
|
||||||
|
force_inline int insertBegin(int rows, int p_idx, int t_idx, int total_p, int total_t) {
|
||||||
|
double interval = (double)rows / total_p / total_t;
|
||||||
|
return (int)(round(interval * ((p_idx * total_t) + t_idx)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* similar to insertBegin, insertEnd returns the last row numer */
|
||||||
|
force_inline int insertEnd(int rows, int p_idx, int t_idx, int total_p, int total_t) {
|
||||||
|
double interval = (double)rows / total_p / total_t;
|
||||||
|
return (int)(round(interval * ((p_idx * total_t) + t_idx + 1) - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* divide a value equally among threads */
|
||||||
|
int computeThreadPortion(int val, int p_idx, int t_idx, int total_p, int total_t);
|
||||||
|
|
||||||
|
/* similar to insertBegin/end, computeThreadTps computes
|
||||||
|
* the per-thread target TPS for given configuration.
|
||||||
|
*/
|
||||||
|
#define computeThreadTps(val, p_idx, t_idx, total_p, total_t) computeThreadPortion(val, p_idx, t_idx, total_p, total_t)
|
||||||
|
|
||||||
|
/* similar to computeThreadTps,
|
||||||
|
* computeThreadIters computes the number of iterations.
|
||||||
|
*/
|
||||||
|
#define computeThreadIters(val, p_idx, t_idx, total_p, total_t) \
|
||||||
|
computeThreadPortion(val, p_idx, t_idx, total_p, total_t)
|
||||||
|
|
||||||
|
/* get the number of digits */
|
||||||
|
int digits(int num);
|
||||||
|
|
||||||
|
/* fill memory slice [str, str + len) as stringified, zero-padded num */
|
||||||
|
template <typename Char>
|
||||||
|
force_inline void numericWithFill(Char* str, int len, int num) {
|
||||||
|
static_assert(sizeof(Char) == 1);
|
||||||
|
assert(num >= 0);
|
||||||
|
memset(str, '0', len);
|
||||||
|
for (auto i = len - 1; num > 0 && i >= 0; i--, num /= 10) {
|
||||||
|
str[i] = (num % 10) + '0';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* generate a key for a given key number */
|
||||||
|
/* prefix is "mako" by default, prefixpadding = 1 means 'x' will be in front rather than trailing the keyname */
|
||||||
|
template <typename Char>
|
||||||
|
void genKey(Char* str, std::string_view prefix, Arguments const& args, int num) {
|
||||||
|
static_assert(sizeof(Char) == 1);
|
||||||
|
memset(str, 'x', args.key_length);
|
||||||
|
const auto prefix_len = static_cast<int>(prefix.size());
|
||||||
|
auto pos = args.prefixpadding ? (args.key_length - prefix_len - args.row_digits) : 0;
|
||||||
|
memcpy(&str[pos], prefix.data(), prefix_len);
|
||||||
|
pos += prefix_len;
|
||||||
|
numericWithFill(&str[pos], args.row_digits, num);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Char>
|
||||||
|
force_inline void prepareKeys(int op,
|
||||||
|
std::basic_string<Char>& key1,
|
||||||
|
std::basic_string<Char>& key2,
|
||||||
|
Arguments const& args) {
|
||||||
|
const auto key1_num = nextKey(args);
|
||||||
|
genKey(key1.data(), KEY_PREFIX, args, key1_num);
|
||||||
|
if (args.txnspec.ops[op][OP_RANGE] > 0) {
|
||||||
|
const auto key2_num = std::min(key1_num + args.txnspec.ops[op][OP_RANGE] - 1, args.rows - 1);
|
||||||
|
genKey(key2.data(), KEY_PREFIX, args, key2_num);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// scope guard: the stored callable is invoked from the destructor,
// i.e. whenever an ExitGuard object goes out of scope.
template <typename Func>
class ExitGuard {
public:
	ExitGuard(Func&& fn) : on_exit_(std::forward<Func>(fn)) {}

	~ExitGuard() { on_exit_(); }

private:
	// decayed copy of the user-provided callable
	std::decay_t<Func> on_exit_;
};
|
||||||
|
|
||||||
|
// invoke user-provided callable when the guarded scope is left by an exception.
//
// The number of in-flight exceptions is recorded at construction and compared
// in the destructor, so the callable runs only if a NEW exception started
// unwinding after the guard was created. A guard created while another
// exception is already propagating (e.g. inside a destructor that runs during
// unwinding) therefore stays silent when its own scope exits normally.
template <typename Func>
class FailGuard {
	std::decay_t<Func> fn;
	// value of std::uncaught_exceptions() when the guard was constructed
	int exceptions_on_entry;

public:
	FailGuard(Func&& fn) : fn(std::forward<Func>(fn)), exceptions_on_entry(std::uncaught_exceptions()) {}

	~FailGuard() {
		if (std::uncaught_exceptions() > exceptions_on_entry) {
			fn();
		}
	}
};
|
||||||
|
|
||||||
|
// trace helpers
|
||||||
|
constexpr const int STATS_TITLE_WIDTH = 12;
|
||||||
|
constexpr const int STATS_FIELD_WIDTH = 12;
|
||||||
|
|
||||||
|
template <typename Value>
|
||||||
|
void putTitle(Value&& value) {
|
||||||
|
fmt::print("{0: <{1}} ", std::forward<Value>(value), STATS_TITLE_WIDTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Value>
|
||||||
|
void putTitleRight(Value&& value) {
|
||||||
|
fmt::print("{0: >{1}} ", std::forward<Value>(value), STATS_TITLE_WIDTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
// print a run of '=' that is STATS_TITLE_WIDTH wide, followed by one space
inline void putTitleBar() {
	constexpr int width = STATS_TITLE_WIDTH;
	fmt::print("{0:=<{1}} ", "", width);
}
|
||||||
|
|
||||||
|
template <typename Value>
|
||||||
|
void putField(Value&& value) {
|
||||||
|
fmt::print("{0: >{1}} ", std::forward<Value>(value), STATS_FIELD_WIDTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
// print a run of '=' that is STATS_FIELD_WIDTH wide, followed by one space
inline void putFieldBar() {
	constexpr int width = STATS_FIELD_WIDTH;
	fmt::print("{0:=>{1}} ", "", width);
}
|
||||||
|
|
||||||
|
template <typename Value>
|
||||||
|
void putFieldFloat(Value&& value, int precision) {
|
||||||
|
fmt::print("{0: >{1}.{2}f} ", std::forward<Value>(value), STATS_FIELD_WIDTH, precision);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mako
|
||||||
|
|
||||||
|
#endif /* UTILS_HPP */
|
|
@ -21,22 +21,39 @@ endif()
|
||||||
include(CheckSymbolExists)
|
include(CheckSymbolExists)
|
||||||
|
|
||||||
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support")
|
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support")
|
||||||
|
set(USE_WOLFSSL OFF CACHE BOOL "Build against WolfSSL instead of OpenSSL")
|
||||||
|
set(USE_OPENSSL ON CACHE BOOL "Build against OpenSSL")
|
||||||
if(DISABLE_TLS)
|
if(DISABLE_TLS)
|
||||||
set(WITH_TLS OFF)
|
set(WITH_TLS OFF)
|
||||||
else()
|
else()
|
||||||
set(OPENSSL_USE_STATIC_LIBS TRUE)
|
if(USE_WOLFSSL)
|
||||||
if(WIN32)
|
set(WOLFSSL_USE_STATIC_LIBS TRUE)
|
||||||
set(OPENSSL_MSVC_STATIC_RT ON)
|
find_package(WolfSSL)
|
||||||
endif()
|
if(WOLFSSL_FOUND)
|
||||||
find_package(OpenSSL)
|
set(CMAKE_REQUIRED_INCLUDES ${WOLFSSL_INCLUDE_DIR})
|
||||||
if(OPENSSL_FOUND)
|
set(WITH_TLS ON)
|
||||||
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
|
add_compile_options(-DHAVE_OPENSSL)
|
||||||
set(WITH_TLS ON)
|
add_compile_options(-DHAVE_WOLFSSL)
|
||||||
add_compile_options(-DHAVE_OPENSSL)
|
else()
|
||||||
else()
|
message(STATUS "WolfSSL was not found - Will compile without TLS Support")
|
||||||
message(STATUS "OpenSSL was not found - Will compile without TLS Support")
|
message(STATUS "You can set WOLFSSL_ROOT_DIR to help cmake find it")
|
||||||
message(STATUS "You can set OPENSSL_ROOT_DIR to help cmake find it")
|
set(WITH_TLS OFF)
|
||||||
set(WITH_TLS OFF)
|
endif()
|
||||||
|
elseif(USE_OPENSSL)
|
||||||
|
set(OPENSSL_USE_STATIC_LIBS TRUE)
|
||||||
|
if(WIN32)
|
||||||
|
set(OPENSSL_MSVC_STATIC_RT ON)
|
||||||
|
endif()
|
||||||
|
find_package(OpenSSL)
|
||||||
|
if(OPENSSL_FOUND)
|
||||||
|
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
|
||||||
|
set(WITH_TLS ON)
|
||||||
|
add_compile_options(-DHAVE_OPENSSL)
|
||||||
|
else()
|
||||||
|
message(STATUS "OpenSSL was not found - Will compile without TLS Support")
|
||||||
|
message(STATUS "You can set OPENSSL_ROOT_DIR to help cmake find it")
|
||||||
|
set(WITH_TLS OFF)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -198,7 +198,7 @@ function(fdb_configure_and_install)
|
||||||
string(TOLOWER "${pkg}" package)
|
string(TOLOWER "${pkg}" package)
|
||||||
string(TOUPPER "${IN_DESTINATION}" destination)
|
string(TOUPPER "${IN_DESTINATION}" destination)
|
||||||
get_install_dest(${pkg} INCLUDE INCLUDE_DIR)
|
get_install_dest(${pkg} INCLUDE INCLUDE_DIR)
|
||||||
get_install_dest(${pkg} INCLUDE LIB_DIR)
|
get_install_dest(${pkg} LIB LIB_DIR)
|
||||||
get_install_dest(${pkg} ${destination} install_path)
|
get_install_dest(${pkg} ${destination} install_path)
|
||||||
string(REGEX REPLACE "\.in$" "" name "${IN_FILE}")
|
string(REGEX REPLACE "\.in$" "" name "${IN_FILE}")
|
||||||
get_filename_component(name "${name}" NAME)
|
get_filename_component(name "${name}" NAME)
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
# FindWolfSSL
#
# Locates the WolfSSL headers and library.
#
# Inputs:
#   WOLFSSL_ROOT_DIR        - directory containing include/wolfssl/options.h (searched for if unset)
#   WOLFSSL_USE_STATIC_LIBS - when true, prefer static libraries
#
# Results:
#   WOLFSSL_FOUND
#   WOLFSSL_INCLUDE_DIR / WOLFSSL_INCLUDE_DIRS
#   WOLFSSL_LIBRARY     / WOLFSSL_LIBRARIES
#
# Imported targets (created when WolfSSL is found):
#   WolfSSL, plus the aliases OpenSSL::SSL and OpenSSL::CRYPTO that point at it

# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES.
# A find module runs in the caller's scope, so the original value is saved here
# and restored once the library lookup is done.
if(WOLFSSL_USE_STATIC_LIBS)
  set(_wolfssl_orig_find_library_suffixes ${CMAKE_FIND_LIBRARY_SUFFIXES})
  if(WIN32)
    set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
  else()
    set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
  endif()
endif()

find_path(WOLFSSL_ROOT_DIR
  NAMES
    include/wolfssl/options.h
)

find_path(WOLFSSL_INCLUDE_DIR
  NAMES
    wolfssl/ssl.h
  PATHS
    ${WOLFSSL_ROOT_DIR}/include
)

find_library(WOLFSSL_LIBRARY
  NAMES
    wolfssl
  PATHS
    ${WOLFSSL_ROOT_DIR}/lib
)

# Restore the caller's library suffix preference.
if(WOLFSSL_USE_STATIC_LIBS)
  set(CMAKE_FIND_LIBRARY_SUFFIXES ${_wolfssl_orig_find_library_suffixes})
  unset(_wolfssl_orig_find_library_suffixes)
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(WolfSSL
  REQUIRED_VARS
    WOLFSSL_LIBRARY
    WOLFSSL_INCLUDE_DIR
  FAIL_MESSAGE
    "Could NOT find WolfSSL"
)

mark_as_advanced(
  WOLFSSL_ROOT_DIR
  WOLFSSL_LIBRARY
  WOLFSSL_INCLUDE_DIR
)

if(WOLFSSL_FOUND)
  message(STATUS "Found wolfssl library: ${WOLFSSL_LIBRARY}")
  message(STATUS "Found wolfssl includes: ${WOLFSSL_INCLUDE_DIR}")

  set(WOLFSSL_INCLUDE_DIRS ${WOLFSSL_INCLUDE_DIR})
  set(WOLFSSL_LIBRARIES ${WOLFSSL_LIBRARY})

  # Guarded so that a repeated find_package(WolfSSL) does not try to redefine the targets.
  if(NOT TARGET WolfSSL)
    add_library(WolfSSL UNKNOWN IMPORTED GLOBAL)
    set_target_properties(WolfSSL PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${WOLFSSL_INCLUDE_DIR}"
      IMPORTED_LINK_INTERFACE_LANGUAGES "C"
      IMPORTED_LOCATION "${WOLFSSL_LIBRARY}")

    # Code that links OpenSSL::SSL / OpenSSL::CRYPTO picks up WolfSSL through these aliases.
    add_library(OpenSSL::SSL ALIAS WolfSSL)
    add_library(OpenSSL::CRYPTO ALIAS WolfSSL)
  endif()
endif()
|
|
@ -48,29 +48,25 @@
|
||||||
---
|
---
|
||||||
# name: test_execstack_permissions_libfdb_c[centos-versioned]
|
# name: test_execstack_permissions_libfdb_c[centos-versioned]
|
||||||
'
|
'
|
||||||
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
|
- /lib64/libfdb_c.so
|
||||||
0x0000000000000000 0x0000000000000000 RW 0x0
|
|
||||||
|
|
||||||
'
|
'
|
||||||
---
|
---
|
||||||
# name: test_execstack_permissions_libfdb_c[centos]
|
# name: test_execstack_permissions_libfdb_c[centos]
|
||||||
'
|
'
|
||||||
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
|
- /lib64/libfdb_c.so
|
||||||
0x0000000000000000 0x0000000000000000 RW 0x0
|
|
||||||
|
|
||||||
'
|
'
|
||||||
---
|
---
|
||||||
# name: test_execstack_permissions_libfdb_c[ubuntu-versioned]
|
# name: test_execstack_permissions_libfdb_c[ubuntu-versioned]
|
||||||
'
|
'
|
||||||
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
|
- /lib/libfdb_c.so
|
||||||
0x0000000000000000 0x0000000000000000 RW 0x0
|
|
||||||
|
|
||||||
'
|
'
|
||||||
---
|
---
|
||||||
# name: test_execstack_permissions_libfdb_c[ubuntu]
|
# name: test_execstack_permissions_libfdb_c[ubuntu]
|
||||||
'
|
'
|
||||||
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
|
- /lib/libfdb_c.so
|
||||||
0x0000000000000000 0x0000000000000000 RW 0x0
|
|
||||||
|
|
||||||
'
|
'
|
||||||
---
|
---
|
||||||
|
|
|
@ -22,6 +22,7 @@ import pathlib
|
||||||
import pytest
|
import pytest
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import sys
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from typing import Iterator, List, Optional, Union
|
from typing import Iterator, List, Optional, Union
|
||||||
|
@ -29,9 +30,14 @@ from typing import Iterator, List, Optional, Union
|
||||||
|
|
||||||
def run(args: List[str]) -> str:
    """Run a command, echo its output as it is produced, and return that output.

    The command line is printed first (shell-quoted). stderr is merged into
    stdout. Output is read until end-of-file, so lines still buffered in the
    pipe when the process exits are not lost.

    Args:
        args: the command and its arguments.

    Returns:
        Everything the command wrote to stdout/stderr, decoded as UTF-8.

    Raises:
        AssertionError: if the command exits with a non-zero status.
    """
    print("$ {}".format(" ".join(map(shlex.quote, args))))
    result = []
    with subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    ) as proc:
        # Iterating the pipe stops at EOF, i.e. only after the child has closed
        # its end; polling for process exit instead could stop reading too early.
        for raw_line in proc.stdout:
            text = raw_line.decode("utf-8")
            result.append(text)
            sys.stdout.write(text)
    # Leaving the with-block has waited for the process, so returncode is set.
    assert proc.returncode == 0
    return "".join(result)
|
||||||
|
|
||||||
|
|
||||||
class Image:
|
class Image:
|
||||||
|
@ -106,7 +112,16 @@ def ubuntu_image_with_fdb_helper(versioned: bool) -> Iterator[Optional[Image]]:
|
||||||
for deb in debs:
|
for deb in debs:
|
||||||
container.copy_to(deb, "/opt")
|
container.copy_to(deb, "/opt")
|
||||||
container.run(["bash", "-c", "apt-get update"])
|
container.run(["bash", "-c", "apt-get update"])
|
||||||
container.run(["bash", "-c", "apt-get install --yes binutils"]) # this is for testing libfdb_c execstack permissions
|
container.run(
|
||||||
|
["bash", "-c", "apt-get install --yes execstack"]
|
||||||
|
) # this is for testing libfdb_c execstack permissions
|
||||||
|
container.run(
|
||||||
|
[
|
||||||
|
"bash",
|
||||||
|
"-c",
|
||||||
|
"DEBIAN_FRONTEND=noninteractive DEBCONF_NONINTERACTIVE_SEEN=true apt-get install --yes gcc pkg-config cmake",
|
||||||
|
]
|
||||||
|
) # this is for testing building client apps
|
||||||
container.run(["bash", "-c", "dpkg -i /opt/*.deb"])
|
container.run(["bash", "-c", "dpkg -i /opt/*.deb"])
|
||||||
container.run(["bash", "-c", "rm /opt/*.deb"])
|
container.run(["bash", "-c", "rm /opt/*.deb"])
|
||||||
image = container.commit()
|
image = container.commit()
|
||||||
|
@ -151,7 +166,12 @@ def centos_image_with_fdb_helper(versioned: bool) -> Iterator[Optional[Image]]:
|
||||||
for rpm in rpms:
|
for rpm in rpms:
|
||||||
container.copy_to(rpm, "/opt")
|
container.copy_to(rpm, "/opt")
|
||||||
container.run(["bash", "-c", "yum update -y"])
|
container.run(["bash", "-c", "yum update -y"])
|
||||||
container.run(["bash", "-c", "yum install -y binutils"]) # this is for testing libfdb_c execstack permissions
|
container.run(
|
||||||
|
["bash", "-c", "yum install -y prelink"]
|
||||||
|
) # this is for testing libfdb_c execstack permissions
|
||||||
|
container.run(
|
||||||
|
["bash", "-c", "yum install -y gcc pkg-config cmake make"]
|
||||||
|
) # this is for testing building client apps
|
||||||
container.run(["bash", "-c", "yum install -y /opt/*.rpm"])
|
container.run(["bash", "-c", "yum install -y /opt/*.rpm"])
|
||||||
container.run(["bash", "-c", "rm /opt/*.rpm"])
|
container.run(["bash", "-c", "rm /opt/*.rpm"])
|
||||||
image = container.commit()
|
image = container.commit()
|
||||||
|
@ -232,6 +252,70 @@ def test_db_available(linux_container: Container):
|
||||||
linux_container.run(["fdbcli", "--exec", "get x"])
|
linux_container.run(["fdbcli", "--exec", "get x"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_client_app(linux_container: Container):
|
||||||
|
test_client_app_script = r"""#!/bin/bash
|
||||||
|
|
||||||
|
set -euxo pipefail
|
||||||
|
|
||||||
|
cat > app.c << EOF
|
||||||
|
// FDB_API_VERSION doesn't necessarily need to be kept up to date here
|
||||||
|
#define FDB_API_VERSION 700
|
||||||
|
#include <foundationdb/fdb_c.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
static void check(fdb_error_t e) {
|
||||||
|
if (e) {
|
||||||
|
fprintf(stderr, "%s\n", fdb_get_error(e));
|
||||||
|
fflush(NULL);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int result = 0;
|
||||||
|
|
||||||
|
static void callback(FDBFuture* f, void* _ignored) {
|
||||||
|
check(fdb_stop_network());
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
check(fdb_select_api_version(700));
|
||||||
|
check(fdb_setup_network());
|
||||||
|
FDBDatabase* db;
|
||||||
|
check(fdb_create_database(NULL, &db));
|
||||||
|
FDBTransaction* tr;
|
||||||
|
check(fdb_database_create_transaction(db, &tr));
|
||||||
|
FDBFuture* f = fdb_transaction_get_read_version(tr);
|
||||||
|
check(fdb_future_set_callback(f, callback, NULL));
|
||||||
|
check(fdb_run_network());
|
||||||
|
fdb_future_destroy(f);
|
||||||
|
fdb_transaction_destroy(tr);
|
||||||
|
fdb_database_destroy(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cc app.c `pkg-config foundationdb-client --cflags --libs`
|
||||||
|
./a.out
|
||||||
|
|
||||||
|
cat > CMakeLists.txt << EOF
|
||||||
|
project(app C)
|
||||||
|
find_package(FoundationDB-Client REQUIRED)
|
||||||
|
add_executable(app app.c)
|
||||||
|
target_link_libraries(app PRIVATE fdb_c)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make
|
||||||
|
./app
|
||||||
|
|
||||||
|
"""
|
||||||
|
linux_container.run(["bash", "-c", test_client_app_script])
|
||||||
|
|
||||||
|
|
||||||
def test_write(linux_container: Container, snapshot):
|
def test_write(linux_container: Container, snapshot):
|
||||||
linux_container.run(["fdbcli", "--exec", "writemode on; set x y"])
|
linux_container.run(["fdbcli", "--exec", "writemode on; set x y"])
|
||||||
assert snapshot == linux_container.run(["fdbcli", "--exec", "get x"])
|
assert snapshot == linux_container.run(["fdbcli", "--exec", "get x"])
|
||||||
|
@ -243,7 +327,7 @@ def test_execstack_permissions_libfdb_c(linux_container: Container, snapshot):
|
||||||
[
|
[
|
||||||
"bash",
|
"bash",
|
||||||
"-c",
|
"-c",
|
||||||
"readelf -l $(ldconfig -p | grep libfdb_c | awk '{print $(NF)}') | grep -A1 GNU_STACK",
|
"execstack -q $(ldconfig -p | grep libfdb_c | awk '{print $(NF)}')",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -148,7 +148,7 @@ is equivalent to something like:
|
||||||
tr.set(Tuple.from("class", "class1").pack(), encodeInt(100));
|
tr.set(Tuple.from("class", "class1").pack(), encodeInt(100));
|
||||||
t.commit().join();
|
t.commit().join();
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
t = t.onError(e).get();
|
t = t.onError(e).join();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -290,10 +290,10 @@ This is easy -- we simply add a condition to check that the value is non-zero. L
|
||||||
private static void signup(TransactionContext db, final String s, final String c) {
|
private static void signup(TransactionContext db, final String s, final String c) {
|
||||||
db.run((Transaction tr) -> {
|
db.run((Transaction tr) -> {
|
||||||
byte[] rec = Tuple.from("attends", s, c).pack();
|
byte[] rec = Tuple.from("attends", s, c).pack();
|
||||||
if (tr.get(rec).get() != null)
|
if (tr.get(rec).join() != null)
|
||||||
return null; // already signed up
|
return null; // already signed up
|
||||||
|
|
||||||
int seatsLeft = decodeInt(tr.get(Tuple.from("class", c).pack()).get());
|
int seatsLeft = decodeInt(tr.get(Tuple.from("class", c).pack()).join());
|
||||||
if (seatsLeft == 0)
|
if (seatsLeft == 0)
|
||||||
throw new IllegalStateException("No remaining seats");
|
throw new IllegalStateException("No remaining seats");
|
||||||
|
|
||||||
|
|
|
@ -1189,7 +1189,6 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
||||||
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
|
ClusterConnectionFile::lookupClusterFileName(opt.clusterFile);
|
||||||
try {
|
try {
|
||||||
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
|
ccf = makeReference<ClusterConnectionFile>(resolvedClusterFile.first);
|
||||||
wait(ccf->resolveHostnames());
|
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() == error_code_operation_cancelled) {
|
if (e.code() == error_code_operation_cancelled) {
|
||||||
throw;
|
throw;
|
||||||
|
|
|
@ -28,28 +28,46 @@
|
||||||
|
|
||||||
#include "fdbclient/CoordinationInterface.h"
|
#include "fdbclient/CoordinationInterface.h"
|
||||||
|
|
||||||
// Determine public IP address by calling the first coordinator.
|
// Determine public IP address by calling the first available coordinator.
|
||||||
|
// If connecting to every coordinator fails, throw bind_failed().
|
||||||
IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
|
IPAddress determinePublicIPAutomatically(ClusterConnectionString& ccs) {
|
||||||
try {
|
int size = ccs.coordinators().size() + ccs.hostnames.size();
|
||||||
using namespace boost::asio;
|
int index = 0;
|
||||||
|
loop {
|
||||||
|
try {
|
||||||
|
using namespace boost::asio;
|
||||||
|
|
||||||
io_service ioService;
|
io_service ioService;
|
||||||
ip::udp::socket socket(ioService);
|
ip::udp::socket socket(ioService);
|
||||||
|
|
||||||
ccs.resolveHostnamesBlocking();
|
NetworkAddress coordAddr;
|
||||||
const auto& coordAddr = ccs.coordinators()[0];
|
// Try coords first, because they don't need to be resolved.
|
||||||
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
|
if (index < ccs.coordinators().size()) {
|
||||||
: ip::address(ip::address_v4(coordAddr.ip.toV4()));
|
coordAddr = ccs.coordinators()[index];
|
||||||
|
} else {
|
||||||
|
Hostname& h = ccs.hostnames[index - ccs.coordinators().size()];
|
||||||
|
Optional<NetworkAddress> resolvedAddr = h.resolveBlocking();
|
||||||
|
if (!resolvedAddr.present()) {
|
||||||
|
throw lookup_failed();
|
||||||
|
}
|
||||||
|
coordAddr = resolvedAddr.get();
|
||||||
|
}
|
||||||
|
const auto boostIp = coordAddr.ip.isV6() ? ip::address(ip::address_v6(coordAddr.ip.toV6()))
|
||||||
|
: ip::address(ip::address_v4(coordAddr.ip.toV4()));
|
||||||
|
|
||||||
ip::udp::endpoint endpoint(boostIp, coordAddr.port);
|
ip::udp::endpoint endpoint(boostIp, coordAddr.port);
|
||||||
socket.connect(endpoint);
|
socket.connect(endpoint);
|
||||||
IPAddress ip = coordAddr.ip.isV6() ? IPAddress(socket.local_endpoint().address().to_v6().to_bytes())
|
IPAddress ip = coordAddr.ip.isV6() ? IPAddress(socket.local_endpoint().address().to_v6().to_bytes())
|
||||||
: IPAddress(socket.local_endpoint().address().to_v4().to_ulong());
|
: IPAddress(socket.local_endpoint().address().to_v4().to_ulong());
|
||||||
socket.close();
|
socket.close();
|
||||||
|
|
||||||
return ip;
|
return ip;
|
||||||
} catch (boost::system::system_error e) {
|
} catch (...) {
|
||||||
fprintf(stderr, "Error determining public address: %s\n", e.what());
|
++index;
|
||||||
throw bind_failed();
|
if (index == size) {
|
||||||
|
fprintf(stderr, "Error determining public address.\n");
|
||||||
|
throw bind_failed();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,7 +65,6 @@ set(FDBCLIENT_SRCS
|
||||||
GlobalConfig.actor.cpp
|
GlobalConfig.actor.cpp
|
||||||
GrvProxyInterface.h
|
GrvProxyInterface.h
|
||||||
HighContentionPrefixAllocator.actor.h
|
HighContentionPrefixAllocator.actor.h
|
||||||
HTTP.actor.cpp
|
|
||||||
IClientApi.h
|
IClientApi.h
|
||||||
IConfigTransaction.cpp
|
IConfigTransaction.cpp
|
||||||
IConfigTransaction.h
|
IConfigTransaction.h
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "fdbclient/Knobs.h"
|
#include "fdbclient/Knobs.h"
|
||||||
#include "fdbclient/FDBTypes.h"
|
#include "fdbclient/FDBTypes.h"
|
||||||
#include "fdbclient/SystemData.h"
|
#include "fdbclient/SystemData.h"
|
||||||
|
#include "fdbclient/Tenant.h"
|
||||||
#include "flow/UnitTest.h"
|
#include "flow/UnitTest.h"
|
||||||
|
|
||||||
#define init(...) KNOB_FN(__VA_ARGS__, INIT_ATOMIC_KNOB, INIT_KNOB)(__VA_ARGS__)
|
#define init(...) KNOB_FN(__VA_ARGS__, INIT_ATOMIC_KNOB, INIT_KNOB)(__VA_ARGS__)
|
||||||
|
@ -82,6 +83,7 @@ void ClientKnobs::initialize(Randomize randomize) {
|
||||||
init( CHANGE_FEED_CACHE_SIZE, 100000 ); if( randomize && BUGGIFY ) CHANGE_FEED_CACHE_SIZE = 1;
|
init( CHANGE_FEED_CACHE_SIZE, 100000 ); if( randomize && BUGGIFY ) CHANGE_FEED_CACHE_SIZE = 1;
|
||||||
init( CHANGE_FEED_POP_TIMEOUT, 5.0 );
|
init( CHANGE_FEED_POP_TIMEOUT, 5.0 );
|
||||||
init( CHANGE_FEED_STREAM_MIN_BYTES, 1e4 ); if( randomize && BUGGIFY ) CHANGE_FEED_STREAM_MIN_BYTES = 1;
|
init( CHANGE_FEED_STREAM_MIN_BYTES, 1e4 ); if( randomize && BUGGIFY ) CHANGE_FEED_STREAM_MIN_BYTES = 1;
|
||||||
|
init( TENANT_PREFIX_SIZE_LIMIT, 28 ); ASSERT(TENANT_PREFIX_SIZE_LIMIT >= TenantMapEntry::ROOT_PREFIX_SIZE); // includes 8-byte ID and optional tenant subspace
|
||||||
|
|
||||||
init( MAX_BATCH_SIZE, 1000 ); if( randomize && BUGGIFY ) MAX_BATCH_SIZE = 1;
|
init( MAX_BATCH_SIZE, 1000 ); if( randomize && BUGGIFY ) MAX_BATCH_SIZE = 1;
|
||||||
init( GRV_BATCH_TIMEOUT, 0.005 ); if( randomize && BUGGIFY ) GRV_BATCH_TIMEOUT = 0.1;
|
init( GRV_BATCH_TIMEOUT, 0.005 ); if( randomize && BUGGIFY ) GRV_BATCH_TIMEOUT = 0.1;
|
||||||
|
|
|
@ -81,6 +81,7 @@ public:
|
||||||
int64_t CHANGE_FEED_CACHE_SIZE;
|
int64_t CHANGE_FEED_CACHE_SIZE;
|
||||||
double CHANGE_FEED_POP_TIMEOUT;
|
double CHANGE_FEED_POP_TIMEOUT;
|
||||||
int64_t CHANGE_FEED_STREAM_MIN_BYTES;
|
int64_t CHANGE_FEED_STREAM_MIN_BYTES;
|
||||||
|
int64_t TENANT_PREFIX_SIZE_LIMIT;
|
||||||
|
|
||||||
int MAX_BATCH_SIZE;
|
int MAX_BATCH_SIZE;
|
||||||
double GRV_BATCH_TIMEOUT;
|
double GRV_BATCH_TIMEOUT;
|
||||||
|
|
|
@ -61,61 +61,31 @@ struct ClientLeaderRegInterface {
|
||||||
// - There is no address present more than once
|
// - There is no address present more than once
|
||||||
class ClusterConnectionString {
|
class ClusterConnectionString {
|
||||||
public:
|
public:
|
||||||
enum ConnectionStringStatus { RESOLVED, RESOLVING, UNRESOLVED };
|
|
||||||
|
|
||||||
ClusterConnectionString() {}
|
ClusterConnectionString() {}
|
||||||
ClusterConnectionString(const std::string& connStr);
|
ClusterConnectionString(const std::string& connectionString);
|
||||||
ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key);
|
ClusterConnectionString(const std::vector<NetworkAddress>& coordinators, Key key);
|
||||||
ClusterConnectionString(const std::vector<Hostname>& hosts, Key key);
|
ClusterConnectionString(const std::vector<Hostname>& hosts, Key key);
|
||||||
|
|
||||||
ClusterConnectionString(const ClusterConnectionString& rhs) { operator=(rhs); }
|
|
||||||
ClusterConnectionString& operator=(const ClusterConnectionString& rhs) {
|
|
||||||
// Copy everything except AsyncTrigger resolveFinish.
|
|
||||||
status = rhs.status;
|
|
||||||
coords = rhs.coords;
|
|
||||||
hostnames = rhs.hostnames;
|
|
||||||
networkAddressToHostname = rhs.networkAddressToHostname;
|
|
||||||
key = rhs.key;
|
|
||||||
keyDesc = rhs.keyDesc;
|
|
||||||
connectionString = rhs.connectionString;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<NetworkAddress> const& coordinators() const { return coords; }
|
std::vector<NetworkAddress> const& coordinators() const { return coords; }
|
||||||
void addResolved(const Hostname& hostname, const NetworkAddress& address) {
|
|
||||||
coords.push_back(address);
|
|
||||||
networkAddressToHostname.emplace(address, hostname);
|
|
||||||
}
|
|
||||||
Key clusterKey() const { return key; }
|
Key clusterKey() const { return key; }
|
||||||
Key clusterKeyName() const {
|
Key clusterKeyName() const {
|
||||||
return keyDesc;
|
return keyDesc;
|
||||||
} // Returns the "name" or "description" part of the clusterKey (the part before the ':')
|
} // Returns the "name" or "description" part of the clusterKey (the part before the ':')
|
||||||
std::string toString() const;
|
std::string toString() const;
|
||||||
static std::string getErrorString(std::string const& source, Error const& e);
|
static std::string getErrorString(std::string const& source, Error const& e);
|
||||||
Future<Void> resolveHostnames();
|
|
||||||
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
|
|
||||||
// should be preferred.
|
|
||||||
void resolveHostnamesBlocking();
|
|
||||||
// This function derives the member connectionString from the current key, coordinators and hostnames.
|
|
||||||
void resetConnectionString();
|
|
||||||
|
|
||||||
void resetToUnresolved();
|
|
||||||
void parseKey(const std::string& key);
|
void parseKey(const std::string& key);
|
||||||
|
|
||||||
ConnectionStringStatus status = RESOLVED;
|
|
||||||
AsyncTrigger resolveFinish;
|
|
||||||
// This function tries to resolve all hostnames once, and return them with coords.
|
// This function tries to resolve all hostnames once, and return them with coords.
|
||||||
// Best effort, does not guarantee that the resolves succeed.
|
// Best effort, does not guarantee that the resolves succeed.
|
||||||
Future<std::vector<NetworkAddress>> tryResolveHostnames();
|
Future<std::vector<NetworkAddress>> tryResolveHostnames();
|
||||||
|
|
||||||
std::vector<NetworkAddress> coords;
|
std::vector<NetworkAddress> coords;
|
||||||
std::vector<Hostname> hostnames;
|
std::vector<Hostname> hostnames;
|
||||||
std::unordered_map<NetworkAddress, Hostname> networkAddressToHostname;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void parseConnString();
|
void parseConnString();
|
||||||
Key key, keyDesc;
|
Key key, keyDesc;
|
||||||
std::string connectionString;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
FDB_DECLARE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted);
|
FDB_DECLARE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted);
|
||||||
|
@ -165,12 +135,6 @@ public:
|
||||||
// Signals to the connection record that it was successfully used to connect to a cluster.
|
// Signals to the connection record that it was successfully used to connect to a cluster.
|
||||||
void notifyConnected();
|
void notifyConnected();
|
||||||
|
|
||||||
ClusterConnectionString::ConnectionStringStatus connectionStringStatus() const;
|
|
||||||
Future<Void> resolveHostnames();
|
|
||||||
// This one should only be used when resolving asynchronously is impossible. For all other cases, resolveHostnames()
|
|
||||||
// should be preferred.
|
|
||||||
void resolveHostnamesBlocking();
|
|
||||||
|
|
||||||
virtual void addref() = 0;
|
virtual void addref() = 0;
|
||||||
virtual void delref() = 0;
|
virtual void delref() = 0;
|
||||||
|
|
||||||
|
@ -275,12 +239,21 @@ struct OpenDatabaseCoordRequest {
|
||||||
Standalone<VectorRef<ClientVersionRef>> supportedVersions;
|
Standalone<VectorRef<ClientVersionRef>> supportedVersions;
|
||||||
UID knownClientInfoID;
|
UID knownClientInfoID;
|
||||||
Key clusterKey;
|
Key clusterKey;
|
||||||
|
std::vector<Hostname> hostnames;
|
||||||
std::vector<NetworkAddress> coordinators;
|
std::vector<NetworkAddress> coordinators;
|
||||||
ReplyPromise<CachedSerialization<struct ClientDBInfo>> reply;
|
ReplyPromise<CachedSerialization<struct ClientDBInfo>> reply;
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, issues, supportedVersions, traceLogGroup, knownClientInfoID, clusterKey, coordinators, reply);
|
serializer(ar,
|
||||||
|
issues,
|
||||||
|
supportedVersions,
|
||||||
|
traceLogGroup,
|
||||||
|
knownClientInfoID,
|
||||||
|
clusterKey,
|
||||||
|
hostnames,
|
||||||
|
coordinators,
|
||||||
|
reply);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
#include "fdbclient/FDBTypes.h"
|
#include "fdbclient/FDBTypes.h"
|
||||||
#include "fdbclient/Knobs.h"
|
#include "fdbclient/Knobs.h"
|
||||||
|
#include "fdbclient/NativeAPI.actor.h"
|
||||||
|
|
||||||
KeyRef keyBetween(const KeyRangeRef& keys) {
|
KeyRef keyBetween(const KeyRangeRef& keys) {
|
||||||
int pos = 0; // will be the position of the first difference between keys.begin and keys.end
|
int pos = 0; // will be the position of the first difference between keys.begin and keys.end
|
||||||
|
@ -40,16 +41,14 @@ KeyRef keyBetween(const KeyRangeRef& keys) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void KeySelectorRef::setKey(KeyRef const& key) {
|
void KeySelectorRef::setKey(KeyRef const& key) {
|
||||||
// There are no keys in the database with size greater than KEY_SIZE_LIMIT, so if this key selector has a key
|
// There are no keys in the database with size greater than the max key size, so if this key selector has a key
|
||||||
// which is large, then we can translate it to an equivalent key selector with a smaller key
|
// which is large, then we can translate it to an equivalent key selector with a smaller key
|
||||||
if (key.size() >
|
int64_t maxKeySize = getMaxKeySize(key);
|
||||||
(key.startsWith(LiteralStringRef("\xff")) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
if (key.size() > maxKeySize) {
|
||||||
this->key = key.substr(0,
|
this->key = key.substr(0, maxKeySize + 1);
|
||||||
(key.startsWith(LiteralStringRef("\xff")) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
} else {
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1);
|
|
||||||
else
|
|
||||||
this->key = key;
|
this->key = key;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void KeySelectorRef::setKeyUnlimited(KeyRef const& key) {
|
void KeySelectorRef::setKeyUnlimited(KeyRef const& key) {
|
||||||
|
|
|
@ -746,6 +746,17 @@ Future<Optional<TenantMapEntry>> createTenantTransaction(Transaction tr, TenantN
|
||||||
state Optional<Value> lastIdVal = wait(safeThreadFutureToFuture(lastIdFuture));
|
state Optional<Value> lastIdVal = wait(safeThreadFutureToFuture(lastIdFuture));
|
||||||
Optional<Value> tenantDataPrefix = wait(safeThreadFutureToFuture(tenantDataPrefixFuture));
|
Optional<Value> tenantDataPrefix = wait(safeThreadFutureToFuture(tenantDataPrefixFuture));
|
||||||
|
|
||||||
|
if (tenantDataPrefix.present() &&
|
||||||
|
tenantDataPrefix.get().size() + TenantMapEntry::ROOT_PREFIX_SIZE > CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT) {
|
||||||
|
TraceEvent(SevWarnAlways, "TenantPrefixTooLarge")
|
||||||
|
.detail("TenantSubspace", tenantDataPrefix.get())
|
||||||
|
.detail("TenantSubspaceLength", tenantDataPrefix.get().size())
|
||||||
|
.detail("RootPrefixLength", TenantMapEntry::ROOT_PREFIX_SIZE)
|
||||||
|
.detail("MaxTenantPrefixSize", CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT);
|
||||||
|
|
||||||
|
throw client_invalid_operation();
|
||||||
|
}
|
||||||
|
|
||||||
state TenantMapEntry newTenant(lastIdVal.present() ? TenantMapEntry::prefixToId(lastIdVal.get()) + 1 : 0,
|
state TenantMapEntry newTenant(lastIdVal.present() ? TenantMapEntry::prefixToId(lastIdVal.get()) + 1 : 0,
|
||||||
tenantDataPrefix.present() ? (KeyRef)tenantDataPrefix.get() : ""_sr);
|
tenantDataPrefix.present() ? (KeyRef)tenantDataPrefix.get() : ""_sr);
|
||||||
|
|
||||||
|
|
|
@ -782,7 +782,7 @@ ACTOR Future<std::vector<ProcessData>> getWorkers(Database cx) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
|
ACTOR Future<Optional<ClusterConnectionString>> getConnectionString(Database cx) {
|
||||||
state Transaction tr(cx);
|
state Transaction tr(cx);
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
|
@ -790,9 +790,8 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
|
||||||
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||||
Optional<Value> currentKey = wait(tr.get(coordinatorsKey));
|
Optional<Value> currentKey = wait(tr.get(coordinatorsKey));
|
||||||
if (!currentKey.present())
|
if (!currentKey.present())
|
||||||
return std::vector<NetworkAddress>();
|
return Optional<ClusterConnectionString>();
|
||||||
|
return ClusterConnectionString(currentKey.get().toString());
|
||||||
return ClusterConnectionString(currentKey.get().toString()).coordinators();
|
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
wait(tr.onError(e));
|
wait(tr.onError(e));
|
||||||
}
|
}
|
||||||
|
@ -801,7 +800,7 @@ ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx) {
|
||||||
|
|
||||||
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
Reference<IQuorumChange> change,
|
Reference<IQuorumChange> change,
|
||||||
ClusterConnectionString* conn) {
|
std::vector<NetworkAddress> desiredCoordinators) {
|
||||||
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
tr->setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||||
tr->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
|
tr->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
|
||||||
|
@ -812,47 +811,45 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely?
|
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone deleted this key entirely?
|
||||||
|
|
||||||
state ClusterConnectionString old(currentKey.get().toString());
|
state ClusterConnectionString old(currentKey.get().toString());
|
||||||
wait(old.resolveHostnames());
|
|
||||||
if (tr->getDatabase()->getConnectionRecord() &&
|
if (tr->getDatabase()->getConnectionRecord() &&
|
||||||
old.clusterKeyName().toString() !=
|
old.clusterKeyName().toString() !=
|
||||||
tr->getDatabase()->getConnectionRecord()->getConnectionString().clusterKeyName())
|
tr->getDatabase()->getConnectionRecord()->getConnectionString().clusterKeyName())
|
||||||
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
|
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
|
||||||
|
|
||||||
|
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
|
||||||
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
|
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
|
||||||
if (!conn->coords.size()) {
|
if (!desiredCoordinators.size()) {
|
||||||
std::vector<NetworkAddress> desiredCoordinatorAddresses = wait(change->getDesiredCoordinators(
|
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
|
||||||
tr,
|
tr,
|
||||||
old.coordinators(),
|
oldCoordinators,
|
||||||
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
|
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
|
||||||
result));
|
result));
|
||||||
conn->coords = desiredCoordinatorAddresses;
|
desiredCoordinators = _desiredCoordinators;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result != CoordinatorsResult::SUCCESS)
|
if (result != CoordinatorsResult::SUCCESS)
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
if (!conn->coordinators().size())
|
if (!desiredCoordinators.size())
|
||||||
return CoordinatorsResult::INVALID_NETWORK_ADDRESSES;
|
return CoordinatorsResult::INVALID_NETWORK_ADDRESSES;
|
||||||
|
|
||||||
std::sort(conn->coords.begin(), conn->coords.end());
|
std::sort(desiredCoordinators.begin(), desiredCoordinators.end());
|
||||||
std::sort(conn->hostnames.begin(), conn->hostnames.end());
|
|
||||||
|
|
||||||
std::string newName = change->getDesiredClusterKeyName();
|
std::string newName = change->getDesiredClusterKeyName();
|
||||||
if (newName.empty())
|
if (newName.empty())
|
||||||
newName = old.clusterKeyName().toString();
|
newName = old.clusterKeyName().toString();
|
||||||
|
|
||||||
if (old.coordinators() == conn->coordinators() && old.clusterKeyName() == newName)
|
if (oldCoordinators == desiredCoordinators && old.clusterKeyName() == newName)
|
||||||
return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
|
return CoordinatorsResult::SAME_NETWORK_ADDRESSES;
|
||||||
|
|
||||||
std::string key(newName + ':' + deterministicRandom()->randomAlphaNumeric(32));
|
state ClusterConnectionString conn(desiredCoordinators,
|
||||||
conn->parseKey(key);
|
StringRef(newName + ':' + deterministicRandom()->randomAlphaNumeric(32)));
|
||||||
conn->resetConnectionString();
|
|
||||||
|
|
||||||
if (g_network->isSimulated()) {
|
if (g_network->isSimulated()) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int protectedCount = 0;
|
int protectedCount = 0;
|
||||||
while ((protectedCount < ((conn->coordinators().size() / 2) + 1)) && (i < conn->coordinators().size())) {
|
while ((protectedCount < ((desiredCoordinators.size() / 2) + 1)) && (i < desiredCoordinators.size())) {
|
||||||
auto process = g_simulator.getProcessByAddress(conn->coordinators()[i]);
|
auto process = g_simulator.getProcessByAddress(desiredCoordinators[i]);
|
||||||
auto addresses = process->addresses;
|
auto addresses = process->addresses;
|
||||||
|
|
||||||
if (!process->isReliable()) {
|
if (!process->isReliable()) {
|
||||||
|
@ -864,14 +861,14 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
if (addresses.secondaryAddress.present()) {
|
if (addresses.secondaryAddress.present()) {
|
||||||
g_simulator.protectedAddresses.insert(process->addresses.secondaryAddress.get());
|
g_simulator.protectedAddresses.insert(process->addresses.secondaryAddress.get());
|
||||||
}
|
}
|
||||||
TraceEvent("ProtectCoordinator").detail("Address", conn->coordinators()[i]).backtrace();
|
TraceEvent("ProtectCoordinator").detail("Address", desiredCoordinators[i]).backtrace();
|
||||||
protectedCount++;
|
protectedCount++;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||||
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(*conn)));
|
ClientCoordinators coord(Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(conn)));
|
||||||
|
|
||||||
leaderServers.reserve(coord.clientLeaderServers.size());
|
leaderServers.reserve(coord.clientLeaderServers.size());
|
||||||
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
||||||
|
@ -883,7 +880,7 @@ ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
when(wait(waitForAll(leaderServers))) {}
|
when(wait(waitForAll(leaderServers))) {}
|
||||||
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
|
when(wait(delay(5.0))) { return CoordinatorsResult::COORDINATOR_UNREACHABLE; }
|
||||||
}
|
}
|
||||||
tr->set(coordinatorsKey, conn->toString());
|
tr->set(coordinatorsKey, conn.toString());
|
||||||
return Optional<CoordinatorsResult>();
|
return Optional<CoordinatorsResult>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -909,11 +906,12 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
|
||||||
old.clusterKeyName().toString() != cx->getConnectionRecord()->getConnectionString().clusterKeyName())
|
old.clusterKeyName().toString() != cx->getConnectionRecord()->getConnectionString().clusterKeyName())
|
||||||
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
|
return CoordinatorsResult::BAD_DATABASE_STATE; // Someone changed the "name" of the database??
|
||||||
|
|
||||||
|
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
|
||||||
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
|
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
|
||||||
if (!desiredCoordinators.size()) {
|
if (!desiredCoordinators.size()) {
|
||||||
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
|
std::vector<NetworkAddress> _desiredCoordinators = wait(change->getDesiredCoordinators(
|
||||||
&tr,
|
&tr,
|
||||||
old.coordinators(),
|
oldCoordinators,
|
||||||
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
|
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
|
||||||
result));
|
result));
|
||||||
desiredCoordinators = _desiredCoordinators;
|
desiredCoordinators = _desiredCoordinators;
|
||||||
|
@ -937,7 +935,7 @@ ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChan
|
||||||
if (newName.empty())
|
if (newName.empty())
|
||||||
newName = old.clusterKeyName().toString();
|
newName = old.clusterKeyName().toString();
|
||||||
|
|
||||||
if (old.coordinators() == desiredCoordinators && old.clusterKeyName() == newName)
|
if (oldCoordinators == desiredCoordinators && old.clusterKeyName() == newName)
|
||||||
return retries ? CoordinatorsResult::SUCCESS : CoordinatorsResult::SAME_NETWORK_ADDRESSES;
|
return retries ? CoordinatorsResult::SUCCESS : CoordinatorsResult::SAME_NETWORK_ADDRESSES;
|
||||||
|
|
||||||
state ClusterConnectionString conn(
|
state ClusterConnectionString conn(
|
||||||
|
@ -1075,9 +1073,16 @@ struct AutoQuorumChange final : IQuorumChange {
|
||||||
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||||
leaderServers.reserve(coord.clientLeaderServers.size());
|
leaderServers.reserve(coord.clientLeaderServers.size());
|
||||||
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
|
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
|
||||||
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
if (coord.clientLeaderServers[i].hostname.present()) {
|
||||||
GetLeaderRequest(coord.clusterKey, UID()),
|
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coord.clusterKey, UID()),
|
||||||
TaskPriority::CoordinationReply));
|
coord.clientLeaderServers[i].hostname.get(),
|
||||||
|
WLTOKEN_CLIENTLEADERREG_GETLEADER,
|
||||||
|
TaskPriority::CoordinationReply));
|
||||||
|
} else {
|
||||||
|
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
||||||
|
GetLeaderRequest(coord.clusterKey, UID()),
|
||||||
|
TaskPriority::CoordinationReply));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Optional<std::vector<Optional<LeaderInfo>>> results =
|
Optional<std::vector<Optional<LeaderInfo>>> results =
|
||||||
wait(timeout(getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY));
|
wait(timeout(getAll(leaderServers), CLIENT_KNOBS->IS_ACCEPTABLE_DELAY));
|
||||||
|
|
|
@ -56,7 +56,7 @@ struct IQuorumChange : ReferenceCounted<IQuorumChange> {
|
||||||
// Change to use the given set of coordination servers
|
// Change to use the given set of coordination servers
|
||||||
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
ACTOR Future<Optional<CoordinatorsResult>> changeQuorumChecker(Transaction* tr,
|
||||||
Reference<IQuorumChange> change,
|
Reference<IQuorumChange> change,
|
||||||
ClusterConnectionString* conn);
|
std::vector<NetworkAddress> desiredCoordinators);
|
||||||
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
|
ACTOR Future<CoordinatorsResult> changeQuorum(Database cx, Reference<IQuorumChange> change);
|
||||||
Reference<IQuorumChange> autoQuorumChange(int desired = -1);
|
Reference<IQuorumChange> autoQuorumChange(int desired = -1);
|
||||||
Reference<IQuorumChange> noQuorumChange();
|
Reference<IQuorumChange> noQuorumChange();
|
||||||
|
@ -146,7 +146,7 @@ ACTOR Future<bool> setHealthyZone(Database cx, StringRef zoneId, double seconds,
|
||||||
ACTOR Future<Void> waitForPrimaryDC(Database cx, StringRef dcId);
|
ACTOR Future<Void> waitForPrimaryDC(Database cx, StringRef dcId);
|
||||||
|
|
||||||
// Gets the cluster connection string
|
// Gets the cluster connection string
|
||||||
ACTOR Future<std::vector<NetworkAddress>> getCoordinators(Database cx);
|
ACTOR Future<Optional<ClusterConnectionString>> getConnectionString(Database cx);
|
||||||
|
|
||||||
void schemaCoverage(std::string const& spath, bool covered = true);
|
void schemaCoverage(std::string const& spath, bool covered = true);
|
||||||
bool schemaMatch(json_spirit::mValue const& schema,
|
bool schemaMatch(json_spirit::mValue const& schema,
|
||||||
|
|
|
@ -77,18 +77,6 @@ void IClusterConnectionRecord::setPersisted() {
|
||||||
connectionStringNeedsPersisted = false;
|
connectionStringNeedsPersisted = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
ClusterConnectionString::ConnectionStringStatus IClusterConnectionRecord::connectionStringStatus() const {
|
|
||||||
return cs.status;
|
|
||||||
}
|
|
||||||
|
|
||||||
Future<Void> IClusterConnectionRecord::resolveHostnames() {
|
|
||||||
return cs.resolveHostnames();
|
|
||||||
}
|
|
||||||
|
|
||||||
void IClusterConnectionRecord::resolveHostnamesBlocking() {
|
|
||||||
cs.resolveHostnamesBlocking();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ClusterConnectionString::getErrorString(std::string const& source, Error const& e) {
|
std::string ClusterConnectionString::getErrorString(std::string const& source, Error const& e) {
|
||||||
if (e.code() == error_code_connection_string_invalid) {
|
if (e.code() == error_code_connection_string_invalid) {
|
||||||
return format("Invalid connection string `%s: %d %s", source.c_str(), e.code(), e.what());
|
return format("Invalid connection string `%s: %d %s", source.c_str(), e.code(), e.what());
|
||||||
|
@ -97,101 +85,19 @@ std::string ClusterConnectionString::getErrorString(std::string const& source, E
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> resolveHostnamesImpl(ClusterConnectionString* self) {
|
ClusterConnectionString::ClusterConnectionString(const std::string& connectionString) {
|
||||||
loop {
|
auto trimmed = trim(connectionString);
|
||||||
if (self->status == ClusterConnectionString::UNRESOLVED) {
|
|
||||||
self->status = ClusterConnectionString::RESOLVING;
|
|
||||||
std::vector<Future<Void>> fs;
|
|
||||||
for (auto const& hostname : self->hostnames) {
|
|
||||||
fs.push_back(map(INetworkConnections::net()->resolveTCPEndpoint(hostname.host, hostname.service),
|
|
||||||
[=](std::vector<NetworkAddress> const& addresses) -> Void {
|
|
||||||
NetworkAddress address =
|
|
||||||
addresses[deterministicRandom()->randomInt(0, addresses.size())];
|
|
||||||
address.flags = 0; // Reset the parsed address to public
|
|
||||||
address.fromHostname = NetworkAddressFromHostname::True;
|
|
||||||
if (hostname.isTLS) {
|
|
||||||
address.flags |= NetworkAddress::FLAG_TLS;
|
|
||||||
}
|
|
||||||
self->addResolved(hostname, address);
|
|
||||||
return Void();
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
wait(waitForAll(fs));
|
|
||||||
std::sort(self->coords.begin(), self->coords.end());
|
|
||||||
if (std::unique(self->coords.begin(), self->coords.end()) != self->coords.end()) {
|
|
||||||
self->status = ClusterConnectionString::UNRESOLVED;
|
|
||||||
self->resolveFinish.trigger();
|
|
||||||
throw connection_string_invalid();
|
|
||||||
}
|
|
||||||
self->status = ClusterConnectionString::RESOLVED;
|
|
||||||
self->resolveFinish.trigger();
|
|
||||||
break;
|
|
||||||
} else if (self->status == ClusterConnectionString::RESOLVING) {
|
|
||||||
wait(self->resolveFinish.onTrigger());
|
|
||||||
if (self->status == ClusterConnectionString::RESOLVED) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Otherwise, this means other threads failed on resolve, so here we go back to the loop and try to resolve
|
|
||||||
// again.
|
|
||||||
} else {
|
|
||||||
// status is RESOLVED, nothing to do.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Void();
|
|
||||||
}
|
|
||||||
|
|
||||||
Future<Void> ClusterConnectionString::resolveHostnames() {
|
|
||||||
return resolveHostnamesImpl(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ClusterConnectionString::resolveHostnamesBlocking() {
|
|
||||||
if (status != RESOLVED) {
|
|
||||||
status = RESOLVING;
|
|
||||||
for (auto const& hostname : hostnames) {
|
|
||||||
std::vector<NetworkAddress> addresses =
|
|
||||||
INetworkConnections::net()->resolveTCPEndpointBlocking(hostname.host, hostname.service);
|
|
||||||
NetworkAddress address = addresses[deterministicRandom()->randomInt(0, addresses.size())];
|
|
||||||
address.flags = 0; // Reset the parsed address to public
|
|
||||||
address.fromHostname = NetworkAddressFromHostname::True;
|
|
||||||
if (hostname.isTLS) {
|
|
||||||
address.flags |= NetworkAddress::FLAG_TLS;
|
|
||||||
}
|
|
||||||
addResolved(hostname, address);
|
|
||||||
}
|
|
||||||
std::sort(coords.begin(), coords.end());
|
|
||||||
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
|
|
||||||
status = UNRESOLVED;
|
|
||||||
throw connection_string_invalid();
|
|
||||||
}
|
|
||||||
status = RESOLVED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ClusterConnectionString::resetToUnresolved() {
|
|
||||||
if (status == RESOLVED && hostnames.size() > 0) {
|
|
||||||
coords.clear();
|
|
||||||
hostnames.clear();
|
|
||||||
networkAddressToHostname.clear();
|
|
||||||
status = UNRESOLVED;
|
|
||||||
parseConnString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ClusterConnectionString::resetConnectionString() {
|
|
||||||
connectionString = toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
void ClusterConnectionString::parseConnString() {
|
|
||||||
// Split on '@' into key@addrs
|
// Split on '@' into key@addrs
|
||||||
int pAt = connectionString.find_first_of('@');
|
int pAt = trimmed.find_first_of('@');
|
||||||
if (pAt == connectionString.npos) {
|
if (pAt == trimmed.npos) {
|
||||||
throw connection_string_invalid();
|
throw connection_string_invalid();
|
||||||
}
|
}
|
||||||
std::string key = connectionString.substr(0, pAt);
|
std::string key = trimmed.substr(0, pAt);
|
||||||
std::string addrs = connectionString.substr(pAt + 1);
|
std::string addrs = trimmed.substr(pAt + 1);
|
||||||
|
|
||||||
parseKey(key);
|
parseKey(key);
|
||||||
|
std::set<Hostname> hostnameSet;
|
||||||
|
std::set<NetworkAddress> addressSet;
|
||||||
std::string curAddr;
|
std::string curAddr;
|
||||||
for (int p = 0; p <= addrs.size();) {
|
for (int p = 0; p <= addrs.size();) {
|
||||||
int pComma = addrs.find_first_of(',', p);
|
int pComma = addrs.find_first_of(',', p);
|
||||||
|
@ -199,31 +105,29 @@ void ClusterConnectionString::parseConnString() {
|
||||||
pComma = addrs.size();
|
pComma = addrs.size();
|
||||||
curAddr = addrs.substr(p, pComma - p);
|
curAddr = addrs.substr(p, pComma - p);
|
||||||
if (Hostname::isHostname(curAddr)) {
|
if (Hostname::isHostname(curAddr)) {
|
||||||
|
Hostname h = Hostname::parse(curAddr);
|
||||||
|
// Check that there are no duplicate hostnames
|
||||||
|
if (hostnameSet.find(h) != hostnameSet.end()) {
|
||||||
|
throw connection_string_invalid();
|
||||||
|
}
|
||||||
hostnames.push_back(Hostname::parse(curAddr));
|
hostnames.push_back(Hostname::parse(curAddr));
|
||||||
|
hostnameSet.insert(h);
|
||||||
} else {
|
} else {
|
||||||
coords.push_back(NetworkAddress::parse(curAddr));
|
NetworkAddress n = NetworkAddress::parse(curAddr);
|
||||||
|
// Check that there are no duplicate addresses
|
||||||
|
if (addressSet.find(n) != addressSet.end()) {
|
||||||
|
throw connection_string_invalid();
|
||||||
|
}
|
||||||
|
coords.push_back(n);
|
||||||
|
addressSet.insert(n);
|
||||||
}
|
}
|
||||||
p = pComma + 1;
|
p = pComma + 1;
|
||||||
}
|
}
|
||||||
if (hostnames.size() > 0) {
|
|
||||||
status = UNRESOLVED;
|
|
||||||
}
|
|
||||||
ASSERT((coords.size() + hostnames.size()) > 0);
|
ASSERT((coords.size() + hostnames.size()) > 0);
|
||||||
|
|
||||||
std::sort(coords.begin(), coords.end());
|
|
||||||
// Check that there are no duplicate addresses
|
|
||||||
if (std::unique(coords.begin(), coords.end()) != coords.end()) {
|
|
||||||
throw connection_string_invalid();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ClusterConnectionString::ClusterConnectionString(const std::string& connStr) {
|
|
||||||
connectionString = trim(connStr);
|
|
||||||
parseConnString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
|
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
|
||||||
std::string input;
|
state std::string input;
|
||||||
|
|
||||||
{
|
{
|
||||||
input = "asdf:2345@1.1.1.1:345";
|
input = "asdf:2345@1.1.1.1:345";
|
||||||
|
@ -231,6 +135,15 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
input = "asdf:2345@1.1.1.1:345,1.1.1.1:345";
|
||||||
|
try {
|
||||||
|
ClusterConnectionString cs(input);
|
||||||
|
} catch (Error& e) {
|
||||||
|
ASSERT(e.code() == error_code_connection_string_invalid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
input = "0xxdeadbeef:100100100@1.1.1.1:34534,5.1.5.3:23443";
|
input = "0xxdeadbeef:100100100@1.1.1.1:34534,5.1.5.3:23443";
|
||||||
ClusterConnectionString cs(input);
|
ClusterConnectionString cs(input);
|
||||||
|
@ -274,20 +187,27 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/addresses") {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
||||||
std::string input;
|
state std::string input;
|
||||||
|
|
||||||
{
|
{
|
||||||
input = "asdf:2345@localhost:1234";
|
input = "asdf:2345@localhost:1234";
|
||||||
ClusterConnectionString cs(input);
|
ClusterConnectionString cs(input);
|
||||||
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
|
||||||
ASSERT(cs.hostnames.size() == 1);
|
ASSERT(cs.hostnames.size() == 1);
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
input = "asdf:2345@localhost:1234,localhost:1234";
|
||||||
|
try {
|
||||||
|
ClusterConnectionString cs(input);
|
||||||
|
} catch (Error& e) {
|
||||||
|
ASSERT(e.code() == error_code_connection_string_invalid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443";
|
input = "0xxdeadbeef:100100100@localhost:34534,host-name:23443";
|
||||||
ClusterConnectionString cs(input);
|
ClusterConnectionString cs(input);
|
||||||
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
@ -300,7 +220,6 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
||||||
commented += "# asdfasdf ##";
|
commented += "# asdfasdf ##";
|
||||||
|
|
||||||
ClusterConnectionString cs(commented);
|
ClusterConnectionString cs(commented);
|
||||||
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
@ -313,7 +232,6 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
||||||
commented += "# asdfasdf ##";
|
commented += "# asdfasdf ##";
|
||||||
|
|
||||||
ClusterConnectionString cs(commented);
|
ClusterConnectionString cs(commented);
|
||||||
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
ASSERT(input == cs.toString());
|
ASSERT(input == cs.toString());
|
||||||
}
|
}
|
||||||
|
@ -321,44 +239,30 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/hostnames") {
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("/fdbclient/MonitorLeader/ConnectionString") {
|
TEST_CASE("/fdbclient/MonitorLeader/ConnectionString/hostname") {
|
||||||
state std::string connectionString = "TestCluster:0@localhost:1234,host-name:5678";
|
std::string connectionString = "TestCluster:0@localhost:1234,host-name:5678";
|
||||||
std::string hn1 = "localhost", port1 = "1234";
|
std::string hn1 = "localhost", port1 = "1234", hn2 = "host-name", port2 = "5678";
|
||||||
state std::string hn2 = "host-name";
|
std::vector<Hostname> hostnames;
|
||||||
state std::string port2 = "5678";
|
|
||||||
state std::vector<Hostname> hostnames;
|
|
||||||
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
|
|
||||||
hostnames.push_back(Hostname::parse(hn2 + ":" + port2));
|
|
||||||
|
|
||||||
NetworkAddress address1 = NetworkAddress::parse("127.0.0.0:1234");
|
{
|
||||||
NetworkAddress address2 = NetworkAddress::parse("127.0.0.1:5678");
|
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
|
||||||
|
hostnames.push_back(Hostname::parse(hn2 + ":" + port2));
|
||||||
|
|
||||||
INetworkConnections::net()->addMockTCPEndpoint(hn1, port1, { address1 });
|
ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
|
||||||
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address2 });
|
ASSERT(cs.hostnames.size() == 2);
|
||||||
|
ASSERT(cs.coordinators().size() == 0);
|
||||||
|
ASSERT(cs.toString() == connectionString);
|
||||||
|
}
|
||||||
|
|
||||||
state ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
|
{
|
||||||
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
hostnames.clear();
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
|
||||||
ASSERT(cs.coordinators().size() == 0);
|
hostnames.push_back(Hostname::parse(hn1 + ":" + port1));
|
||||||
wait(cs.resolveHostnames());
|
try {
|
||||||
ASSERT(cs.status == ClusterConnectionString::RESOLVED);
|
ClusterConnectionString cs(hostnames, LiteralStringRef("TestCluster:0"));
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
} catch (Error& e) {
|
||||||
ASSERT(cs.coordinators().size() == 2);
|
ASSERT(e.code() == error_code_connection_string_invalid);
|
||||||
ASSERT(cs.toString() == connectionString);
|
}
|
||||||
cs.resetToUnresolved();
|
|
||||||
ASSERT(cs.status == ClusterConnectionString::UNRESOLVED);
|
|
||||||
ASSERT(cs.hostnames.size() == 2);
|
|
||||||
ASSERT(cs.coordinators().size() == 0);
|
|
||||||
ASSERT(cs.toString() == connectionString);
|
|
||||||
|
|
||||||
INetworkConnections::net()->removeMockTCPEndpoint(hn2, port2);
|
|
||||||
NetworkAddress address3 = NetworkAddress::parse("127.0.0.0:5678");
|
|
||||||
INetworkConnections::net()->addMockTCPEndpoint(hn2, port2, { address3 });
|
|
||||||
|
|
||||||
try {
|
|
||||||
wait(cs.resolveHostnames());
|
|
||||||
} catch (Error& e) {
|
|
||||||
ASSERT(e.code() == error_code_connection_string_invalid);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return Void();
|
return Void();
|
||||||
|
@ -380,6 +284,7 @@ ACTOR Future<std::vector<NetworkAddress>> tryResolveHostnamesImpl(ClusterConnect
|
||||||
allCoordinatorsSet.insert(coord);
|
allCoordinatorsSet.insert(coord);
|
||||||
}
|
}
|
||||||
std::vector<NetworkAddress> allCoordinators(allCoordinatorsSet.begin(), allCoordinatorsSet.end());
|
std::vector<NetworkAddress> allCoordinators(allCoordinatorsSet.begin(), allCoordinatorsSet.end());
|
||||||
|
std::sort(allCoordinators.begin(), allCoordinators.end());
|
||||||
return allCoordinators;
|
return allCoordinators;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -484,17 +389,22 @@ TEST_CASE("/fdbclient/MonitorLeader/parseConnectionString/fuzz") {
|
||||||
}
|
}
|
||||||
|
|
||||||
ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key)
|
ClusterConnectionString::ClusterConnectionString(const std::vector<NetworkAddress>& servers, Key key)
|
||||||
: status(RESOLVED), coords(servers) {
|
: coords(servers) {
|
||||||
|
std::set<NetworkAddress> s(servers.begin(), servers.end());
|
||||||
|
if (s.size() != servers.size()) {
|
||||||
|
throw connection_string_invalid();
|
||||||
|
}
|
||||||
std::string keyString = key.toString();
|
std::string keyString = key.toString();
|
||||||
parseKey(keyString);
|
parseKey(keyString);
|
||||||
resetConnectionString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key)
|
ClusterConnectionString::ClusterConnectionString(const std::vector<Hostname>& hosts, Key key) : hostnames(hosts) {
|
||||||
: status(UNRESOLVED), hostnames(hosts) {
|
std::set<Hostname> h(hosts.begin(), hosts.end());
|
||||||
|
if (h.size() != hosts.size()) {
|
||||||
|
throw connection_string_invalid();
|
||||||
|
}
|
||||||
std::string keyString = key.toString();
|
std::string keyString = key.toString();
|
||||||
parseKey(keyString);
|
parseKey(keyString);
|
||||||
resetConnectionString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ClusterConnectionString::parseKey(const std::string& key) {
|
void ClusterConnectionString::parseKey(const std::string& key) {
|
||||||
|
@ -529,13 +439,11 @@ void ClusterConnectionString::parseKey(const std::string& key) {
|
||||||
std::string ClusterConnectionString::toString() const {
|
std::string ClusterConnectionString::toString() const {
|
||||||
std::string s = key.toString();
|
std::string s = key.toString();
|
||||||
s += '@';
|
s += '@';
|
||||||
for (int i = 0; i < coords.size(); i++) {
|
for (auto const& coord : coords) {
|
||||||
if (networkAddressToHostname.find(coords[i]) == networkAddressToHostname.end()) {
|
if (s.find('@') != s.length() - 1) {
|
||||||
if (s.find('@') != s.length() - 1) {
|
s += ',';
|
||||||
s += ',';
|
|
||||||
}
|
|
||||||
s += coords[i].toString();
|
|
||||||
}
|
}
|
||||||
|
s += coord.toString();
|
||||||
}
|
}
|
||||||
for (auto const& host : hostnames) {
|
for (auto const& host : hostnames) {
|
||||||
if (s.find('@') != s.length() - 1) {
|
if (s.find('@') != s.length() - 1) {
|
||||||
|
@ -547,11 +455,14 @@ std::string ClusterConnectionString::toString() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
|
ClientCoordinators::ClientCoordinators(Reference<IClusterConnectionRecord> ccr) : ccr(ccr) {
|
||||||
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
|
|
||||||
ClusterConnectionString cs = ccr->getConnectionString();
|
ClusterConnectionString cs = ccr->getConnectionString();
|
||||||
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s)
|
|
||||||
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
|
|
||||||
clusterKey = cs.clusterKey();
|
clusterKey = cs.clusterKey();
|
||||||
|
for (auto h : cs.hostnames) {
|
||||||
|
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
|
||||||
|
}
|
||||||
|
for (auto s : cs.coordinators()) {
|
||||||
|
clientLeaderServers.push_back(ClientLeaderRegInterface(s));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddress> coordinators)
|
ClientCoordinators::ClientCoordinators(Key clusterKey, std::vector<NetworkAddress> coordinators)
|
||||||
|
@ -576,49 +487,32 @@ ClientLeaderRegInterface::ClientLeaderRegInterface(INetwork* local) {
|
||||||
|
|
||||||
// Nominee is the worker among all workers that are considered as leader by one coordinator
|
// Nominee is the worker among all workers that are considered as leader by one coordinator
|
||||||
// This function contacts a coordinator coord to ask who is its nominee.
|
// This function contacts a coordinator coord to ask who is its nominee.
|
||||||
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
|
|
||||||
// to throw `coordinators_changed()` error
|
|
||||||
ACTOR Future<Void> monitorNominee(Key key,
|
ACTOR Future<Void> monitorNominee(Key key,
|
||||||
ClientLeaderRegInterface coord,
|
ClientLeaderRegInterface coord,
|
||||||
AsyncTrigger* nomineeChange,
|
AsyncTrigger* nomineeChange,
|
||||||
Optional<LeaderInfo>* info,
|
Optional<LeaderInfo>* info) {
|
||||||
Optional<Hostname> hostname = Optional<Hostname>()) {
|
|
||||||
loop {
|
loop {
|
||||||
state Optional<LeaderInfo> li;
|
state Optional<LeaderInfo> li;
|
||||||
|
if (coord.hostname.present()) {
|
||||||
if (coord.getLeader.getEndpoint().getPrimaryAddress().fromHostname) {
|
wait(store(li,
|
||||||
state ErrorOr<Optional<LeaderInfo>> rep =
|
retryGetReplyFromHostname(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
|
||||||
wait(coord.getLeader.tryGetReply(GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
|
coord.hostname.get(),
|
||||||
TaskPriority::CoordinationReply));
|
WLTOKEN_CLIENTLEADERREG_GETLEADER,
|
||||||
if (rep.isError()) {
|
TaskPriority::CoordinationReply)));
|
||||||
// Connecting to nominee failed, most likely due to connection failed.
|
|
||||||
TraceEvent("MonitorNomineeError")
|
|
||||||
.error(rep.getError())
|
|
||||||
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
|
|
||||||
.detail("OldAddr", coord.getLeader.getEndpoint().getPrimaryAddress().toString());
|
|
||||||
if (rep.getError().code() == error_code_request_maybe_delivered) {
|
|
||||||
// Delay to prevent tight resolving loop due to outdated DNS cache
|
|
||||||
wait(delay(FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL));
|
|
||||||
throw coordinators_changed();
|
|
||||||
} else {
|
|
||||||
throw rep.getError();
|
|
||||||
}
|
|
||||||
} else if (rep.present()) {
|
|
||||||
li = rep.get();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
Optional<LeaderInfo> tmp =
|
wait(store(li,
|
||||||
wait(retryBrokenPromise(coord.getLeader,
|
retryBrokenPromise(coord.getLeader,
|
||||||
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
|
GetLeaderRequest(key, info->present() ? info->get().changeID : UID()),
|
||||||
TaskPriority::CoordinationReply));
|
TaskPriority::CoordinationReply)));
|
||||||
li = tmp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wait(Future<Void>(Void())); // Make sure we weren't cancelled
|
wait(Future<Void>(Void())); // Make sure we weren't cancelled
|
||||||
|
|
||||||
TraceEvent("GetLeaderReply")
|
TraceEvent("GetLeaderReply")
|
||||||
.suppressFor(1.0)
|
.suppressFor(1.0)
|
||||||
.detail("Coordinator", coord.getLeader.getEndpoint().getPrimaryAddress())
|
.detail("Coordinator",
|
||||||
|
coord.hostname.present() ? coord.hostname.get().toString()
|
||||||
|
: coord.getLeader.getEndpoint().getPrimaryAddress().toString())
|
||||||
.detail("Nominee", li.present() ? li.get().changeID : UID())
|
.detail("Nominee", li.present() ? li.get().changeID : UID())
|
||||||
.detail("ClusterKey", key.printable());
|
.detail("ClusterKey", key.printable());
|
||||||
|
|
||||||
|
@ -687,74 +581,54 @@ Optional<std::pair<LeaderInfo, bool>> getLeader(const std::vector<Optional<Leade
|
||||||
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
|
ACTOR Future<MonitorLeaderInfo> monitorLeaderOneGeneration(Reference<IClusterConnectionRecord> connRecord,
|
||||||
Reference<AsyncVar<Value>> outSerializedLeaderInfo,
|
Reference<AsyncVar<Value>> outSerializedLeaderInfo,
|
||||||
MonitorLeaderInfo info) {
|
MonitorLeaderInfo info) {
|
||||||
|
state ClientCoordinators coordinators(info.intermediateConnRecord);
|
||||||
|
state AsyncTrigger nomineeChange;
|
||||||
|
state std::vector<Optional<LeaderInfo>> nominees;
|
||||||
|
state Future<Void> allActors;
|
||||||
|
|
||||||
|
nominees.resize(coordinators.clientLeaderServers.size());
|
||||||
|
|
||||||
|
state std::vector<Future<Void>> actors;
|
||||||
|
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
|
||||||
|
actors.reserve(coordinators.clientLeaderServers.size());
|
||||||
|
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
|
||||||
|
actors.push_back(
|
||||||
|
monitorNominee(coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i]));
|
||||||
|
}
|
||||||
|
allActors = waitForAll(actors);
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
wait(connRecord->resolveHostnames());
|
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
|
||||||
wait(info.intermediateConnRecord->resolveHostnames());
|
TraceEvent("MonitorLeaderChange")
|
||||||
state ClientCoordinators coordinators(info.intermediateConnRecord);
|
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
|
||||||
state AsyncTrigger nomineeChange;
|
if (leader.present()) {
|
||||||
state std::vector<Optional<LeaderInfo>> nominees;
|
if (leader.get().first.forward) {
|
||||||
state Future<Void> allActors;
|
TraceEvent("MonitorLeaderForwarding")
|
||||||
|
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
|
||||||
nominees.resize(coordinators.clientLeaderServers.size());
|
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
|
||||||
|
.trackLatest("MonitorLeaderForwarding");
|
||||||
state std::vector<Future<Void>> actors;
|
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
|
||||||
// Ask all coordinators if the worker is considered as a leader (leader nominee) by the coordinator.
|
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
|
||||||
actors.reserve(coordinators.clientLeaderServers.size());
|
return info;
|
||||||
for (int i = 0; i < coordinators.clientLeaderServers.size(); i++) {
|
|
||||||
Optional<Hostname> hostname;
|
|
||||||
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
|
|
||||||
coordinators.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress());
|
|
||||||
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
|
|
||||||
hostname = r->second;
|
|
||||||
}
|
|
||||||
actors.push_back(monitorNominee(
|
|
||||||
coordinators.clusterKey, coordinators.clientLeaderServers[i], &nomineeChange, &nominees[i], hostname));
|
|
||||||
}
|
|
||||||
allActors = waitForAll(actors);
|
|
||||||
|
|
||||||
loop {
|
|
||||||
Optional<std::pair<LeaderInfo, bool>> leader = getLeader(nominees);
|
|
||||||
TraceEvent("MonitorLeaderChange")
|
|
||||||
.detail("NewLeader", leader.present() ? leader.get().first.changeID : UID(1, 1));
|
|
||||||
if (leader.present()) {
|
|
||||||
if (leader.get().first.forward) {
|
|
||||||
TraceEvent("MonitorLeaderForwarding")
|
|
||||||
.detail("NewConnStr", leader.get().first.serializedInfo.toString())
|
|
||||||
.detail("OldConnStr", info.intermediateConnRecord->getConnectionString().toString())
|
|
||||||
.trackLatest("MonitorLeaderForwarding");
|
|
||||||
info.intermediateConnRecord = connRecord->makeIntermediateRecord(
|
|
||||||
ClusterConnectionString(leader.get().first.serializedInfo.toString()));
|
|
||||||
return info;
|
|
||||||
}
|
|
||||||
if (connRecord != info.intermediateConnRecord) {
|
|
||||||
if (!info.hasConnected) {
|
|
||||||
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
|
|
||||||
.detail("ClusterFile", connRecord->toString())
|
|
||||||
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
|
|
||||||
.detail("CurrentConnectionString",
|
|
||||||
info.intermediateConnRecord->getConnectionString().toString());
|
|
||||||
}
|
|
||||||
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
|
|
||||||
info.intermediateConnRecord = connRecord;
|
|
||||||
}
|
|
||||||
|
|
||||||
info.hasConnected = true;
|
|
||||||
connRecord->notifyConnected();
|
|
||||||
|
|
||||||
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
wait(nomineeChange.onTrigger() || allActors);
|
|
||||||
} catch (Error& e) {
|
|
||||||
if (e.code() == error_code_coordinators_changed) {
|
|
||||||
TraceEvent("MonitorLeaderCoordinatorsChanged").suppressFor(1.0);
|
|
||||||
connRecord->getConnectionString().resetToUnresolved();
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (connRecord != info.intermediateConnRecord) {
|
||||||
|
if (!info.hasConnected) {
|
||||||
|
TraceEvent(SevWarnAlways, "IncorrectClusterFileContentsAtConnection")
|
||||||
|
.detail("ClusterFile", connRecord->toString())
|
||||||
|
.detail("StoredConnectionString", connRecord->getConnectionString().toString())
|
||||||
|
.detail("CurrentConnectionString",
|
||||||
|
info.intermediateConnRecord->getConnectionString().toString());
|
||||||
|
}
|
||||||
|
connRecord->setAndPersistConnectionString(info.intermediateConnRecord->getConnectionString());
|
||||||
|
info.intermediateConnRecord = connRecord;
|
||||||
|
}
|
||||||
|
|
||||||
|
info.hasConnected = true;
|
||||||
|
connRecord->notifyConnected();
|
||||||
|
|
||||||
|
outSerializedLeaderInfo->set(leader.get().first.serializedInfo);
|
||||||
}
|
}
|
||||||
|
wait(nomineeChange.onTrigger() || allActors);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -885,10 +759,10 @@ ACTOR Future<Void> getClientInfoFromLeader(Reference<AsyncVar<Optional<ClusterCo
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
|
ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
|
||||||
|
std::vector<Hostname> hostnames,
|
||||||
std::vector<NetworkAddress> coordinators,
|
std::vector<NetworkAddress> coordinators,
|
||||||
ClientData* clientData,
|
ClientData* clientData,
|
||||||
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo,
|
Reference<AsyncVar<Optional<LeaderInfo>>> leaderInfo) {
|
||||||
Reference<AsyncVar<Void>> coordinatorsChanged) {
|
|
||||||
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
|
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
|
||||||
state AsyncTrigger nomineeChange;
|
state AsyncTrigger nomineeChange;
|
||||||
state std::vector<Optional<LeaderInfo>> nominees;
|
state std::vector<Optional<LeaderInfo>> nominees;
|
||||||
|
@ -896,8 +770,12 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
|
||||||
state Reference<AsyncVar<Optional<ClusterControllerClientInterface>>> knownLeader(
|
state Reference<AsyncVar<Optional<ClusterControllerClientInterface>>> knownLeader(
|
||||||
new AsyncVar<Optional<ClusterControllerClientInterface>>{});
|
new AsyncVar<Optional<ClusterControllerClientInterface>>{});
|
||||||
|
|
||||||
for (auto s = coordinators.begin(); s != coordinators.end(); ++s) {
|
clientLeaderServers.reserve(hostnames.size() + coordinators.size());
|
||||||
clientLeaderServers.push_back(ClientLeaderRegInterface(*s));
|
for (auto h : hostnames) {
|
||||||
|
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
|
||||||
|
}
|
||||||
|
for (auto s : coordinators) {
|
||||||
|
clientLeaderServers.push_back(ClientLeaderRegInterface(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
nominees.resize(clientLeaderServers.size());
|
nominees.resize(clientLeaderServers.size());
|
||||||
|
@ -936,14 +814,7 @@ ACTOR Future<Void> monitorLeaderAndGetClientInfo(Key clusterKey,
|
||||||
leaderInfo->set(leader.get().first);
|
leaderInfo->set(leader.get().first);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
wait(nomineeChange.onTrigger() || allActors);
|
||||||
wait(nomineeChange.onTrigger() || allActors);
|
|
||||||
} catch (Error& e) {
|
|
||||||
if (e.code() == error_code_coordinators_changed) {
|
|
||||||
coordinatorsChanged->trigger();
|
|
||||||
}
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -995,7 +866,7 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
|
||||||
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
|
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
|
||||||
Key traceLogGroup) {
|
Key traceLogGroup) {
|
||||||
state ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
|
state ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
|
||||||
state std::vector<NetworkAddress> addrs = cs.coordinators();
|
state int coordinatorsSize = cs.hostnames.size() + cs.coordinators().size();
|
||||||
state int index = 0;
|
state int index = 0;
|
||||||
state int successIndex = 0;
|
state int successIndex = 0;
|
||||||
state Optional<double> incorrectTime;
|
state Optional<double> incorrectTime;
|
||||||
|
@ -1003,15 +874,26 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
|
||||||
state std::vector<CommitProxyInterface> lastCommitProxies;
|
state std::vector<CommitProxyInterface> lastCommitProxies;
|
||||||
state std::vector<UID> lastGrvProxyUIDs;
|
state std::vector<UID> lastGrvProxyUIDs;
|
||||||
state std::vector<GrvProxyInterface> lastGrvProxies;
|
state std::vector<GrvProxyInterface> lastGrvProxies;
|
||||||
|
state std::vector<ClientLeaderRegInterface> clientLeaderServers;
|
||||||
|
|
||||||
|
clientLeaderServers.reserve(coordinatorsSize);
|
||||||
|
for (const auto& h : cs.hostnames) {
|
||||||
|
clientLeaderServers.push_back(ClientLeaderRegInterface(h));
|
||||||
|
}
|
||||||
|
for (const auto& c : cs.coordinators()) {
|
||||||
|
clientLeaderServers.push_back(ClientLeaderRegInterface(c));
|
||||||
|
}
|
||||||
|
|
||||||
|
deterministicRandom()->randomShuffle(clientLeaderServers);
|
||||||
|
|
||||||
deterministicRandom()->randomShuffle(addrs);
|
|
||||||
loop {
|
loop {
|
||||||
state ClientLeaderRegInterface clientLeaderServer(addrs[index]);
|
state ClientLeaderRegInterface clientLeaderServer = clientLeaderServers[index];
|
||||||
state OpenDatabaseCoordRequest req;
|
state OpenDatabaseCoordRequest req;
|
||||||
|
|
||||||
coordinator->set(clientLeaderServer);
|
coordinator->set(clientLeaderServer);
|
||||||
|
|
||||||
req.clusterKey = cs.clusterKey();
|
req.clusterKey = cs.clusterKey();
|
||||||
|
req.hostnames = cs.hostnames;
|
||||||
req.coordinators = cs.coordinators();
|
req.coordinators = cs.coordinators();
|
||||||
req.knownClientInfoID = clientInfo->get().id;
|
req.knownClientInfoID = clientInfo->get().id;
|
||||||
req.supportedVersions = supportedVersions->get();
|
req.supportedVersions = supportedVersions->get();
|
||||||
|
@ -1040,8 +922,16 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
|
||||||
incorrectTime = Optional<double>();
|
incorrectTime = Optional<double>();
|
||||||
}
|
}
|
||||||
|
|
||||||
state ErrorOr<CachedSerialization<ClientDBInfo>> rep =
|
state ErrorOr<CachedSerialization<ClientDBInfo>> rep;
|
||||||
wait(clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply));
|
if (clientLeaderServer.hostname.present()) {
|
||||||
|
wait(store(rep,
|
||||||
|
tryGetReplyFromHostname(req,
|
||||||
|
clientLeaderServer.hostname.get(),
|
||||||
|
WLTOKEN_CLIENTLEADERREG_OPENDATABASE,
|
||||||
|
TaskPriority::CoordinationReply)));
|
||||||
|
} else {
|
||||||
|
wait(store(rep, clientLeaderServer.openDatabase.tryGetReply(req, TaskPriority::CoordinationReply)));
|
||||||
|
}
|
||||||
if (rep.present()) {
|
if (rep.present()) {
|
||||||
if (rep.get().read().forward.present()) {
|
if (rep.get().read().forward.present()) {
|
||||||
TraceEvent("MonitorProxiesForwarding")
|
TraceEvent("MonitorProxiesForwarding")
|
||||||
|
@ -1072,15 +962,10 @@ ACTOR Future<MonitorLeaderInfo> monitorProxiesOneGeneration(
|
||||||
successIndex = index;
|
successIndex = index;
|
||||||
} else {
|
} else {
|
||||||
TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller
|
TEST(rep.getError().code() == error_code_failed_to_progress); // Coordinator cant talk to cluster controller
|
||||||
if (rep.getError().code() == error_code_coordinators_changed) {
|
TEST(rep.getError().code() == error_code_lookup_failed); // Coordinator hostname resolving failure
|
||||||
throw coordinators_changed();
|
index = (index + 1) % coordinatorsSize;
|
||||||
}
|
|
||||||
index = (index + 1) % addrs.size();
|
|
||||||
if (index == successIndex) {
|
if (index == successIndex) {
|
||||||
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
|
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
|
||||||
// When the client fails talking to all coordinators, we throw coordinators_changed() and let the caller
|
|
||||||
// re-resolve the connection string and retry.
|
|
||||||
throw coordinators_changed();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1092,27 +977,16 @@ ACTOR Future<Void> monitorProxies(
|
||||||
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
|
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
|
||||||
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
|
Reference<ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>> supportedVersions,
|
||||||
Key traceLogGroup) {
|
Key traceLogGroup) {
|
||||||
wait(connRecord->get()->resolveHostnames());
|
|
||||||
state MonitorLeaderInfo info(connRecord->get());
|
state MonitorLeaderInfo info(connRecord->get());
|
||||||
loop {
|
loop {
|
||||||
try {
|
choose {
|
||||||
wait(info.intermediateConnRecord->resolveHostnames());
|
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
|
||||||
choose {
|
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
|
||||||
when(MonitorLeaderInfo _info = wait(monitorProxiesOneGeneration(
|
info = _info;
|
||||||
connRecord->get(), clientInfo, coordinator, info, supportedVersions, traceLogGroup))) {
|
|
||||||
info = _info;
|
|
||||||
}
|
|
||||||
when(wait(connRecord->onChange())) {
|
|
||||||
info.hasConnected = false;
|
|
||||||
info.intermediateConnRecord = connRecord->get();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (Error& e) {
|
when(wait(connRecord->onChange())) {
|
||||||
if (e.code() == error_code_coordinators_changed) {
|
info.hasConnected = false;
|
||||||
TraceEvent("MonitorProxiesCoordinatorsChanged").suppressFor(1.0);
|
info.intermediateConnRecord = connRecord->get();
|
||||||
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
|
|
||||||
} else {
|
|
||||||
throw e;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,10 +75,10 @@ Future<Void> monitorLeader(Reference<IClusterConnectionRecord> const& connFile,
|
||||||
// nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function
|
// nominees, the nominee with the most nomination is the leader, and collects client data from the leader. This function
|
||||||
// also monitors the change of the leader.
|
// also monitors the change of the leader.
|
||||||
Future<Void> monitorLeaderAndGetClientInfo(Key const& clusterKey,
|
Future<Void> monitorLeaderAndGetClientInfo(Key const& clusterKey,
|
||||||
|
std::vector<Hostname> const& hostnames,
|
||||||
std::vector<NetworkAddress> const& coordinators,
|
std::vector<NetworkAddress> const& coordinators,
|
||||||
ClientData* const& clientData,
|
ClientData* const& clientData,
|
||||||
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo,
|
Reference<AsyncVar<Optional<LeaderInfo>>> const& leaderInfo);
|
||||||
Reference<AsyncVar<Void>> const& coordinatorsChanged);
|
|
||||||
|
|
||||||
Future<Void> monitorProxies(
|
Future<Void> monitorProxies(
|
||||||
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,
|
Reference<AsyncVar<Reference<IClusterConnectionRecord>>> const& connRecord,
|
||||||
|
|
|
@ -18,6 +18,10 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifdef ADDRESS_SANITIZER
|
||||||
|
#include <sanitizer/lsan_interface.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "fdbclient/FDBOptions.g.h"
|
#include "fdbclient/FDBOptions.g.h"
|
||||||
#include "fdbclient/FDBTypes.h"
|
#include "fdbclient/FDBTypes.h"
|
||||||
#include "fdbclient/GenericManagementAPI.actor.h"
|
#include "fdbclient/GenericManagementAPI.actor.h"
|
||||||
|
@ -2763,6 +2767,11 @@ template <class T>
|
||||||
THREAD_FUNC runSingleAssignmentVarTest(void* arg) {
|
THREAD_FUNC runSingleAssignmentVarTest(void* arg) {
|
||||||
noUnseed = true;
|
noUnseed = true;
|
||||||
|
|
||||||
|
// This test intentionally leaks memory
|
||||||
|
#ifdef ADDRESS_SANITIZER
|
||||||
|
__lsan::ScopedDisabler disableLeakChecks;
|
||||||
|
#endif
|
||||||
|
|
||||||
volatile bool* done = (volatile bool*)arg;
|
volatile bool* done = (volatile bool*)arg;
|
||||||
try {
|
try {
|
||||||
for (int i = 0; i < 25; ++i) {
|
for (int i = 0; i < 25; ++i) {
|
||||||
|
|
|
@ -265,11 +265,11 @@ void DatabaseContext::getLatestCommitVersions(const Reference<LocationInfo>& loc
|
||||||
void updateCachedReadVersionShared(double t, Version v, DatabaseSharedState* p) {
|
void updateCachedReadVersionShared(double t, Version v, DatabaseSharedState* p) {
|
||||||
MutexHolder mutex(p->mutexLock);
|
MutexHolder mutex(p->mutexLock);
|
||||||
if (v >= p->grvCacheSpace.cachedReadVersion) {
|
if (v >= p->grvCacheSpace.cachedReadVersion) {
|
||||||
TraceEvent(SevDebug, "CacheReadVersionUpdate")
|
//TraceEvent(SevDebug, "CacheReadVersionUpdate")
|
||||||
.detail("Version", v)
|
// .detail("Version", v)
|
||||||
.detail("CurTime", t)
|
// .detail("CurTime", t)
|
||||||
.detail("LastVersion", p->grvCacheSpace.cachedReadVersion)
|
// .detail("LastVersion", p->grvCacheSpace.cachedReadVersion)
|
||||||
.detail("LastTime", p->grvCacheSpace.lastGrvTime);
|
// .detail("LastTime", p->grvCacheSpace.lastGrvTime);
|
||||||
p->grvCacheSpace.cachedReadVersion = v;
|
p->grvCacheSpace.cachedReadVersion = v;
|
||||||
if (t > p->grvCacheSpace.lastGrvTime) {
|
if (t > p->grvCacheSpace.lastGrvTime) {
|
||||||
p->grvCacheSpace.lastGrvTime = t;
|
p->grvCacheSpace.lastGrvTime = t;
|
||||||
|
@ -282,11 +282,11 @@ void DatabaseContext::updateCachedReadVersion(double t, Version v) {
|
||||||
return updateCachedReadVersionShared(t, v, sharedStatePtr);
|
return updateCachedReadVersionShared(t, v, sharedStatePtr);
|
||||||
}
|
}
|
||||||
if (v >= cachedReadVersion) {
|
if (v >= cachedReadVersion) {
|
||||||
TraceEvent(SevDebug, "CachedReadVersionUpdate")
|
//TraceEvent(SevDebug, "CachedReadVersionUpdate")
|
||||||
.detail("Version", v)
|
// .detail("Version", v)
|
||||||
.detail("GrvStartTime", t)
|
// .detail("GrvStartTime", t)
|
||||||
.detail("LastVersion", cachedReadVersion)
|
// .detail("LastVersion", cachedReadVersion)
|
||||||
.detail("LastTime", lastGrvTime);
|
// .detail("LastTime", lastGrvTime);
|
||||||
cachedReadVersion = v;
|
cachedReadVersion = v;
|
||||||
// Since the time is based on the start of the request, it's possible that we
|
// Since the time is based on the start of the request, it's possible that we
|
||||||
// get a newer version with an older time.
|
// get a newer version with an older time.
|
||||||
|
@ -5100,10 +5100,10 @@ Future<Optional<Value>> Transaction::get(const Key& key, Snapshot snapshot) {
|
||||||
++trState->cx->transactionGetValueRequests;
|
++trState->cx->transactionGetValueRequests;
|
||||||
// ASSERT (key < allKeys.end);
|
// ASSERT (key < allKeys.end);
|
||||||
|
|
||||||
// There are no keys in the database with size greater than KEY_SIZE_LIMIT
|
// There are no keys in the database with size greater than the max key size
|
||||||
if (key.size() >
|
if (key.size() > getMaxReadKeySize(key)) {
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
return Optional<Value>();
|
return Optional<Value>();
|
||||||
|
}
|
||||||
|
|
||||||
auto ver = getReadVersion();
|
auto ver = getReadVersion();
|
||||||
|
|
||||||
|
@ -5484,23 +5484,19 @@ Future<Void> Transaction::getRangeStream(const PromiseStream<RangeResult>& resul
|
||||||
void Transaction::addReadConflictRange(KeyRangeRef const& keys) {
|
void Transaction::addReadConflictRange(KeyRangeRef const& keys) {
|
||||||
ASSERT(!keys.empty());
|
ASSERT(!keys.empty());
|
||||||
|
|
||||||
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
|
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
|
||||||
// we can translate it to an equivalent one with smaller keys
|
// we can translate it to an equivalent one with smaller keys
|
||||||
KeyRef begin = keys.begin;
|
KeyRef begin = keys.begin;
|
||||||
KeyRef end = keys.end;
|
KeyRef end = keys.end;
|
||||||
|
|
||||||
if (begin.size() >
|
int64_t beginMaxSize = getMaxReadKeySize(begin);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
int64_t endMaxSize = getMaxReadKeySize(end);
|
||||||
begin = begin.substr(
|
if (begin.size() > beginMaxSize) {
|
||||||
0,
|
begin = begin.substr(0, beginMaxSize + 1);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
}
|
||||||
1);
|
if (end.size() > endMaxSize) {
|
||||||
if (end.size() >
|
end = end.substr(0, endMaxSize + 1);
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
}
|
||||||
end = end.substr(
|
|
||||||
0,
|
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1);
|
|
||||||
|
|
||||||
KeyRangeRef r = KeyRangeRef(begin, end);
|
KeyRangeRef r = KeyRangeRef(begin, end);
|
||||||
|
|
||||||
|
@ -5522,8 +5518,7 @@ void Transaction::makeSelfConflicting() {
|
||||||
|
|
||||||
void Transaction::set(const KeyRef& key, const ValueRef& value, AddConflictRange addConflictRange) {
|
void Transaction::set(const KeyRef& key, const ValueRef& value, AddConflictRange addConflictRange) {
|
||||||
++trState->cx->transactionSetMutations;
|
++trState->cx->transactionSetMutations;
|
||||||
if (key.size() >
|
if (key.size() > getMaxWriteKeySize(key, trState->options.rawAccess))
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
throw key_too_large();
|
throw key_too_large();
|
||||||
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
|
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
|
||||||
throw value_too_large();
|
throw value_too_large();
|
||||||
|
@ -5544,8 +5539,7 @@ void Transaction::atomicOp(const KeyRef& key,
|
||||||
MutationRef::Type operationType,
|
MutationRef::Type operationType,
|
||||||
AddConflictRange addConflictRange) {
|
AddConflictRange addConflictRange) {
|
||||||
++trState->cx->transactionAtomicMutations;
|
++trState->cx->transactionAtomicMutations;
|
||||||
if (key.size() >
|
if (key.size() > getMaxWriteKeySize(key, trState->options.rawAccess))
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
throw key_too_large();
|
throw key_too_large();
|
||||||
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
|
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
|
||||||
throw value_too_large();
|
throw value_too_large();
|
||||||
|
@ -5578,20 +5572,16 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
|
||||||
KeyRef begin = range.begin;
|
KeyRef begin = range.begin;
|
||||||
KeyRef end = range.end;
|
KeyRef end = range.end;
|
||||||
|
|
||||||
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
|
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
|
||||||
// we can translate it to an equivalent one with smaller keys
|
// we can translate it to an equivalent one with smaller keys
|
||||||
if (begin.size() >
|
int64_t beginMaxSize = getMaxClearKeySize(begin);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
int64_t endMaxSize = getMaxClearKeySize(end);
|
||||||
begin = begin.substr(
|
if (begin.size() > beginMaxSize) {
|
||||||
0,
|
begin = begin.substr(0, beginMaxSize + 1);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
}
|
||||||
1);
|
if (end.size() > endMaxSize) {
|
||||||
if (end.size() >
|
end = end.substr(0, endMaxSize + 1);
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
}
|
||||||
end = end.substr(
|
|
||||||
0,
|
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1);
|
|
||||||
|
|
||||||
auto r = KeyRangeRef(req.arena, KeyRangeRef(begin, end));
|
auto r = KeyRangeRef(req.arena, KeyRangeRef(begin, end));
|
||||||
if (r.empty())
|
if (r.empty())
|
||||||
|
@ -5604,10 +5594,10 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
|
||||||
}
|
}
|
||||||
void Transaction::clear(const KeyRef& key, AddConflictRange addConflictRange) {
|
void Transaction::clear(const KeyRef& key, AddConflictRange addConflictRange) {
|
||||||
++trState->cx->transactionClearMutations;
|
++trState->cx->transactionClearMutations;
|
||||||
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT
|
// There aren't any keys in the database with size larger than the max key size
|
||||||
if (key.size() >
|
if (key.size() > getMaxClearKeySize(key)) {
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
auto& req = tr;
|
auto& req = tr;
|
||||||
auto& t = req.transaction;
|
auto& t = req.transaction;
|
||||||
|
@ -5626,24 +5616,19 @@ void Transaction::addWriteConflictRange(const KeyRangeRef& keys) {
|
||||||
auto& req = tr;
|
auto& req = tr;
|
||||||
auto& t = req.transaction;
|
auto& t = req.transaction;
|
||||||
|
|
||||||
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
|
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
|
||||||
// we can translate it to an equivalent one with smaller keys
|
// we can translate it to an equivalent one with smaller keys
|
||||||
KeyRef begin = keys.begin;
|
KeyRef begin = keys.begin;
|
||||||
KeyRef end = keys.end;
|
KeyRef end = keys.end;
|
||||||
|
|
||||||
if (begin.size() >
|
int64_t beginMaxSize = getMaxKeySize(begin);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
int64_t endMaxSize = getMaxKeySize(end);
|
||||||
begin = begin.substr(
|
if (begin.size() > beginMaxSize) {
|
||||||
0,
|
begin = begin.substr(0, beginMaxSize + 1);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
}
|
||||||
1);
|
if (end.size() > endMaxSize) {
|
||||||
if (end.size() >
|
end = end.substr(0, endMaxSize + 1);
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
}
|
||||||
end = end.substr(
|
|
||||||
0,
|
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1);
|
|
||||||
|
|
||||||
KeyRangeRef r = KeyRangeRef(begin, end);
|
KeyRangeRef r = KeyRangeRef(begin, end);
|
||||||
|
|
||||||
if (r.empty()) {
|
if (r.empty()) {
|
||||||
|
@ -6942,11 +6927,18 @@ Future<Standalone<StringRef>> Transaction::getVersionstamp() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gets the protocol version reported by a coordinator via the protocol info interface
|
// Gets the protocol version reported by a coordinator via the protocol info interface
|
||||||
ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordinatorAddresses) {
|
ACTOR Future<ProtocolVersion> getCoordinatorProtocol(
|
||||||
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown({ coordinatorAddresses },
|
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator) {
|
||||||
WLTOKEN_PROTOCOL_INFO) };
|
state ProtocolInfoReply reply;
|
||||||
ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
|
if (coordinator->get().get().hostname.present()) {
|
||||||
|
wait(store(reply,
|
||||||
|
retryGetReplyFromHostname(
|
||||||
|
ProtocolInfoRequest{}, coordinator->get().get().hostname.get(), WLTOKEN_PROTOCOL_INFO)));
|
||||||
|
} else {
|
||||||
|
RequestStream<ProtocolInfoRequest> requestStream(
|
||||||
|
Endpoint::wellKnown({ coordinator->get().get().getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO));
|
||||||
|
wait(store(reply, retryBrokenPromise(requestStream, ProtocolInfoRequest{})));
|
||||||
|
}
|
||||||
return reply.version;
|
return reply.version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6955,8 +6947,16 @@ ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordina
|
||||||
// function will return with an unset result.
|
// function will return with an unset result.
|
||||||
// If an expected version is given, this future won't return if the actual protocol version matches the expected version
|
// If an expected version is given, this future won't return if the actual protocol version matches the expected version
|
||||||
ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
|
ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
|
||||||
NetworkAddress coordinatorAddress,
|
Reference<AsyncVar<Optional<ClientLeaderRegInterface>> const> coordinator,
|
||||||
Optional<ProtocolVersion> expectedVersion) {
|
Optional<ProtocolVersion> expectedVersion) {
|
||||||
|
state NetworkAddress coordinatorAddress;
|
||||||
|
if (coordinator->get().get().hostname.present()) {
|
||||||
|
Hostname h = coordinator->get().get().hostname.get();
|
||||||
|
wait(store(coordinatorAddress, h.resolveWithRetry()));
|
||||||
|
} else {
|
||||||
|
coordinatorAddress = coordinator->get().get().getLeader.getEndpoint().getPrimaryAddress();
|
||||||
|
}
|
||||||
|
|
||||||
state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion =
|
state Reference<AsyncVar<Optional<ProtocolVersion>> const> protocolVersion =
|
||||||
FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);
|
FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);
|
||||||
|
|
||||||
|
@ -6991,11 +6991,10 @@ ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
|
||||||
if (!coordinator->get().present()) {
|
if (!coordinator->get().present()) {
|
||||||
wait(coordinator->onChange());
|
wait(coordinator->onChange());
|
||||||
} else {
|
} else {
|
||||||
Endpoint coordinatorEndpoint = coordinator->get().get().getLeader.getEndpoint();
|
|
||||||
if (needToConnect) {
|
if (needToConnect) {
|
||||||
// Even though we typically rely on the connect packet to get the protocol version, we need to send some
|
// Even though we typically rely on the connect packet to get the protocol version, we need to send some
|
||||||
// request in order to start a connection. This protocol version request serves that purpose.
|
// request in order to start a connection. This protocol version request serves that purpose.
|
||||||
protocolVersion = getCoordinatorProtocol(coordinatorEndpoint.addresses);
|
protocolVersion = getCoordinatorProtocol(coordinator);
|
||||||
needToConnect = false;
|
needToConnect = false;
|
||||||
}
|
}
|
||||||
choose {
|
choose {
|
||||||
|
@ -7011,8 +7010,8 @@ ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
|
||||||
|
|
||||||
// Older versions of FDB don't have an endpoint to return the protocol version, so we get this info from
|
// Older versions of FDB don't have an endpoint to return the protocol version, so we get this info from
|
||||||
// the connect packet
|
// the connect packet
|
||||||
when(Optional<ProtocolVersion> pv = wait(getCoordinatorProtocolFromConnectPacket(
|
when(Optional<ProtocolVersion> pv =
|
||||||
coordinatorEndpoint.getPrimaryAddress(), expectedVersion))) {
|
wait(getCoordinatorProtocolFromConnectPacket(coordinator, expectedVersion))) {
|
||||||
if (pv.present()) {
|
if (pv.present()) {
|
||||||
return pv.get();
|
return pv.get();
|
||||||
} else {
|
} else {
|
||||||
|
@ -8186,14 +8185,20 @@ ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
TraceEvent("ExclusionSafetyCheckCoordinators").log();
|
TraceEvent("ExclusionSafetyCheckCoordinators").log();
|
||||||
wait(cx->getConnectionRecord()->resolveHostnames());
|
|
||||||
state ClientCoordinators coordinatorList(cx->getConnectionRecord());
|
state ClientCoordinators coordinatorList(cx->getConnectionRecord());
|
||||||
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||||
leaderServers.reserve(coordinatorList.clientLeaderServers.size());
|
leaderServers.reserve(coordinatorList.clientLeaderServers.size());
|
||||||
for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
|
for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
|
||||||
leaderServers.push_back(retryBrokenPromise(coordinatorList.clientLeaderServers[i].getLeader,
|
if (coordinatorList.clientLeaderServers[i].hostname.present()) {
|
||||||
GetLeaderRequest(coordinatorList.clusterKey, UID()),
|
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coordinatorList.clusterKey, UID()),
|
||||||
TaskPriority::CoordinationReply));
|
coordinatorList.clientLeaderServers[i].hostname.get(),
|
||||||
|
WLTOKEN_CLIENTLEADERREG_GETLEADER,
|
||||||
|
TaskPriority::CoordinationReply));
|
||||||
|
} else {
|
||||||
|
leaderServers.push_back(retryBrokenPromise(coordinatorList.clientLeaderServers[i].getLeader,
|
||||||
|
GetLeaderRequest(coordinatorList.clusterKey, UID()),
|
||||||
|
TaskPriority::CoordinationReply));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Wait for quorum so we don't dismiss live coordinators as unreachable by acting too fast
|
// Wait for quorum so we don't dismiss live coordinators as unreachable by acting too fast
|
||||||
choose {
|
choose {
|
||||||
|
@ -9395,3 +9400,21 @@ ACTOR Future<Void> waitPurgeGranulesCompleteActor(Reference<DatabaseContext> db,
|
||||||
Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) {
|
Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) {
|
||||||
return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey);
|
return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int64_t getMaxKeySize(KeyRef const& key) {
|
||||||
|
return getMaxWriteKeySize(key, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t getMaxReadKeySize(KeyRef const& key) {
|
||||||
|
return getMaxKeySize(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess) {
|
||||||
|
int64_t tenantSize = hasRawAccess ? CLIENT_KNOBS->TENANT_PREFIX_SIZE_LIMIT : 0;
|
||||||
|
return key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
||||||
|
: CLIENT_KNOBS->KEY_SIZE_LIMIT + tenantSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t getMaxClearKeySize(KeyRef const& key) {
|
||||||
|
return getMaxKeySize(key);
|
||||||
|
}
|
|
@ -539,5 +539,19 @@ ACTOR Future<std::vector<std::pair<UID, StorageWiggleValue>>> readStorageWiggleV
|
||||||
bool primary,
|
bool primary,
|
||||||
bool use_system_priority);
|
bool use_system_priority);
|
||||||
|
|
||||||
|
// Returns the maximum legal size of a key. This size will be determined by the prefix of the passed in key
|
||||||
|
// (system keys have a larger maximum size). This should be used for generic max key size requests.
|
||||||
|
int64_t getMaxKeySize(KeyRef const& key);
|
||||||
|
|
||||||
|
// Returns the maximum legal size of a key that can be read. Keys larger than this will be assumed not to exist.
|
||||||
|
int64_t getMaxReadKeySize(KeyRef const& key);
|
||||||
|
|
||||||
|
// Returns the maximum legal size of a key that can be written. If using raw access, writes to normal keys will
|
||||||
|
// be allowed to be slighly larger to accommodate the prefix.
|
||||||
|
int64_t getMaxWriteKeySize(KeyRef const& key, bool hasRawAccess);
|
||||||
|
|
||||||
|
// Returns the maximum legal size of a key that can be cleared. Keys larger than this will be assumed not to exist.
|
||||||
|
int64_t getMaxClearKeySize(KeyRef const& key);
|
||||||
|
|
||||||
#include "flow/unactorcompiler.h"
|
#include "flow/unactorcompiler.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -59,8 +59,14 @@ class CommitQuorum {
|
||||||
ConfigGeneration generation,
|
ConfigGeneration generation,
|
||||||
ConfigTransactionInterface cti) {
|
ConfigTransactionInterface cti) {
|
||||||
try {
|
try {
|
||||||
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)),
|
if (cti.hostname.present()) {
|
||||||
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
|
wait(timeoutError(retryGetReplyFromHostname(
|
||||||
|
self->getCommitRequest(generation), cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT),
|
||||||
|
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
|
||||||
|
} else {
|
||||||
|
wait(timeoutError(cti.commit.getReply(self->getCommitRequest(generation)),
|
||||||
|
CLIENT_KNOBS->COMMIT_QUORUM_TIMEOUT));
|
||||||
|
}
|
||||||
++self->successful;
|
++self->successful;
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
// self might be destroyed if this actor is cancelled
|
// self might be destroyed if this actor is cancelled
|
||||||
|
@ -122,9 +128,20 @@ class GetGenerationQuorum {
|
||||||
ACTOR static Future<Void> addRequestActor(GetGenerationQuorum* self, ConfigTransactionInterface cti) {
|
ACTOR static Future<Void> addRequestActor(GetGenerationQuorum* self, ConfigTransactionInterface cti) {
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
ConfigTransactionGetGenerationReply reply = wait(timeoutError(
|
state ConfigTransactionGetGenerationReply reply;
|
||||||
cti.getGeneration.getReply(ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion }),
|
if (cti.hostname.present()) {
|
||||||
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
|
wait(timeoutError(store(reply,
|
||||||
|
retryGetReplyFromHostname(
|
||||||
|
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion },
|
||||||
|
cti.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGTXN_GETGENERATION)),
|
||||||
|
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
|
||||||
|
} else {
|
||||||
|
wait(timeoutError(store(reply,
|
||||||
|
cti.getGeneration.getReply(
|
||||||
|
ConfigTransactionGetGenerationRequest{ self->lastSeenLiveVersion })),
|
||||||
|
CLIENT_KNOBS->GET_GENERATION_QUORUM_TIMEOUT));
|
||||||
|
}
|
||||||
|
|
||||||
++self->totalRepliesReceived;
|
++self->totalRepliesReceived;
|
||||||
auto gen = reply.generation;
|
auto gen = reply.generation;
|
||||||
|
@ -225,9 +242,18 @@ class PaxosConfigTransactionImpl {
|
||||||
state ConfigKey configKey = ConfigKey::decodeKey(key);
|
state ConfigKey configKey = ConfigKey::decodeKey(key);
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
||||||
state Reference<ConfigTransactionInfo> configNodes(
|
state std::vector<ConfigTransactionInterface> readReplicas =
|
||||||
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
|
self->getGenerationQuorum.getReadReplicas();
|
||||||
|
std::vector<Future<Void>> fs;
|
||||||
|
for (ConfigTransactionInterface& readReplica : readReplicas) {
|
||||||
|
if (readReplica.hostname.present()) {
|
||||||
|
fs.push_back(tryInitializeRequestStream(
|
||||||
|
&readReplica.get, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GET));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
||||||
ConfigTransactionGetReply reply =
|
ConfigTransactionGetReply reply =
|
||||||
wait(timeoutError(basicLoadBalance(configNodes,
|
wait(timeoutError(basicLoadBalance(configNodes,
|
||||||
&ConfigTransactionInterface::get,
|
&ConfigTransactionInterface::get,
|
||||||
|
@ -248,9 +274,17 @@ class PaxosConfigTransactionImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
|
ACTOR static Future<RangeResult> getConfigClasses(PaxosConfigTransactionImpl* self) {
|
||||||
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
||||||
state Reference<ConfigTransactionInfo> configNodes(
|
state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
|
||||||
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
|
std::vector<Future<Void>> fs;
|
||||||
|
for (ConfigTransactionInterface& readReplica : readReplicas) {
|
||||||
|
if (readReplica.hostname.present()) {
|
||||||
|
fs.push_back(tryInitializeRequestStream(
|
||||||
|
&readReplica.getClasses, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETCLASSES));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
||||||
ConfigTransactionGetConfigClassesReply reply =
|
ConfigTransactionGetConfigClassesReply reply =
|
||||||
wait(basicLoadBalance(configNodes,
|
wait(basicLoadBalance(configNodes,
|
||||||
&ConfigTransactionInterface::getClasses,
|
&ConfigTransactionInterface::getClasses,
|
||||||
|
@ -264,9 +298,17 @@ class PaxosConfigTransactionImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
|
ACTOR static Future<RangeResult> getKnobs(PaxosConfigTransactionImpl* self, Optional<Key> configClass) {
|
||||||
ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
state ConfigGeneration generation = wait(self->getGenerationQuorum.getGeneration());
|
||||||
state Reference<ConfigTransactionInfo> configNodes(
|
state std::vector<ConfigTransactionInterface> readReplicas = self->getGenerationQuorum.getReadReplicas();
|
||||||
new ConfigTransactionInfo(self->getGenerationQuorum.getReadReplicas()));
|
std::vector<Future<Void>> fs;
|
||||||
|
for (ConfigTransactionInterface& readReplica : readReplicas) {
|
||||||
|
if (readReplica.hostname.present()) {
|
||||||
|
fs.push_back(tryInitializeRequestStream(
|
||||||
|
&readReplica.getKnobs, readReplica.hostname.get(), WLTOKEN_CONFIGTXN_GETKNOBS));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
state Reference<ConfigTransactionInfo> configNodes(new ConfigTransactionInfo(readReplicas));
|
||||||
ConfigTransactionGetKnobsReply reply =
|
ConfigTransactionGetKnobsReply reply =
|
||||||
wait(basicLoadBalance(configNodes,
|
wait(basicLoadBalance(configNodes,
|
||||||
&ConfigTransactionInterface::getKnobs,
|
&ConfigTransactionInterface::getKnobs,
|
||||||
|
@ -366,10 +408,13 @@ public:
|
||||||
Future<Void> commit() { return commit(this); }
|
Future<Void> commit() { return commit(this); }
|
||||||
|
|
||||||
PaxosConfigTransactionImpl(Database const& cx) : cx(cx) {
|
PaxosConfigTransactionImpl(Database const& cx) : cx(cx) {
|
||||||
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators();
|
const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
|
||||||
ctis.reserve(coordinators.size());
|
ctis.reserve(cs.hostnames.size() + cs.coordinators().size());
|
||||||
for (const auto& coordinator : coordinators) {
|
for (const auto& h : cs.hostnames) {
|
||||||
ctis.emplace_back(coordinator);
|
ctis.emplace_back(h);
|
||||||
|
}
|
||||||
|
for (const auto& c : cs.coordinators()) {
|
||||||
|
ctis.emplace_back(c);
|
||||||
}
|
}
|
||||||
getGenerationQuorum = GetGenerationQuorum{ ctis };
|
getGenerationQuorum = GetGenerationQuorum{ ctis };
|
||||||
commitQuorum = CommitQuorum{ ctis };
|
commitQuorum = CommitQuorum{ ctis };
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "fdbclient/ReadYourWrites.h"
|
#include "fdbclient/ReadYourWrites.h"
|
||||||
|
#include "fdbclient/NativeAPI.actor.h"
|
||||||
#include "fdbclient/Atomic.h"
|
#include "fdbclient/Atomic.h"
|
||||||
#include "fdbclient/DatabaseContext.h"
|
#include "fdbclient/DatabaseContext.h"
|
||||||
#include "fdbclient/SpecialKeySpace.actor.h"
|
#include "fdbclient/SpecialKeySpace.actor.h"
|
||||||
|
@ -1578,10 +1579,10 @@ Future<Optional<Value>> ReadYourWritesTransaction::get(const Key& key, Snapshot
|
||||||
if (key >= getMaxReadKey() && key != metadataVersionKey)
|
if (key >= getMaxReadKey() && key != metadataVersionKey)
|
||||||
return key_outside_legal_range();
|
return key_outside_legal_range();
|
||||||
|
|
||||||
// There are no keys in the database with size greater than KEY_SIZE_LIMIT
|
// There are no keys in the database with size greater than the max key size
|
||||||
if (key.size() >
|
if (key.size() > getMaxReadKeySize(key)) {
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
return Optional<Value>();
|
return Optional<Value>();
|
||||||
|
}
|
||||||
|
|
||||||
Future<Optional<Value>> result = RYWImpl::readWithConflictRange(this, RYWImpl::GetValueReq(key), snapshot);
|
Future<Optional<Value>> result = RYWImpl::readWithConflictRange(this, RYWImpl::GetValueReq(key), snapshot);
|
||||||
reading.add(success(result));
|
reading.add(success(result));
|
||||||
|
@ -1822,23 +1823,19 @@ void ReadYourWritesTransaction::addReadConflictRange(KeyRangeRef const& keys) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
|
// There aren't any keys in the database with size larger than max key size, so if range contains large keys
|
||||||
// we can translate it to an equivalent one with smaller keys
|
// we can translate it to an equivalent one with smaller keys
|
||||||
KeyRef begin = keys.begin;
|
KeyRef begin = keys.begin;
|
||||||
KeyRef end = keys.end;
|
KeyRef end = keys.end;
|
||||||
|
|
||||||
if (begin.size() >
|
int64_t beginMaxSize = getMaxReadKeySize(begin);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
int64_t endMaxSize = getMaxReadKeySize(end);
|
||||||
begin = begin.substr(
|
if (begin.size() > beginMaxSize) {
|
||||||
0,
|
begin = begin.substr(0, beginMaxSize + 1);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
}
|
||||||
1);
|
if (end.size() > endMaxSize) {
|
||||||
if (end.size() >
|
end = end.substr(0, endMaxSize + 1);
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
}
|
||||||
end = end.substr(
|
|
||||||
0,
|
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1);
|
|
||||||
|
|
||||||
KeyRangeRef r = KeyRangeRef(begin, end);
|
KeyRangeRef r = KeyRangeRef(begin, end);
|
||||||
|
|
||||||
|
@ -2111,9 +2108,9 @@ void ReadYourWritesTransaction::atomicOp(const KeyRef& key, const ValueRef& oper
|
||||||
if (!isValidMutationType(operationType) || !isAtomicOp((MutationRef::Type)operationType))
|
if (!isValidMutationType(operationType) || !isAtomicOp((MutationRef::Type)operationType))
|
||||||
throw invalid_mutation_type();
|
throw invalid_mutation_type();
|
||||||
|
|
||||||
if (key.size() >
|
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
throw key_too_large();
|
throw key_too_large();
|
||||||
|
}
|
||||||
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
|
if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
|
||||||
throw value_too_large();
|
throw value_too_large();
|
||||||
|
|
||||||
|
@ -2218,9 +2215,9 @@ void ReadYourWritesTransaction::set(const KeyRef& key, const ValueRef& value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: check transaction size here
|
// TODO: check transaction size here
|
||||||
if (key.size() >
|
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
throw key_too_large();
|
throw key_too_large();
|
||||||
|
}
|
||||||
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
|
if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
|
||||||
throw value_too_large();
|
throw value_too_large();
|
||||||
|
|
||||||
|
@ -2254,23 +2251,19 @@ void ReadYourWritesTransaction::clear(const KeyRangeRef& range) {
|
||||||
return tr.clear(range, addWriteConflict);
|
return tr.clear(range, addWriteConflict);
|
||||||
}
|
}
|
||||||
|
|
||||||
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
|
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
|
||||||
// we can translate it to an equivalent one with smaller keys
|
// we can translate it to an equivalent one with smaller keys
|
||||||
KeyRef begin = range.begin;
|
KeyRef begin = range.begin;
|
||||||
KeyRef end = range.end;
|
KeyRef end = range.end;
|
||||||
|
|
||||||
if (begin.size() >
|
int64_t beginMaxSize = getMaxClearKeySize(begin);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
int64_t endMaxSize = getMaxClearKeySize(end);
|
||||||
begin = begin.substr(
|
if (begin.size() > beginMaxSize) {
|
||||||
0,
|
begin = begin.substr(0, beginMaxSize + 1);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
}
|
||||||
1);
|
if (end.size() > endMaxSize) {
|
||||||
if (end.size() >
|
end = end.substr(0, endMaxSize + 1);
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
}
|
||||||
end = end.substr(
|
|
||||||
0,
|
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1);
|
|
||||||
|
|
||||||
KeyRangeRef r = KeyRangeRef(begin, end);
|
KeyRangeRef r = KeyRangeRef(begin, end);
|
||||||
|
|
||||||
|
@ -2300,9 +2293,9 @@ void ReadYourWritesTransaction::clear(const KeyRef& key) {
|
||||||
if (key >= getMaxWriteKey())
|
if (key >= getMaxWriteKey())
|
||||||
throw key_outside_legal_range();
|
throw key_outside_legal_range();
|
||||||
|
|
||||||
if (key.size() >
|
if (key.size() > getMaxClearKeySize(key)) {
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (options.readYourWritesDisabled) {
|
if (options.readYourWritesDisabled) {
|
||||||
return tr.clear(key, addWriteConflict);
|
return tr.clear(key, addWriteConflict);
|
||||||
|
@ -2332,9 +2325,9 @@ Future<Void> ReadYourWritesTransaction::watch(const Key& key) {
|
||||||
if (key >= allKeys.end || (key >= getMaxReadKey() && key != metadataVersionKey && tr.apiVersionAtLeast(300)))
|
if (key >= allKeys.end || (key >= getMaxReadKey() && key != metadataVersionKey && tr.apiVersionAtLeast(300)))
|
||||||
return key_outside_legal_range();
|
return key_outside_legal_range();
|
||||||
|
|
||||||
if (key.size() >
|
if (key.size() > getMaxWriteKeySize(key, getTransactionState()->options.rawAccess)) {
|
||||||
(key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
return key_too_large();
|
return key_too_large();
|
||||||
|
}
|
||||||
|
|
||||||
return RYWImpl::watch(this, key);
|
return RYWImpl::watch(this, key);
|
||||||
}
|
}
|
||||||
|
@ -2350,23 +2343,19 @@ void ReadYourWritesTransaction::addWriteConflictRange(KeyRangeRef const& keys) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
|
// There aren't any keys in the database with size larger than the max key size, so if range contains large keys
|
||||||
// we can translate it to an equivalent one with smaller keys
|
// we can translate it to an equivalent one with smaller keys
|
||||||
KeyRef begin = keys.begin;
|
KeyRef begin = keys.begin;
|
||||||
KeyRef end = keys.end;
|
KeyRef end = keys.end;
|
||||||
|
|
||||||
if (begin.size() >
|
int64_t beginMaxSize = getMaxKeySize(begin);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
int64_t endMaxSize = getMaxKeySize(end);
|
||||||
begin = begin.substr(
|
if (begin.size() > beginMaxSize) {
|
||||||
0,
|
begin = begin.substr(0, beginMaxSize + 1);
|
||||||
(begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
}
|
||||||
1);
|
if (end.size() > endMaxSize) {
|
||||||
if (end.size() >
|
end = end.substr(0, endMaxSize + 1);
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
}
|
||||||
end = end.substr(
|
|
||||||
0,
|
|
||||||
(end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1);
|
|
||||||
|
|
||||||
KeyRangeRef r = KeyRangeRef(begin, end);
|
KeyRangeRef r = KeyRangeRef(begin, end);
|
||||||
|
|
||||||
|
|
|
@ -25,9 +25,15 @@
|
||||||
#include "fdbclient/sha1/SHA1.h"
|
#include "fdbclient/sha1/SHA1.h"
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
#if defined(HAVE_WOLFSSL)
|
||||||
|
#include <wolfssl/options.h>
|
||||||
|
#endif
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
#include <openssl/evp.h>
|
#include <openssl/evp.h>
|
||||||
#include <openssl/hmac.h>
|
#include <openssl/hmac.h>
|
||||||
|
#if defined(HAVE_WOLFSSL)
|
||||||
|
#undef SHA1 // wolfSSL will shadow FDB SHA1.h
|
||||||
|
#endif
|
||||||
#include <boost/algorithm/string/split.hpp>
|
#include <boost/algorithm/string/split.hpp>
|
||||||
#include <boost/algorithm/string/classification.hpp>
|
#include <boost/algorithm/string/classification.hpp>
|
||||||
#include <boost/algorithm/string.hpp>
|
#include <boost/algorithm/string.hpp>
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
#include "flow/Net2Packet.h"
|
#include "flow/Net2Packet.h"
|
||||||
#include "fdbclient/Knobs.h"
|
#include "fdbclient/Knobs.h"
|
||||||
#include "fdbrpc/IRateControl.h"
|
#include "fdbrpc/IRateControl.h"
|
||||||
#include "fdbclient/HTTP.h"
|
#include "fdbrpc/HTTP.h"
|
||||||
#include "fdbclient/JSONDoc.h"
|
#include "fdbclient/JSONDoc.h"
|
||||||
|
|
||||||
// Representation of all the things you need to connect to a blob store instance with some credentials.
|
// Representation of all the things you need to connect to a blob store instance with some credentials.
|
||||||
|
|
|
@ -450,6 +450,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
||||||
init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
|
init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
|
||||||
init( MAX_PROXY_COMPUTE, 2.0 );
|
init( MAX_PROXY_COMPUTE, 2.0 );
|
||||||
init( MAX_COMPUTE_PER_OPERATION, 0.1 );
|
init( MAX_COMPUTE_PER_OPERATION, 0.1 );
|
||||||
|
init( MAX_COMPUTE_DURATION_LOG_CUTOFF, 0.05 );
|
||||||
init( PROXY_COMPUTE_BUCKETS, 20000 );
|
init( PROXY_COMPUTE_BUCKETS, 20000 );
|
||||||
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
|
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
|
||||||
init( TXN_STATE_SEND_AMOUNT, 4 );
|
init( TXN_STATE_SEND_AMOUNT, 4 );
|
||||||
|
@ -541,6 +542,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
||||||
init( CC_ENABLE_ENTIRE_SATELLITE_MONITORING, false );
|
init( CC_ENABLE_ENTIRE_SATELLITE_MONITORING, false );
|
||||||
init( CC_SATELLITE_DEGRADATION_MIN_COMPLAINER, 3 );
|
init( CC_SATELLITE_DEGRADATION_MIN_COMPLAINER, 3 );
|
||||||
init( CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER, 3 );
|
init( CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER, 3 );
|
||||||
|
init( CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL, 0.5 );
|
||||||
|
|
||||||
init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
|
init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
|
||||||
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
|
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
|
||||||
|
|
|
@ -374,6 +374,7 @@ public:
|
||||||
int MAX_COMMIT_UPDATES;
|
int MAX_COMMIT_UPDATES;
|
||||||
double MAX_PROXY_COMPUTE;
|
double MAX_PROXY_COMPUTE;
|
||||||
double MAX_COMPUTE_PER_OPERATION;
|
double MAX_COMPUTE_PER_OPERATION;
|
||||||
|
double MAX_COMPUTE_DURATION_LOG_CUTOFF;
|
||||||
int PROXY_COMPUTE_BUCKETS;
|
int PROXY_COMPUTE_BUCKETS;
|
||||||
double PROXY_COMPUTE_GROWTH_RATE;
|
double PROXY_COMPUTE_GROWTH_RATE;
|
||||||
int TXN_STATE_SEND_AMOUNT;
|
int TXN_STATE_SEND_AMOUNT;
|
||||||
|
@ -480,6 +481,8 @@ public:
|
||||||
// be determined as degraded worker.
|
// be determined as degraded worker.
|
||||||
int CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER; // The minimum number of degraded servers in satellite DC to be
|
int CC_SATELLITE_DEGRADATION_MIN_BAD_SERVER; // The minimum number of degraded servers in satellite DC to be
|
||||||
// determined as degraded satellite.
|
// determined as degraded satellite.
|
||||||
|
double CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL; // The interval to prevent re-recruiting the same singleton if a
|
||||||
|
// recruiting fight between two cluster controllers occurs.
|
||||||
|
|
||||||
// Knobs used to select the best policy (via monte carlo)
|
// Knobs used to select the best policy (via monte carlo)
|
||||||
int POLICY_RATING_TESTS; // number of tests per policy (in order to compare)
|
int POLICY_RATING_TESTS; // number of tests per policy (in order to compare)
|
||||||
|
|
|
@ -41,9 +41,15 @@ class SimpleConfigTransactionImpl {
|
||||||
if (self->dID.present()) {
|
if (self->dID.present()) {
|
||||||
TraceEvent("SimpleConfigTransactionGettingReadVersion", self->dID.get());
|
TraceEvent("SimpleConfigTransactionGettingReadVersion", self->dID.get());
|
||||||
}
|
}
|
||||||
ConfigTransactionGetGenerationRequest req;
|
state ConfigTransactionGetGenerationReply reply;
|
||||||
ConfigTransactionGetGenerationReply reply =
|
if (self->cti.hostname.present()) {
|
||||||
wait(retryBrokenPromise(self->cti.getGeneration, ConfigTransactionGetGenerationRequest{}));
|
wait(store(reply,
|
||||||
|
retryGetReplyFromHostname(ConfigTransactionGetGenerationRequest{},
|
||||||
|
self->cti.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGTXN_GETGENERATION)));
|
||||||
|
} else {
|
||||||
|
wait(store(reply, retryBrokenPromise(self->cti.getGeneration, ConfigTransactionGetGenerationRequest{})));
|
||||||
|
}
|
||||||
if (self->dID.present()) {
|
if (self->dID.present()) {
|
||||||
TraceEvent("SimpleConfigTransactionGotReadVersion", self->dID.get())
|
TraceEvent("SimpleConfigTransactionGotReadVersion", self->dID.get())
|
||||||
.detail("Version", reply.generation.liveVersion);
|
.detail("Version", reply.generation.liveVersion);
|
||||||
|
@ -62,8 +68,15 @@ class SimpleConfigTransactionImpl {
|
||||||
.detail("ConfigClass", configKey.configClass)
|
.detail("ConfigClass", configKey.configClass)
|
||||||
.detail("KnobName", configKey.knobName);
|
.detail("KnobName", configKey.knobName);
|
||||||
}
|
}
|
||||||
ConfigTransactionGetReply reply =
|
state ConfigTransactionGetReply reply;
|
||||||
wait(retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey }));
|
if (self->cti.hostname.present()) {
|
||||||
|
wait(store(reply,
|
||||||
|
retryGetReplyFromHostname(ConfigTransactionGetRequest{ generation, configKey },
|
||||||
|
self->cti.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGTXN_GET)));
|
||||||
|
} else {
|
||||||
|
wait(store(reply, retryBrokenPromise(self->cti.get, ConfigTransactionGetRequest{ generation, configKey })));
|
||||||
|
}
|
||||||
if (self->dID.present()) {
|
if (self->dID.present()) {
|
||||||
TraceEvent("SimpleConfigTransactionGotValue", self->dID.get())
|
TraceEvent("SimpleConfigTransactionGotValue", self->dID.get())
|
||||||
.detail("Value", reply.value.get().toString());
|
.detail("Value", reply.value.get().toString());
|
||||||
|
@ -80,8 +93,17 @@ class SimpleConfigTransactionImpl {
|
||||||
self->getGenerationFuture = getGeneration(self);
|
self->getGenerationFuture = getGeneration(self);
|
||||||
}
|
}
|
||||||
ConfigGeneration generation = wait(self->getGenerationFuture);
|
ConfigGeneration generation = wait(self->getGenerationFuture);
|
||||||
ConfigTransactionGetConfigClassesReply reply =
|
state ConfigTransactionGetConfigClassesReply reply;
|
||||||
wait(retryBrokenPromise(self->cti.getClasses, ConfigTransactionGetConfigClassesRequest{ generation }));
|
if (self->cti.hostname.present()) {
|
||||||
|
wait(store(reply,
|
||||||
|
retryGetReplyFromHostname(ConfigTransactionGetConfigClassesRequest{ generation },
|
||||||
|
self->cti.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGTXN_GETCLASSES)));
|
||||||
|
} else {
|
||||||
|
wait(store(
|
||||||
|
reply,
|
||||||
|
retryBrokenPromise(self->cti.getClasses, ConfigTransactionGetConfigClassesRequest{ generation })));
|
||||||
|
}
|
||||||
RangeResult result;
|
RangeResult result;
|
||||||
for (const auto& configClass : reply.configClasses) {
|
for (const auto& configClass : reply.configClasses) {
|
||||||
result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr));
|
result.push_back_deep(result.arena(), KeyValueRef(configClass, ""_sr));
|
||||||
|
@ -94,8 +116,17 @@ class SimpleConfigTransactionImpl {
|
||||||
self->getGenerationFuture = getGeneration(self);
|
self->getGenerationFuture = getGeneration(self);
|
||||||
}
|
}
|
||||||
ConfigGeneration generation = wait(self->getGenerationFuture);
|
ConfigGeneration generation = wait(self->getGenerationFuture);
|
||||||
ConfigTransactionGetKnobsReply reply =
|
state ConfigTransactionGetKnobsReply reply;
|
||||||
wait(retryBrokenPromise(self->cti.getKnobs, ConfigTransactionGetKnobsRequest{ generation, configClass }));
|
if (self->cti.hostname.present()) {
|
||||||
|
wait(store(reply,
|
||||||
|
retryGetReplyFromHostname(ConfigTransactionGetKnobsRequest{ generation, configClass },
|
||||||
|
self->cti.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGTXN_GETKNOBS)));
|
||||||
|
} else {
|
||||||
|
wait(store(
|
||||||
|
reply,
|
||||||
|
retryBrokenPromise(self->cti.getKnobs, ConfigTransactionGetKnobsRequest{ generation, configClass })));
|
||||||
|
}
|
||||||
RangeResult result;
|
RangeResult result;
|
||||||
for (const auto& knobName : reply.knobNames) {
|
for (const auto& knobName : reply.knobNames) {
|
||||||
result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr));
|
result.push_back_deep(result.arena(), KeyValueRef(knobName, ""_sr));
|
||||||
|
@ -109,7 +140,11 @@ class SimpleConfigTransactionImpl {
|
||||||
}
|
}
|
||||||
wait(store(self->toCommit.generation, self->getGenerationFuture));
|
wait(store(self->toCommit.generation, self->getGenerationFuture));
|
||||||
self->toCommit.annotation.timestamp = now();
|
self->toCommit.annotation.timestamp = now();
|
||||||
wait(retryBrokenPromise(self->cti.commit, self->toCommit));
|
if (self->cti.hostname.present()) {
|
||||||
|
wait(retryGetReplyFromHostname(self->toCommit, self->cti.hostname.get(), WLTOKEN_CONFIGTXN_COMMIT));
|
||||||
|
} else {
|
||||||
|
wait(retryBrokenPromise(self->cti.commit, self->toCommit));
|
||||||
|
}
|
||||||
self->committed = true;
|
self->committed = true;
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
@ -126,9 +161,14 @@ class SimpleConfigTransactionImpl {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SimpleConfigTransactionImpl(Database const& cx) : cx(cx) {
|
SimpleConfigTransactionImpl(Database const& cx) : cx(cx) {
|
||||||
auto coordinators = cx->getConnectionRecord()->getConnectionString().coordinators();
|
const ClusterConnectionString& cs = cx->getConnectionRecord()->getConnectionString();
|
||||||
std::sort(coordinators.begin(), coordinators.end());
|
if (cs.coordinators().size()) {
|
||||||
cti = ConfigTransactionInterface(coordinators[0]);
|
std::vector<NetworkAddress> coordinators = cs.coordinators();
|
||||||
|
std::sort(coordinators.begin(), coordinators.end());
|
||||||
|
cti = ConfigTransactionInterface(coordinators[0]);
|
||||||
|
} else {
|
||||||
|
cti = ConfigTransactionInterface(cs.hostnames[0]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SimpleConfigTransactionImpl(ConfigTransactionInterface const& cti) : cti(cti) {}
|
SimpleConfigTransactionImpl(ConfigTransactionInterface const& cti) : cti(cti) {}
|
||||||
|
|
|
@ -1644,13 +1644,10 @@ void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key
|
||||||
|
|
||||||
CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
|
CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
|
||||||
|
|
||||||
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
|
ACTOR Future<RangeResult> coordinatorsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) {
|
||||||
KeyRangeRef kr,
|
state ClusterConnectionString cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
|
||||||
GetRangeLimits limitsHint) const {
|
state std::vector<NetworkAddress> coordinator_processes = wait(cs.tryResolveHostnames());
|
||||||
RangeResult result;
|
RangeResult result;
|
||||||
KeyRef prefix(getKeyRange().begin);
|
|
||||||
auto cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
|
|
||||||
auto coordinator_processes = cs.coordinators();
|
|
||||||
Key cluster_decription_key = prefix.withSuffix(LiteralStringRef("cluster_description"));
|
Key cluster_decription_key = prefix.withSuffix(LiteralStringRef("cluster_description"));
|
||||||
if (kr.contains(cluster_decription_key)) {
|
if (kr.contains(cluster_decription_key)) {
|
||||||
result.push_back_deep(result.arena(), KeyValueRef(cluster_decription_key, cs.clusterKeyName()));
|
result.push_back_deep(result.arena(), KeyValueRef(cluster_decription_key, cs.clusterKeyName()));
|
||||||
|
@ -1673,10 +1670,16 @@ Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
|
||||||
return rywGetRange(ryw, kr, result);
|
return rywGetRange(ryw, kr, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
|
||||||
|
KeyRangeRef kr,
|
||||||
|
GetRangeLimits limitsHint) const {
|
||||||
|
KeyRef prefix(getKeyRange().begin);
|
||||||
|
return coordinatorsGetRangeActor(ryw, prefix, kr);
|
||||||
|
}
|
||||||
|
|
||||||
ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
|
ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
|
||||||
state Reference<IQuorumChange> change;
|
state Reference<IQuorumChange> change;
|
||||||
state ClusterConnectionString
|
state ClusterConnectionString conn; // We don't care about the Key here.
|
||||||
conn; // We don't care about the Key here, it will be overridden in changeQuorumChecker().
|
|
||||||
state std::vector<std::string> process_address_or_hostname_strs;
|
state std::vector<std::string> process_address_or_hostname_strs;
|
||||||
state Optional<std::string> msg;
|
state Optional<std::string> msg;
|
||||||
state int index;
|
state int index;
|
||||||
|
@ -1700,7 +1703,6 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
|
||||||
try {
|
try {
|
||||||
if (Hostname::isHostname(process_address_or_hostname_strs[index])) {
|
if (Hostname::isHostname(process_address_or_hostname_strs[index])) {
|
||||||
conn.hostnames.push_back(Hostname::parse(process_address_or_hostname_strs[index]));
|
conn.hostnames.push_back(Hostname::parse(process_address_or_hostname_strs[index]));
|
||||||
conn.status = ClusterConnectionString::ConnectionStringStatus::UNRESOLVED;
|
|
||||||
} else {
|
} else {
|
||||||
NetworkAddress a = NetworkAddress::parse(process_address_or_hostname_strs[index]);
|
NetworkAddress a = NetworkAddress::parse(process_address_or_hostname_strs[index]);
|
||||||
if (!a.isValid()) {
|
if (!a.isValid()) {
|
||||||
|
@ -1717,18 +1719,19 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
|
||||||
if (parse_error) {
|
if (parse_error) {
|
||||||
std::string error = "ERROR: \'" + process_address_or_hostname_strs[index] +
|
std::string error = "ERROR: \'" + process_address_or_hostname_strs[index] +
|
||||||
"\' is not a valid network endpoint address\n";
|
"\' is not a valid network endpoint address\n";
|
||||||
if (process_address_or_hostname_strs[index].find(":tls") != std::string::npos)
|
|
||||||
error += " Do not include the `:tls' suffix when naming a process\n";
|
|
||||||
return ManagementAPIError::toJsonString(false, "coordinators", error);
|
return ManagementAPIError::toJsonString(false, "coordinators", error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wait(conn.resolveHostnames());
|
std::vector<NetworkAddress> addressesVec = wait(conn.tryResolveHostnames());
|
||||||
if (conn.coordinators().size())
|
if (addressesVec.size() != conn.hostnames.size() + conn.coordinators().size()) {
|
||||||
change = specifiedQuorumChange(conn.coordinators());
|
return ManagementAPIError::toJsonString(false, "coordinators", "One or more hostnames are not resolvable.");
|
||||||
else
|
} else if (addressesVec.size()) {
|
||||||
|
change = specifiedQuorumChange(addressesVec);
|
||||||
|
} else {
|
||||||
change = noQuorumChange();
|
change = noQuorumChange();
|
||||||
|
}
|
||||||
|
|
||||||
// check update for cluster_description
|
// check update for cluster_description
|
||||||
Key cluster_decription_key = LiteralStringRef("cluster_description").withPrefix(kr.begin);
|
Key cluster_decription_key = LiteralStringRef("cluster_description").withPrefix(kr.begin);
|
||||||
|
@ -1740,19 +1743,18 @@ ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWrite
|
||||||
change = nameQuorumChange(entry.second.get().toString(), change);
|
change = nameQuorumChange(entry.second.get().toString(), change);
|
||||||
} else {
|
} else {
|
||||||
// throw the error
|
// throw the error
|
||||||
return Optional<std::string>(ManagementAPIError::toJsonString(
|
return ManagementAPIError::toJsonString(
|
||||||
false, "coordinators", "Cluster description must match [A-Za-z0-9_]+"));
|
false, "coordinators", "Cluster description must match [A-Za-z0-9_]+");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT(change.isValid());
|
ASSERT(change.isValid());
|
||||||
|
|
||||||
TraceEvent(SevDebug, "SKSChangeCoordinatorsStart")
|
TraceEvent(SevDebug, "SKSChangeCoordinatorsStart")
|
||||||
.detail("NewHostnames", conn.hostnames.size() ? describe(conn.hostnames) : "N/A")
|
.detail("NewAddresses", describe(addressesVec))
|
||||||
.detail("NewAddresses", describe(conn.coordinators()))
|
|
||||||
.detail("Description", entry.first ? entry.second.get().toString() : "");
|
.detail("Description", entry.first ? entry.second.get().toString() : "");
|
||||||
|
|
||||||
Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, &conn));
|
Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), change, addressesVec));
|
||||||
|
|
||||||
TraceEvent(SevDebug, "SKSChangeCoordinatorsFinish")
|
TraceEvent(SevDebug, "SKSChangeCoordinatorsFinish")
|
||||||
.detail("Result", r.present() ? static_cast<int>(r.get()) : -1); // -1 means success
|
.detail("Result", r.present() ? static_cast<int>(r.get()) : -1); // -1 means success
|
||||||
|
@ -1804,9 +1806,10 @@ ACTOR static Future<RangeResult> CoordinatorsAutoImplActor(ReadYourWritesTransac
|
||||||
state ClusterConnectionString old(currentKey.get().toString());
|
state ClusterConnectionString old(currentKey.get().toString());
|
||||||
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
|
state CoordinatorsResult result = CoordinatorsResult::SUCCESS;
|
||||||
|
|
||||||
|
std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
|
||||||
std::vector<NetworkAddress> _desiredCoordinators = wait(autoQuorumChange()->getDesiredCoordinators(
|
std::vector<NetworkAddress> _desiredCoordinators = wait(autoQuorumChange()->getDesiredCoordinators(
|
||||||
&tr,
|
&tr,
|
||||||
old.coordinators(),
|
oldCoordinators,
|
||||||
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
|
Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
|
||||||
result));
|
result));
|
||||||
|
|
||||||
|
|
|
@ -307,23 +307,35 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
|
||||||
bool* quorum_reachable,
|
bool* quorum_reachable,
|
||||||
int* coordinatorsFaultTolerance) {
|
int* coordinatorsFaultTolerance) {
|
||||||
try {
|
try {
|
||||||
wait(connRecord->resolveHostnames());
|
|
||||||
state ClientCoordinators coord(connRecord);
|
state ClientCoordinators coord(connRecord);
|
||||||
state StatusObject statusObj;
|
state StatusObject statusObj;
|
||||||
|
|
||||||
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
state std::vector<Future<Optional<LeaderInfo>>> leaderServers;
|
||||||
leaderServers.reserve(coord.clientLeaderServers.size());
|
leaderServers.reserve(coord.clientLeaderServers.size());
|
||||||
for (int i = 0; i < coord.clientLeaderServers.size(); i++)
|
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
|
||||||
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
if (coord.clientLeaderServers[i].hostname.present()) {
|
||||||
GetLeaderRequest(coord.clusterKey, UID()),
|
leaderServers.push_back(retryGetReplyFromHostname(GetLeaderRequest(coord.clusterKey, UID()),
|
||||||
TaskPriority::CoordinationReply));
|
coord.clientLeaderServers[i].hostname.get(),
|
||||||
|
WLTOKEN_CLIENTLEADERREG_GETLEADER,
|
||||||
|
TaskPriority::CoordinationReply));
|
||||||
|
} else {
|
||||||
|
leaderServers.push_back(retryBrokenPromise(coord.clientLeaderServers[i].getLeader,
|
||||||
|
GetLeaderRequest(coord.clusterKey, UID()),
|
||||||
|
TaskPriority::CoordinationReply));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
state std::vector<Future<ProtocolInfoReply>> coordProtocols;
|
state std::vector<Future<ProtocolInfoReply>> coordProtocols;
|
||||||
coordProtocols.reserve(coord.clientLeaderServers.size());
|
coordProtocols.reserve(coord.clientLeaderServers.size());
|
||||||
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
|
for (int i = 0; i < coord.clientLeaderServers.size(); i++) {
|
||||||
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown(
|
if (coord.clientLeaderServers[i].hostname.present()) {
|
||||||
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO) };
|
coordProtocols.push_back(retryGetReplyFromHostname(
|
||||||
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
|
ProtocolInfoRequest{}, coord.clientLeaderServers[i].hostname.get(), WLTOKEN_PROTOCOL_INFO));
|
||||||
|
} else {
|
||||||
|
RequestStream<ProtocolInfoRequest> requestStream{ Endpoint::wellKnown(
|
||||||
|
{ coord.clientLeaderServers[i].getLeader.getEndpoint().addresses }, WLTOKEN_PROTOCOL_INFO) };
|
||||||
|
coordProtocols.push_back(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) &&
|
wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.5) &&
|
||||||
|
@ -337,8 +349,12 @@ ACTOR Future<Optional<StatusObject>> clientCoordinatorsStatusFetcher(Reference<I
|
||||||
int coordinatorsUnavailable = 0;
|
int coordinatorsUnavailable = 0;
|
||||||
for (int i = 0; i < leaderServers.size(); i++) {
|
for (int i = 0; i < leaderServers.size(); i++) {
|
||||||
StatusObject coordStatus;
|
StatusObject coordStatus;
|
||||||
coordStatus["address"] =
|
if (coord.clientLeaderServers[i].hostname.present()) {
|
||||||
coord.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress().toString();
|
coordStatus["address"] = coord.clientLeaderServers[i].hostname.get().toString();
|
||||||
|
} else {
|
||||||
|
coordStatus["address"] =
|
||||||
|
coord.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress().toString();
|
||||||
|
}
|
||||||
|
|
||||||
if (leaderServers[i].isReady()) {
|
if (leaderServers[i].isReady()) {
|
||||||
coordStatus["reachable"] = true;
|
coordStatus["reachable"] = true;
|
||||||
|
|
|
@ -48,6 +48,8 @@ struct TenantMapEntry {
|
||||||
int64_t id;
|
int64_t id;
|
||||||
Key prefix;
|
Key prefix;
|
||||||
|
|
||||||
|
constexpr static int ROOT_PREFIX_SIZE = sizeof(id);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void initPrefix(KeyRef subspace) {
|
void initPrefix(KeyRef subspace) {
|
||||||
ASSERT(id >= 0);
|
ASSERT(id >= 0);
|
||||||
|
|
|
@ -24,6 +24,9 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED)
|
#if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED)
|
||||||
|
#if defined(HAVE_WOLFSSL)
|
||||||
|
#include <wolfssl/options.h>
|
||||||
|
#endif
|
||||||
#include <openssl/md5.h>
|
#include <openssl/md5.h>
|
||||||
#elif !defined(_MD5_H)
|
#elif !defined(_MD5_H)
|
||||||
#define _MD5_H
|
#define _MD5_H
|
||||||
|
|
|
@ -15,6 +15,7 @@ set(FDBRPC_SRCS
|
||||||
genericactors.actor.h
|
genericactors.actor.h
|
||||||
genericactors.actor.cpp
|
genericactors.actor.cpp
|
||||||
HealthMonitor.actor.cpp
|
HealthMonitor.actor.cpp
|
||||||
|
HTTP.actor.cpp
|
||||||
IAsyncFile.actor.cpp
|
IAsyncFile.actor.cpp
|
||||||
IPAllowList.cpp
|
IPAllowList.cpp
|
||||||
LoadBalance.actor.cpp
|
LoadBalance.actor.cpp
|
||||||
|
@ -28,6 +29,10 @@ set(FDBRPC_SRCS
|
||||||
ReplicationPolicy.cpp
|
ReplicationPolicy.cpp
|
||||||
ReplicationTypes.cpp
|
ReplicationTypes.cpp
|
||||||
ReplicationUtils.cpp
|
ReplicationUtils.cpp
|
||||||
|
RESTClient.h
|
||||||
|
RESTClient.actor.cpp
|
||||||
|
RESTUtils.h
|
||||||
|
RESTUtils.actor.cpp
|
||||||
SimExternalConnection.actor.cpp
|
SimExternalConnection.actor.cpp
|
||||||
SimExternalConnection.h
|
SimExternalConnection.h
|
||||||
Stats.actor.cpp
|
Stats.actor.cpp
|
||||||
|
|
|
@ -18,10 +18,12 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "fdbclient/HTTP.h"
|
#include "fdbrpc/HTTP.h"
|
||||||
|
|
||||||
#include "fdbclient/md5/md5.h"
|
#include "fdbclient/md5/md5.h"
|
||||||
#include "fdbclient/libb64/encode.h"
|
#include "fdbclient/libb64/encode.h"
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
|
||||||
#include "flow/actorcompiler.h" // has to be last include
|
#include "flow/actorcompiler.h" // has to be last include
|
||||||
|
|
||||||
namespace HTTP {
|
namespace HTTP {
|
|
@ -18,6 +18,11 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifndef FDBRPC_HTTP_H
|
||||||
|
#define FDBRPC_HTTP_H
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
#include "flow/flow.h"
|
#include "flow/flow.h"
|
||||||
#include "flow/Net2Packet.h"
|
#include "flow/Net2Packet.h"
|
||||||
#include "fdbrpc/IRateControl.h"
|
#include "fdbrpc/IRateControl.h"
|
||||||
|
@ -63,4 +68,27 @@ Future<Reference<Response>> doRequest(Reference<IConnection> const& conn,
|
||||||
int64_t* const& pSent,
|
int64_t* const& pSent,
|
||||||
Reference<IRateControl> const& recvRate,
|
Reference<IRateControl> const& recvRate,
|
||||||
const std::string& requestHeader = std::string());
|
const std::string& requestHeader = std::string());
|
||||||
|
|
||||||
|
constexpr int HTTP_STATUS_CODE_OK = 200;
|
||||||
|
constexpr int HTTP_STATUS_CODE_CREATED = 201;
|
||||||
|
constexpr int HTTP_STATUS_CODE_ACCEPTED = 202;
|
||||||
|
constexpr int HTTP_STATUS_CODE_NO_CONTENT = 204;
|
||||||
|
constexpr int HTTP_STATUS_CODE_UNAUTHORIZED = 401;
|
||||||
|
constexpr int HTTP_STATUS_CODE_NOT_ACCEPTABLE = 406;
|
||||||
|
constexpr int HTTP_STATUS_CODE_TOO_MANY_REQUESTS = 429;
|
||||||
|
constexpr int HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR = 500;
|
||||||
|
constexpr int HTTP_STATUS_CODE_BAD_GATEWAY = 502;
|
||||||
|
constexpr int HTTP_STATUS_CODE_SERVICE_UNAVAILABLE = 503;
|
||||||
|
|
||||||
|
constexpr int HTTP_RETRYAFTER_DELAY_SECS = 300;
|
||||||
|
|
||||||
|
const std::string HTTP_VERB_GET = "GET";
|
||||||
|
const std::string HTTP_VERB_HEAD = "HEAD";
|
||||||
|
const std::string HTTP_VERB_DELETE = "DELETE";
|
||||||
|
const std::string HTTP_VERB_TRACE = "TRACE";
|
||||||
|
const std::string HTTP_VERB_PUT = "PUT";
|
||||||
|
const std::string HTTP_VERB_POST = "POST";
|
||||||
|
|
||||||
} // namespace HTTP
|
} // namespace HTTP
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,363 @@
|
||||||
|
/*
|
||||||
|
* RESTClient.actor.cpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "fdbrpc/RESTClient.h"
|
||||||
|
|
||||||
|
#include "fdbrpc/HTTP.h"
|
||||||
|
#include "fdbrpc/IRateControl.h"
|
||||||
|
#include "fdbrpc/RESTUtils.h"
|
||||||
|
#include "flow/Arena.h"
|
||||||
|
#include "flow/Error.h"
|
||||||
|
#include "flow/FastRef.h"
|
||||||
|
#include "flow/Knobs.h"
|
||||||
|
#include "flow/Net2Packet.h"
|
||||||
|
#include "flow/flow.h"
|
||||||
|
#include "flow/network.h"
|
||||||
|
#include "flow/serialize.h"
|
||||||
|
#include "flow/Trace.h"
|
||||||
|
#include "flow/UnitTest.h"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "flow/actorcompiler.h" // always the last include
|
||||||
|
|
||||||
|
// Renders the counters held by this Stats object as a JSON object keyed by counter name.
json_spirit::mObject RESTClient::Stats::getJSON() {
	json_spirit::mObject result;

	result["bytes_sent"] = bytes_sent;
	result["requests_successful"] = requests_successful;
	result["requests_failed"] = requests_failed;
	result["host_service"] = host_service;

	return result;
}
|
||||||
|
|
||||||
|
// Returns the counter-wise difference (this - rhs). The result keeps this object's host_service label.
RESTClient::Stats RESTClient::Stats::operator-(const Stats& rhs) {
	Stats delta(host_service);

	delta.bytes_sent = bytes_sent - rhs.bytes_sent;
	delta.requests_successful = requests_successful - rhs.requests_successful;
	delta.requests_failed = requests_failed - rhs.requests_failed;

	return delta;
}
|
||||||
|
|
||||||
|
// Builds a client with default knob values.
// The connection pool is created here because doRequest_impl() dereferences `conectionPool` unconditionally;
// leaving the Reference empty would make the first request dereference a null pool.
RESTClient::RESTClient() {
	conectionPool = makeReference<RESTConnectionPool>(knobs.connection_pool_size);
}
|
||||||
|
|
||||||
|
// Builds a client whose knobs are overridden by `knobSettings` (knob name -> value).
// RESTClientKnobs::set() throws rest_invalid_rest_client_knob for an unknown knob name.
// The connection pool is created after the knobs are applied so it is sized by the final
// connection_pool_size; doRequest_impl() dereferences `conectionPool` unconditionally.
RESTClient::RESTClient(std::unordered_map<std::string, int>& knobSettings) {
	knobs.set(knobSettings);
	conectionPool = makeReference<RESTConnectionPool>(knobs.connection_pool_size);
}
|
||||||
|
|
||||||
|
void RESTClient::setKnobs(const std::unordered_map<std::string, int>& knobSettings) {
|
||||||
|
knobs.set(knobSettings);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_map<std::string, int> RESTClient::getKnobs() const {
|
||||||
|
return knobs.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Issues a single REST request and retries it with exponential backoff until it succeeds or the retry budget is
// exhausted.
//
//   client       - supplies knobs, the connection pool and the per "host:service" stats map
//   verb         - HTTP verb to send
//   headers      - request headers; "Content-Length" and "Host" are overwritten here
//   url          - parsed request target (host, service, resource, body)
//   successCodes - HTTP response codes that count as success
//
// Returns the response once its code is in successCodes. Otherwise throws http_not_accepted, http_auth_failed,
// connection_failed, the original timed_out / connection_failed / lookup_failed error, or http_request_failed.
ACTOR Future<Reference<HTTP::Response>> doRequest_impl(Reference<RESTClient> client,
                                                       std::string verb,
                                                       HTTP::Headers headers,
                                                       RESTUrl* url,
                                                       std::set<unsigned int> successCodes) {
	state UnsentPacketQueue content;
	state int contentLen = url->body.size();

	// The callers in this file pass the address of a stack-local RESTUrl and return the Future immediately, so
	// `url` must not be dereferenced after the first wait(). Copy the fields that are needed later while the
	// pointee is still alive.
	state std::string urlHost = url->host;
	state std::string urlResource = url->resource;

	if (url->body.size() > 0) {
		PacketWriter pw(content.getWriteBuffer(url->body.size()), nullptr, Unversioned());
		pw.serializeBytes(url->body);
	}

	// Stats are maintained per "host:service"; create the entry on first use.
	std::string statsKey = RESTClient::getStatsKey(url->host, url->service);
	auto sItr = client->statsMap.find(statsKey);
	if (sItr == client->statsMap.end()) {
		client->statsMap.emplace(statsKey, std::make_unique<RESTClient::Stats>(statsKey));
	}

	headers["Content-Length"] = format("%d", contentLen);
	headers["Host"] = url->host;

	state int maxTries = std::min(client->knobs.request_tries, client->knobs.connect_tries);
	state int thisTry = 1;
	state double nextRetryDelay = 2.0;
	state Reference<IRateControl> sendReceiveRate = makeReference<Unlimited>();
	// NOTE(review): the knob is named request_timeout_secs but is divided by 60 before being used as the
	// timeoutError() duration — confirm the intended unit.
	state double reqTimeout = (client->knobs.request_timeout_secs * 1.0) / 60;
	state RESTConnectionPoolKey connectPoolKey = RESTConnectionPool::getConnectionPoolKey(url->host, url->service);
	state RESTClient::Stats* statsPtr = client->statsMap[statsKey].get();

	loop {
		state Optional<Error> err;
		state Optional<NetworkAddress> remoteAddress;
		state bool connectionEstablished = false;
		state Reference<HTTP::Response> r;

		try {
			// Start connecting
			Future<RESTConnectionPool::ReusableConnection> frconn = client->conectionPool->connect(
			    connectPoolKey, client->knobs.secure_connection, client->knobs.max_connection_life);

			// Finish connecting, do request
			state RESTConnectionPool::ReusableConnection rconn =
			    wait(timeoutError(frconn, client->knobs.connect_timeout));
			connectionEstablished = true;

			remoteAddress = rconn.conn->getPeerAddress();
			Reference<HTTP::Response> _r = wait(timeoutError(HTTP::doRequest(rconn.conn,
			                                                                 verb,
			                                                                 urlResource,
			                                                                 headers,
			                                                                 contentLen > 0 ? &content : nullptr,
			                                                                 contentLen,
			                                                                 sendReceiveRate,
			                                                                 &statsPtr->bytes_sent,
			                                                                 sendReceiveRate),
			                                                 reqTimeout));
			r = _r;

			// Since the response was parsed successfully (which is why we are here) reuse the connection unless we
			// received the "Connection: close" header.
			if (r->headers["Connection"] != "close") {
				client->conectionPool->returnConnection(connectPoolKey, rconn, client->knobs.connection_pool_size);
			}
			rconn.conn.clear();
		} catch (Error& e) {
			if (e.code() == error_code_actor_cancelled) {
				throw;
			}
			err = e;
		}

		// If err is not present then r is valid.
		// If r->code is in successCodes then record the successful request and return r.
		if (!err.present() && successCodes.count(r->code) != 0) {
			statsPtr->requests_successful++;
			return r;
		}

		// Otherwise, this request is considered failed. Update failure count.
		statsPtr->requests_failed++;

		// All errors in err are potentially retryable as well as certain HTTP response codes...
		bool retryable = err.present() || r->code == HTTP::HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR ||
		                 r->code == HTTP::HTTP_STATUS_CODE_BAD_GATEWAY ||
		                 r->code == HTTP::HTTP_STATUS_CODE_SERVICE_UNAVAILABLE ||
		                 r->code == HTTP::HTTP_STATUS_CODE_TOO_MANY_REQUESTS;

		// But only if our previous attempt was not the last allowable try.
		retryable = retryable && (thisTry < maxTries);

		TraceEvent event(SevWarn, retryable ? "RESTClient_FailedRetryable" : "RESTClient_RequestFailed");

		// Attach err to trace event if present, otherwise extract some stuff from the response
		if (err.present()) {
			event.errorUnsuppressed(err.get());
		}
		event.suppressFor(60);
		if (!err.present()) {
			event.detail("ResponseCode", r->code);
		}

		event.detail("ConnectionEstablished", connectionEstablished);

		if (remoteAddress.present())
			event.detail("RemoteEndpoint", remoteAddress.get());
		else
			event.detail("RemoteHost", urlHost);

		event.detail("Verb", verb).detail("Resource", urlResource).detail("ThisTry", thisTry);

		// If r is not valid or not code TOO_MANY_REQUESTS then increment the try count.
		// TOO_MANY_REQUEST's will not count against the attempt limit.
		if (!r || r->code != HTTP::HTTP_STATUS_CODE_TOO_MANY_REQUESTS) {
			++thisTry;
		}

		// We will wait delay seconds before the next retry, start with nextRetryDelay.
		double delay = nextRetryDelay;
		// Double but limit the *next* nextRetryDelay.
		nextRetryDelay = std::min(nextRetryDelay * 2, 60.0);

		if (retryable) {
			// If r is valid then obey the Retry-After response header if present.
			if (r) {
				auto iRetryAfter = r->headers.find("Retry-After");
				if (iRetryAfter != r->headers.end()) {
					event.detail("RetryAfterHeader", iRetryAfter->second);
					char* pEnd;
					double retryAfter = strtod(iRetryAfter->second.c_str(), &pEnd);
					if (*pEnd) {
						// If there were other characters then don't trust the parsed value
						retryAfter = HTTP::HTTP_RETRYAFTER_DELAY_SECS;
					}
					// Update delay
					delay = std::max(delay, retryAfter);
				}
			}

			// Log the delay then wait.
			event.detail("RetryDelay", delay);
			wait(::delay(delay));
		} else {
			// We can't retry, so throw something.

			// This error code means the authentication header was not accepted, likely the account or key is wrong.
			if (r && r->code == HTTP::HTTP_STATUS_CODE_NOT_ACCEPTABLE) {
				throw http_not_accepted();
			}

			if (r && r->code == HTTP::HTTP_STATUS_CODE_UNAUTHORIZED) {
				throw http_auth_failed();
			}

			// Recognize and throw specific errors
			if (err.present()) {
				int code = err.get().code();

				// If we get a timed_out error during the connect() phase, we'll call that connection_failed despite
				// the fact that there was technically never a 'connection' to begin with. It differentiates between an
				// active connection timing out vs a connection timing out, though not between an active connection
				// failing vs connection attempt failing.
				// TODO: Add more error types?
				if (code == error_code_timed_out && !connectionEstablished) {
					throw connection_failed();
				}

				if (code == error_code_timed_out || code == error_code_connection_failed ||
				    code == error_code_lookup_failed) {
					throw err.get();
				}
			}

			throw http_request_failed();
		}
	}
}
|
||||||
|
|
||||||
|
// Shared implementation for the body-carrying verbs: resolves the optional headers to a concrete header set
// (empty when none were supplied) and forwards the request to doRequest_impl().
Future<Reference<HTTP::Response>> RESTClient::doPutOrPost(const std::string& verb,
                                                          Optional<HTTP::Headers> optHeaders,
                                                          RESTUrl* url,
                                                          std::set<unsigned int> successCodes) {
	HTTP::Headers requestHeaders = optHeaders.present() ? optHeaders.get() : HTTP::Headers();
	Reference<RESTClient> self = Reference<RESTClient>::addRef(this);

	return doRequest_impl(self, verb, requestHeaders, url, successCodes);
}
|
||||||
|
|
||||||
|
// POSTs `requestBody` to `fullUrl`. Only HTTP_STATUS_CODE_OK is treated as success.
// NOTE(review): `target` is a stack local and only its address is forwarded; the callee must not dereference it
// after this function has returned.
Future<Reference<HTTP::Response>> RESTClient::doPost(const std::string& fullUrl,
                                                     const std::string& requestBody,
                                                     Optional<HTTP::Headers> optHeaders) {
	RESTUrl target(fullUrl, requestBody, knobs.secure_connection);
	std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK };

	return doPutOrPost(HTTP::HTTP_VERB_POST, optHeaders, std::addressof(target), okCodes);
}
|
||||||
|
|
||||||
|
// PUTs `requestBody` to `fullUrl`.
// NOTE(review): `target` is a stack local and only its address is forwarded; the callee must not dereference it
// after this function has returned.
Future<Reference<HTTP::Response>> RESTClient::doPut(const std::string& fullUrl,
                                                    const std::string& requestBody,
                                                    Optional<HTTP::Headers> optHeaders) {
	RESTUrl target(fullUrl, requestBody, knobs.secure_connection);

	// Accepted response codes:
	//   201       - the resource was created
	//   200 / 204 - the target resource representation was successfully modified with the desired state
	std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK,
		                               HTTP::HTTP_STATUS_CODE_CREATED,
		                               HTTP::HTTP_STATUS_CODE_NO_CONTENT };

	return doPutOrPost(HTTP::HTTP_VERB_PUT, optHeaders, std::addressof(target), okCodes);
}
|
||||||
|
|
||||||
|
// Shared implementation for the body-less verbs (GET, HEAD, DELETE, TRACE).
//
//   verb         - HTTP verb to send; forwarded unchanged to doRequest_impl()
//   optHeaders   - optional request headers; an empty header set is used when absent
//   url          - parsed request target
//   successCodes - HTTP response codes that count as success
Future<Reference<HTTP::Response>> RESTClient::doGetHeadDeleteOrTrace(const std::string& verb,
                                                                     Optional<HTTP::Headers> optHeaders,
                                                                     RESTUrl* url,
                                                                     std::set<unsigned int> successCodes) {
	HTTP::Headers headers;
	if (optHeaders.present()) {
		headers = optHeaders.get();
	}

	// Send the verb the caller asked for. This used to pass HTTP_VERB_GET unconditionally, which turned every
	// HEAD, DELETE and TRACE call into a GET.
	return doRequest_impl(Reference<RESTClient>::addRef(this), verb, headers, url, successCodes);
}
|
||||||
|
|
||||||
|
// Issues a GET for `fullUrl`. Only HTTP_STATUS_CODE_OK is treated as success.
Future<Reference<HTTP::Response>> RESTClient::doGet(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl target(fullUrl, knobs.secure_connection);
	std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK };

	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_GET, optHeaders, std::addressof(target), okCodes);
}
|
||||||
|
|
||||||
|
// Issues a HEAD for `fullUrl`. Only HTTP_STATUS_CODE_OK is treated as success.
Future<Reference<HTTP::Response>> RESTClient::doHead(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl target(fullUrl, knobs.secure_connection);
	std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK };

	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_HEAD, optHeaders, std::addressof(target), okCodes);
}
|
||||||
|
|
||||||
|
// Issues a DELETE for `fullUrl`.
Future<Reference<HTTP::Response>> RESTClient::doDelete(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl target(fullUrl, knobs.secure_connection);

	// Accepted response codes:
	//   200 - action has been enacted.
	//   202 - action will likely succeed, but has not yet been enacted.
	//   204 - action has been enacted, no further information is to be supplied.
	std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK,
		                               HTTP::HTTP_STATUS_CODE_NO_CONTENT,
		                               HTTP::HTTP_STATUS_CODE_ACCEPTED };

	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_DELETE, optHeaders, std::addressof(target), okCodes);
}
|
||||||
|
|
||||||
|
// Issues a TRACE for `fullUrl`. Only HTTP_STATUS_CODE_OK is treated as success.
Future<Reference<HTTP::Response>> RESTClient::doTrace(const std::string& fullUrl, Optional<HTTP::Headers> optHeaders) {
	RESTUrl target(fullUrl, knobs.secure_connection);
	std::set<unsigned int> okCodes = { HTTP::HTTP_STATUS_CODE_OK };

	return doGetHeadDeleteOrTrace(HTTP::HTTP_VERB_TRACE, optHeaders, std::addressof(target), okCodes);
}
|
||||||
|
|
||||||
|
// Only used to link unit tests.
// Intentionally empty: the function exists solely so that other code can reference a symbol from this file.
void forceLinkRESTClientTests() {}
|
||||||
|
|
||||||
|
// Verifies that a default-constructed RESTClient reports the default knob values, that setKnobs() round-trips
// through getKnobs(), and that an unknown knob name is rejected with rest_invalid_rest_client_knob.
TEST_CASE("fdbrpc/RESTClient") {
	RESTClient client;
	std::unordered_map<std::string, int> settings = client.getKnobs();

	// Defaults
	ASSERT_EQ(settings["secure_connection"], RESTClientKnobs::SECURE_CONNECTION);
	ASSERT_EQ(settings["connection_pool_size"], FLOW_KNOBS->RESTCLIENT_MAX_CONNECTIONPOOL_SIZE);
	ASSERT_EQ(settings["connect_tries"], FLOW_KNOBS->RESTCLIENT_CONNECT_TRIES);
	ASSERT_EQ(settings["connect_timeout"], FLOW_KNOBS->RESTCLIENT_CONNECT_TIMEOUT);
	ASSERT_EQ(settings["max_connection_life"], FLOW_KNOBS->RESTCLIENT_MAX_CONNECTION_LIFE);
	ASSERT_EQ(settings["request_tries"], FLOW_KNOBS->RESTCLIENT_REQUEST_TRIES);
	ASSERT_EQ(settings["request_timeout_secs"], FLOW_KNOBS->RESTCLIENT_REQUEST_TIMEOUT_SEC);

	// Bump every knob by one and push the new values into the client.
	for (auto& entry : settings) {
		entry.second += 1;
	}
	client.setKnobs(settings);

	// Every knob reported by the client must now carry the bumped value.
	std::unordered_map<std::string, int> reported = client.getKnobs();
	for (auto& entry : reported) {
		ASSERT_EQ(settings[entry.first], entry.second);
	}

	// invalid client knob
	settings["foo"] = 100;
	try {
		client.setKnobs(settings);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_rest_client_knob) {
			throw e;
		}
	}

	return Void();
}
|
|
@ -0,0 +1,97 @@
|
||||||
|
/*
|
||||||
|
* RESTClient.h
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef FDBRPC_RESTCLIENT_H
|
||||||
|
#define FDBRPC_RESTCLIENT_H
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "fdbclient/JSONDoc.h"
|
||||||
|
#include "fdbrpc/HTTP.h"
|
||||||
|
#include "fdbrpc/RESTUtils.h"
|
||||||
|
#include "flow/Arena.h"
|
||||||
|
#include "flow/FastRef.h"
|
||||||
|
#include "flow/flow.h"
|
||||||
|
#include "flow/Net2Packet.h"
|
||||||
|
|
||||||
|
// This interface enables sending REST HTTP requests and receiving REST HTTP responses from a resource identified by a
|
||||||
|
// URI.
|
||||||
|
|
||||||
|
class RESTClient : public ReferenceCounted<RESTClient> {
public:
	// Request counters for one endpoint; `host_service` is the label the counters were created with.
	struct Stats {
		explicit Stats(const std::string& hService) : host_service(hService) {}

		// Counter-wise difference (this - rhs).
		Stats operator-(const Stats& rhs);

		// Resets all counters to zero; the label is kept.
		void clear() {
			bytes_sent = 0;
			requests_successful = 0;
			requests_failed = 0;
		}

		// JSON rendering of the label and the counters.
		json_spirit::mObject getJSON();

		std::string host_service;
		int64_t requests_successful = 0;
		int64_t requests_failed = 0;
		int64_t bytes_sent = 0;
	};

	RESTClientKnobs knobs;
	Reference<RESTConnectionPool> conectionPool;
	// Connection stats maintained per "host:service"
	std::unordered_map<std::string, std::unique_ptr<Stats>> statsMap;

	RESTClient();
	explicit RESTClient(std::unordered_map<std::string, int>& params);

	// Knob accessors; values are exchanged as a knob-name -> value map.
	void setKnobs(const std::unordered_map<std::string, int>& knobSettings);
	std::unordered_map<std::string, int> getKnobs() const;

	// Supports common REST APIs.
	// Each method below parses 'fullUrl' through the RESTUrl interface, obtains a (possibly cached) connection for
	// the 'host:service' pair from RESTConnectionPool, and then uses HTTP::doRequest to perform the operation.

	Future<Reference<HTTP::Response>> doGet(const std::string& fullUrl,
	                                        Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doHead(const std::string& fullUrl,
	                                         Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doDelete(const std::string& fullUrl,
	                                           Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doTrace(const std::string& fullUrl,
	                                          Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doPut(const std::string& fullUrl,
	                                        const std::string& requestBody,
	                                        Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());
	Future<Reference<HTTP::Response>> doPost(const std::string& fullUrl,
	                                         const std::string& requestBody,
	                                         Optional<HTTP::Headers> optHeaders = Optional<HTTP::Headers>());

	// Builds the statsMap key for an endpoint: "<host>:<service>".
	static std::string getStatsKey(const std::string& host, const std::string& service) { return host + ":" + service; }

private:
	// Common path for the verbs that carry no request body.
	Future<Reference<HTTP::Response>> doGetHeadDeleteOrTrace(const std::string& verb,
	                                                         Optional<HTTP::Headers> optHeaders,
	                                                         RESTUrl* url,
	                                                         std::set<unsigned int> successCodes);
	// Common path for the verbs that carry a request body.
	Future<Reference<HTTP::Response>> doPutOrPost(const std::string& verb,
	                                              Optional<HTTP::Headers> headers,
	                                              RESTUrl* url,
	                                              std::set<unsigned int> successCodes);
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,276 @@
|
||||||
|
/*
|
||||||
|
* RESTUtils.actor.cpp
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "fdbrpc/RESTUtils.h"
|
||||||
|
|
||||||
|
#include "flow/flat_buffers.h"
|
||||||
|
#include "flow/UnitTest.h"
|
||||||
|
|
||||||
|
#include <boost/algorithm/string.hpp>
|
||||||
|
|
||||||
|
#include "flow/actorcompiler.h" // always the last include
|
||||||
|
|
||||||
|
namespace {
// URI schemes accepted by RESTUrl::parseUrl(). Lookups are exact (case-sensitive) matches.
std::unordered_set<std::string> protocols = { "http", "https" };

// True when `protocol` is one of the accepted URI schemes.
bool isProtocolSupported(const std::string& protocol) {
	return protocols.count(protocol) != 0;
}

// True when `protocol` is exactly "https".
bool isSecurePrototol(const std::string& protocol) {
	return protocol == "https";
}
} // namespace
|
||||||
|
|
||||||
|
// Loads the default knob values, then registers every knob in knobMap under both its long name and its short
// alias so that set() can resolve either spelling to the same member.
RESTClientKnobs::RESTClientKnobs() {
	// Defaults
	secure_connection = RESTClientKnobs::SECURE_CONNECTION;
	connection_pool_size = FLOW_KNOBS->RESTCLIENT_MAX_CONNECTIONPOOL_SIZE;
	connect_tries = FLOW_KNOBS->RESTCLIENT_CONNECT_TRIES;
	connect_timeout = FLOW_KNOBS->RESTCLIENT_CONNECT_TIMEOUT;
	max_connection_life = FLOW_KNOBS->RESTCLIENT_MAX_CONNECTION_LIFE;
	request_tries = FLOW_KNOBS->RESTCLIENT_REQUEST_TRIES;
	request_timeout_secs = FLOW_KNOBS->RESTCLIENT_REQUEST_TIMEOUT_SEC;

	// Name / alias -> member
	const std::pair<const char*, int*> registrations[] = {
		{ "connection_pool_size", std::addressof(connection_pool_size) },
		{ "pz", std::addressof(connection_pool_size) },
		{ "secure_connection", std::addressof(secure_connection) },
		{ "sc", std::addressof(secure_connection) },
		{ "connect_tries", std::addressof(connect_tries) },
		{ "ct", std::addressof(connect_tries) },
		{ "connect_timeout", std::addressof(connect_timeout) },
		{ "cto", std::addressof(connect_timeout) },
		{ "max_connection_life", std::addressof(max_connection_life) },
		{ "mcl", std::addressof(max_connection_life) },
		{ "request_tries", std::addressof(request_tries) },
		{ "rt", std::addressof(request_tries) },
		{ "request_timeout_secs", std::addressof(request_timeout_secs) },
		{ "rtom", std::addressof(request_timeout_secs) },
	};
	for (const auto& registration : registrations) {
		knobMap[registration.first] = registration.second;
	}
}
|
||||||
|
|
||||||
|
void RESTClientKnobs::set(const std::unordered_map<std::string, int>& knobSettings) {
|
||||||
|
TraceEvent trace = TraceEvent("RESTClient_SetKnobs");
|
||||||
|
|
||||||
|
for (const auto& itr : knobSettings) {
|
||||||
|
const auto& kItr = RESTClientKnobs::knobMap.find(itr.first);
|
||||||
|
if (kItr == RESTClientKnobs::knobMap.end()) {
|
||||||
|
trace.detail("RESTClient_InvalidKnobName", itr.first);
|
||||||
|
throw rest_invalid_rest_client_knob();
|
||||||
|
}
|
||||||
|
*(kItr->second) = itr.second;
|
||||||
|
trace.detail(itr.first.c_str(), itr.second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_map<std::string, int> RESTClientKnobs::get() const {
|
||||||
|
std::unordered_map<std::string, int> details = {
|
||||||
|
{ "connection_pool_size", connection_pool_size },
|
||||||
|
{ "secure_connection", secure_connection },
|
||||||
|
{ "connect_tries", connect_tries },
|
||||||
|
{ "connect_timeout", connect_timeout },
|
||||||
|
{ "max_connection_life", max_connection_life },
|
||||||
|
{ "request_tries", request_tries },
|
||||||
|
{ "request_timeout_secs", request_timeout_secs },
|
||||||
|
};
|
||||||
|
|
||||||
|
return details;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Provides a usable connection for `connectKey` (host, service).
//
// Cached connections for the key are consumed oldest-first; expired ones are discarded and the first unexpired
// one is returned. If the key has no pool entry yet, or every cached connection has expired, a new connection is
// established, the handshake is completed, and the connection is returned with an expiration time of
// now() + maxConnLife.
//
// A key without a pool entry used to raise rest_connectpool_key_not_found. Nothing in this file inserts keys
// before the first connect, so that made every first-time connect fail; a missing entry is now treated the same
// as an empty pool.
ACTOR Future<RESTConnectionPool::ReusableConnection> connect_impl(Reference<RESTConnectionPool> connectionPool,
                                                                  RESTConnectionPoolKey connectKey,
                                                                  bool isSecure,
                                                                  int maxConnLife) {
	auto poolItr = connectionPool->connectionPoolMap.find(connectKey);

	while (poolItr != connectionPool->connectionPoolMap.end() && !poolItr->second.empty()) {
		RESTConnectionPool::ReusableConnection rconn = poolItr->second.front();
		poolItr->second.pop();

		// Expired connections are simply dropped; keep looking for one that is still usable.
		if (rconn.expirationTime > now()) {
			TraceEvent("RESTClient_ReusableConnection")
			    .suppressFor(60)
			    .detail("RemoteEndpoint", rconn.conn->getPeerAddress())
			    .detail("ExpireIn", rconn.expirationTime - now());
			return rconn;
		}
	}

	// No reusable connection: establish a new one.
	state Reference<IConnection> conn =
	    wait(INetworkConnections::net()->connect(connectKey.first, connectKey.second, isSecure));
	wait(conn->connectHandshake());

	return RESTConnectionPool::ReusableConnection({ conn, now() + maxConnLife });
}
|
||||||
|
|
||||||
|
// Entry point for obtaining a connection: takes a reference on this pool and delegates to the connect_impl actor.
Future<RESTConnectionPool::ReusableConnection> RESTConnectionPool::connect(RESTConnectionPoolKey connectKey,
                                                                           const bool isSecure,
                                                                           const int maxConnLife) {
	Reference<RESTConnectionPool> self = Reference<RESTConnectionPool>::addRef(this);
	return connect_impl(self, connectKey, isSecure, maxConnLife);
}
|
||||||
|
|
||||||
|
// Hands a connection back to the pool for `connectKey`.
//
// The connection is cached only if it has not expired and the pool for the key holds fewer than
// `maxConnections` entries; otherwise it is dropped. In every case the caller's `rconn.conn` is reset.
//
// The pool entry for the key is created on demand. This used to raise rest_connectpool_key_not_found for an
// unknown key, but nothing in this file creates entries up front, so no connection could ever be pooled.
void RESTConnectionPool::returnConnection(RESTConnectionPoolKey connectKey,
                                          ReusableConnection& rconn,
                                          const int maxConnections) {
	// Only a connection that expires in the future is worth keeping.
	if (rconn.expirationTime > now()) {
		auto& pooled = connectionPoolMap[connectKey];
		// std::queue::push appends at the back; connect_impl consumes from the front.
		if (pooled.size() < maxConnections) {
			pooled.push(rconn);
		}
	}
	rconn.conn = Reference<IConnection>();
}
|
||||||
|
|
||||||
|
// Builds a body-less URL by parsing `fUrl`; parseUrl() throws rest_invalid_uri when the URL is rejected.
RESTUrl::RESTUrl(const std::string& fUrl, const bool isSecure) {
	this->parseUrl(fUrl, isSecure);
}
|
||||||
|
|
||||||
|
// Builds a URL that carries request body `b` by parsing `fullUrl`; parseUrl() throws rest_invalid_uri when the
// URL is rejected.
RESTUrl::RESTUrl(const std::string& fullUrl, const std::string& b, const bool isSecure) : body(b) {
	this->parseUrl(fullUrl, isSecure);
}
|
||||||
|
|
||||||
|
void RESTUrl::parseUrl(const std::string& fullUrl, const bool isSecure) {
|
||||||
|
// Sample valid URIs
|
||||||
|
// 1. With 'host' & 'resource' := '<protocol>://<host>/<resource>'
|
||||||
|
// 2. With 'host', 'service' & 'resource' := '<protocol>://<host>:port/<resource>'
|
||||||
|
// 3. With 'host', 'service', 'resource' & 'reqParameters' := '<protocol>://<host>:port/<resource>?<parameter-list>'
|
||||||
|
|
||||||
|
try {
|
||||||
|
StringRef t(fullUrl);
|
||||||
|
StringRef p = t.eat("://");
|
||||||
|
std::string protocol = p.toString();
|
||||||
|
boost::algorithm::to_lower(protocol);
|
||||||
|
if (!isProtocolSupported(protocol)) {
|
||||||
|
throw format("Invalid REST URI protocol '%s'", protocol.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure connection secure knob setting matches with the input URI
|
||||||
|
if ((isSecurePrototol(protocol) && !isSecure) || (!isSecurePrototol(protocol) && isSecure)) {
|
||||||
|
throw format("Invalid REST URI protocol secure knob '%s'", fullUrl.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
// extract 'resource' and optional 'parameter list' if supplied in the URL
|
||||||
|
uint8_t foundSeparator = 0;
|
||||||
|
StringRef hostPort = t.eatAny("/?", &foundSeparator);
|
||||||
|
if (foundSeparator == '/') {
|
||||||
|
resource = t.eat("?").toString();
|
||||||
|
reqParameters = t.eat().toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
// hostPort is at least a host or IP address, optionally followed by :portNumber or :serviceName
|
||||||
|
StringRef hRef(hostPort);
|
||||||
|
StringRef h = hRef.eat(":");
|
||||||
|
if (h.size() == 0) {
|
||||||
|
throw std::string("host cannot be empty");
|
||||||
|
}
|
||||||
|
host = h.toString();
|
||||||
|
service = hRef.eat().toString();
|
||||||
|
|
||||||
|
TraceEvent("RESTClient_ParseURI")
|
||||||
|
.detail("URI", fullUrl)
|
||||||
|
.detail("Host", host)
|
||||||
|
.detail("Service", service)
|
||||||
|
.detail("Resource", resource)
|
||||||
|
.detail("ReqParameters", reqParameters);
|
||||||
|
} catch (std::string& err) {
|
||||||
|
TraceEvent("RESTClient_ParseError").detail("URI", fullUrl).detail("Error", err);
|
||||||
|
throw rest_invalid_uri();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only used to link unit tests.
// Intentionally empty: the function exists solely so that other code can reference a symbol from this file.
void forceLinkRESTUtilsTests() {}
|
||||||
|
|
||||||
|
// Exercises RESTUrl parsing: three rejected URIs (each must raise rest_invalid_uri) followed by three accepted
// URIs whose parsed components are checked.
TEST_CASE("fdbrpc/RESTUtils") {
	// invalid protocol
	try {
		std::string badProtocol("httpx://foo/bar");
		RESTUrl parsed(badProtocol, false);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_uri) {
			throw e;
		}
	}

	// mismatch protocol and knob values
	try {
		std::string insecureUri("http://foo/bar");
		RESTUrl parsed(insecureUri, true);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_uri) {
			throw e;
		}
	}

	// missing host
	try {
		std::string hostlessUri("https://:/bar");
		RESTUrl parsed(hostlessUri, true);
		ASSERT(false);
	} catch (Error& e) {
		if (e.code() != error_code_rest_invalid_uri) {
			throw e;
		}
	}

	// valid URI with service
	try {
		std::string withService("https://host:80/foo/bar");
		RESTUrl parsed(withService, true);
		ASSERT_EQ(parsed.host.compare("host"), 0);
		ASSERT_EQ(parsed.service.compare("80"), 0);
		ASSERT_EQ(parsed.resource.compare("foo/bar"), 0);
	} catch (Error& e) {
		throw e;
	}

	// valid URI with-out service
	try {
		std::string withoutService("https://host/foo/bar");
		RESTUrl parsed(withoutService, true);
		ASSERT_EQ(parsed.host.compare("host"), 0);
		ASSERT(parsed.service.empty());
		ASSERT_EQ(parsed.resource.compare("foo/bar"), 0);
	} catch (Error& e) {
		throw e;
	}

	// valid URI with parameters
	try {
		std::string withParameters("https://host/foo/bar?param1,param2");
		RESTUrl parsed(withParameters, true);
		ASSERT_EQ(parsed.host.compare("host"), 0);
		ASSERT(parsed.service.empty());
		ASSERT_EQ(parsed.resource.compare("foo/bar"), 0);
		ASSERT_EQ(parsed.reqParameters.compare("param1,param2"), 0);
	} catch (Error& e) {
		throw e;
	}

	return Void();
}
|
|
@ -0,0 +1,113 @@
|
||||||
|
/*
|
||||||
|
* RESTUtils.h
|
||||||
|
*
|
||||||
|
* This source file is part of the FoundationDB open source project
|
||||||
|
*
|
||||||
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef FDRPC_REST_UTILS_H
|
||||||
|
#define FDRPC_REST_UTILS_H
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "flow/flow.h"
|
||||||
|
#include "flow/FastRef.h"
|
||||||
|
#include "flow/Net2Packet.h"
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
// Util interface managing REST active connection pool.
|
||||||
|
// The interface internally constructs and maintains map {"host:service" -> activeConnection}; any new connection
|
||||||
|
// request would first access cached connection if possible (not expired), if none exists, it would establish a new
|
||||||
|
// connection and return it to the caller. The caller, on completing the task at hand, should return the connection to
|
||||||
|
// the pool.
|
||||||
|
|
||||||
|
using RESTConnectionPoolKey = std::pair<std::string, std::string>;
|
||||||
|
|
||||||
|
// Reference-counted pool of REST connections. Connections are cached in per-key queues, where the key is the
// (host, service) pair produced by getConnectionPoolKey().
class RESTConnectionPool : public ReferenceCounted<RESTConnectionPool> {
public:
	// A connection together with its expiration timestamp.
	// NOTE(review): presumably 'expirationTime' is on the same clock as now() -- confirm in the implementation.
	struct ReusableConnection {
		Reference<IConnection> conn;
		double expirationTime;
	};

	// Maximum number of connections cached in the connection pool.
	int maxConnPerConnectKey;
	// Cached connections, one queue per connect key.
	std::map<RESTConnectionPoolKey, std::queue<ReusableConnection>> connectionPoolMap;

	RESTConnectionPool(const int maxConnsPerKey) : maxConnPerConnectKey(maxConnsPerKey) {}

	// Provides a usable TCP connection object for 'connectKey': an active connection from the pool is reused if one
	// is available; otherwise a new TCP connection is established.
	Future<ReusableConnection> connect(RESTConnectionPoolKey connectKey, const bool isSecure, const int maxConnLife);

	// Gives 'conn' back to the pool under 'connectKey'.
	// NOTE(review): how 'maxConnections' relates to 'maxConnPerConnectKey' is decided by the implementation, which is
	// not visible from this header -- confirm there.
	void returnConnection(RESTConnectionPoolKey connectKey, ReusableConnection& conn, const int maxConnections);

	// Builds the pool key for a host/service pair.
	static RESTConnectionPoolKey getConnectionPoolKey(const std::string& host, const std::string& service) {
		return RESTConnectionPoolKey(host, service);
	}
};
|
||||||
|
|
||||||
|
// Util interface facilitating management and update for RESTClient knob parameters
|
||||||
|
// Bundle of RESTClient knob values, with name-based bulk update (set) and readout (get), plus the help text that
// describes each knob.
struct RESTClientKnobs {
	int connection_pool_size, secure_connection, connect_timeout, connect_tries, max_connection_life, request_tries,
	    request_timeout_secs;

	// Values accepted by the 'secure_connection' knob (1 = secure, 0 = insecure; see getKnobDescriptions()).
	constexpr static int SECURE_CONNECTION = 1;
	constexpr static int NOT_SECURE_CONNECTION = 0;

	RESTClientKnobs();

	// NOTE(review): 'knobSettings' and the map returned by get() are presumably keyed by the knob names listed in
	// getKnobDescriptions() -- confirm against the implementation file.
	void set(const std::unordered_map<std::string, int>& knobSettings);
	std::unordered_map<std::string, int> get() const;

	// Maps a knob name to the address of an int.
	// NOTE(review): if these pointers target this object's own fields, a default copy of the struct would keep
	// pointing into the source object -- confirm how the constructor populates this map before copying instances.
	std::unordered_map<std::string, int*> knobMap;

	// Returns one help line per knob, in the form "<long name> (or <short name>) <description>".
	static std::vector<std::string> getKnobDescriptions() {
		return {
			"connection_pool_size (or pz) Maximum number of active connections in the connection-pool",
			"secure_connection (or sc) Set 1 for secure connection and 0 for insecure connection.",
			"connect_tries (or ct) Number of times to try to connect for each request.",
			"connect_timeout (or cto) Number of seconds to wait for a connect request to succeed.",
			"max_connection_life (or mcl) Maximum number of seconds to use a single TCP connection.",
			"request_tries (or rt) Number of times to try each request until a parsable HTTP "
			"response other than 429 is received.",
			"request_timeout_secs (or rtom) Number of seconds to wait for a request to succeed after a "
			"connection is established.",
		};
	}
};
|
||||||
|
|
||||||
|
// Util interface facilitating parsing of an input REST 'full_url'
|
||||||
|
// Parsed representation of a REST 'full_url', optionally carrying a request body.
struct RESTUrl {
	// Connection target: host and service (port) details.
	std::string host;
	std::string service;
	// Resource identified by the URI.
	std::string resource;
	// Optional REST request parameters.
	std::string reqParameters;
	// Request 'body' payload.
	std::string body;

	// Both constructors take the full URL to parse and whether the connection is secure; the second also takes the
	// request body.
	explicit RESTUrl(const std::string& fullUrl, const bool isSecure);
	explicit RESTUrl(const std::string& fullUrl, const std::string& body, const bool isSecure);

private:
	// Parsing helper; implemented outside this header.
	void parseUrl(const std::string& fullUrl, bool isSecure);
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -72,6 +72,20 @@ Future<REPLY_TYPE(Req)> retryBrokenPromise(RequestStream<Req, P> to, Req request
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolves 'hostname' and, when an address is obtained, re-points '*stream' at the well-known endpoint 'token' on
// that address. Nothing is modified when resolution yields no address or when 'stream' is null.
ACTOR template <class Req>
Future<Void> tryInitializeRequestStream(RequestStream<Req>* stream, Hostname hostname, WellKnownEndpoints token) {
	Optional<NetworkAddress> address = wait(hostname.resolve());
	if (!address.present()) {
		return Void();
	}
	if (stream == nullptr) {
		// 'stream' is a by-value pointer: a RequestStream allocated here could never be handed back to the caller
		// and would simply leak, so there is nothing useful to initialize.
		return Void();
	}
	*stream = RequestStream<Req>(Endpoint::wellKnown({ address.get() }, token));
	return Void();
}
|
||||||
|
|
||||||
ACTOR template <class Req>
|
ACTOR template <class Req>
|
||||||
Future<ErrorOr<REPLY_TYPE(Req)>> tryGetReplyFromHostname(Req request, Hostname hostname, WellKnownEndpoints token) {
|
Future<ErrorOr<REPLY_TYPE(Req)>> tryGetReplyFromHostname(Req request, Hostname hostname, WellKnownEndpoints token) {
|
||||||
// A wrapper of tryGetReply(request), except that the request is sent to an address resolved from a hostname.
|
// A wrapper of tryGetReply(request), except that the request is sent to an address resolved from a hostname.
|
||||||
|
|
|
@ -1110,10 +1110,10 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
|
||||||
newPriorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController);
|
newPriorityInfo.processClassFitness = newProcessClass.machineClassFitness(ProcessClass::ClusterController);
|
||||||
|
|
||||||
bool isCoordinator =
|
bool isCoordinator =
|
||||||
(std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), req.wi.address()) !=
|
(std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), w.address()) !=
|
||||||
coordinatorAddresses.end()) ||
|
coordinatorAddresses.end()) ||
|
||||||
(req.wi.secondaryAddress().present() &&
|
(w.secondaryAddress().present() &&
|
||||||
std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), req.wi.secondaryAddress().get()) !=
|
std::find(coordinatorAddresses.begin(), coordinatorAddresses.end(), w.secondaryAddress().get()) !=
|
||||||
coordinatorAddresses.end());
|
coordinatorAddresses.end());
|
||||||
|
|
||||||
for (auto it : req.incompatiblePeers) {
|
for (auto it : req.incompatiblePeers) {
|
||||||
|
@ -1933,8 +1933,24 @@ ACTOR Future<Void> handleForcedRecoveries(ClusterControllerData* self, ClusterCo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
|
struct SingletonRecruitThrottler {
|
||||||
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
|
double lastRecruitStart;
|
||||||
|
|
||||||
|
SingletonRecruitThrottler() : lastRecruitStart(-1) {}
|
||||||
|
|
||||||
|
double newRecruitment() {
|
||||||
|
double n = now();
|
||||||
|
double waitTime =
|
||||||
|
std::max(0.0, (lastRecruitStart + SERVER_KNOBS->CC_THROTTLE_SINGLETON_RERECRUIT_INTERVAL - n));
|
||||||
|
lastRecruitStart = n;
|
||||||
|
return waitTime;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
ACTOR Future<Void> startDataDistributor(ClusterControllerData* self, double waitTime) {
|
||||||
|
// If master fails at the same time, give it a chance to clear master PID.
|
||||||
|
// Also wait to avoid too many consecutive recruits in a small time window.
|
||||||
|
wait(delay(waitTime));
|
||||||
|
|
||||||
TraceEvent("CCStartDataDistributor", self->id).log();
|
TraceEvent("CCStartDataDistributor", self->id).log();
|
||||||
loop {
|
loop {
|
||||||
|
@ -2003,6 +2019,7 @@ ACTOR Future<Void> startDataDistributor(ClusterControllerData* self) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
|
ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
|
||||||
|
state SingletonRecruitThrottler recruitThrottler;
|
||||||
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
||||||
wait(self->db.serverInfo->onChange());
|
wait(self->db.serverInfo->onChange());
|
||||||
}
|
}
|
||||||
|
@ -2019,13 +2036,15 @@ ACTOR Future<Void> monitorDataDistributor(ClusterControllerData* self) {
|
||||||
when(wait(self->recruitDistributor.onChange())) {}
|
when(wait(self->recruitDistributor.onChange())) {}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
wait(startDataDistributor(self));
|
wait(startDataDistributor(self, recruitThrottler.newRecruitment()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
|
ACTOR Future<Void> startRatekeeper(ClusterControllerData* self, double waitTime) {
|
||||||
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
|
// If master fails at the same time, give it a chance to clear master PID.
|
||||||
|
// Also wait to avoid too many consecutive recruits in a small time window.
|
||||||
|
wait(delay(waitTime));
|
||||||
|
|
||||||
TraceEvent("CCStartRatekeeper", self->id).log();
|
TraceEvent("CCStartRatekeeper", self->id).log();
|
||||||
loop {
|
loop {
|
||||||
|
@ -2091,6 +2110,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData* self) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
|
ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
|
||||||
|
state SingletonRecruitThrottler recruitThrottler;
|
||||||
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
||||||
wait(self->db.serverInfo->onChange());
|
wait(self->db.serverInfo->onChange());
|
||||||
}
|
}
|
||||||
|
@ -2107,34 +2127,15 @@ ACTOR Future<Void> monitorRatekeeper(ClusterControllerData* self) {
|
||||||
when(wait(self->recruitRatekeeper.onChange())) {}
|
when(wait(self->recruitRatekeeper.onChange())) {}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
wait(startRatekeeper(self));
|
wait(startRatekeeper(self, recruitThrottler.newRecruitment()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Acquires the BM lock by getting the next epoch no.
|
ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self, double waitTime) {
|
||||||
ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
|
// If master fails at the same time, give it a chance to clear master PID.
|
||||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);
|
// Also wait to avoid too many consecutive recruits in a small time window.
|
||||||
|
wait(delay(waitTime));
|
||||||
loop {
|
|
||||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
|
||||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
|
||||||
try {
|
|
||||||
Optional<Value> oldEpoch = wait(tr->get(blobManagerEpochKey));
|
|
||||||
state int64_t newEpoch = oldEpoch.present() ? decodeBlobManagerEpochValue(oldEpoch.get()) + 1 : 1;
|
|
||||||
tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));
|
|
||||||
|
|
||||||
wait(tr->commit());
|
|
||||||
TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
|
|
||||||
return newEpoch;
|
|
||||||
} catch (Error& e) {
|
|
||||||
wait(tr->onError(e));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
|
|
||||||
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
|
|
||||||
|
|
||||||
TraceEvent("CCEKP_Start", self->id).log();
|
TraceEvent("CCEKP_Start", self->id).log();
|
||||||
loop {
|
loop {
|
||||||
|
@ -2208,6 +2209,7 @@ ACTOR Future<Void> startEncryptKeyProxy(ClusterControllerData* self) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
|
ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
|
||||||
|
state SingletonRecruitThrottler recruitThrottler;
|
||||||
loop {
|
loop {
|
||||||
if (self->db.serverInfo->get().encryptKeyProxy.present() && !self->recruitEncryptKeyProxy.get()) {
|
if (self->db.serverInfo->get().encryptKeyProxy.present() && !self->recruitEncryptKeyProxy.get()) {
|
||||||
choose {
|
choose {
|
||||||
|
@ -2219,13 +2221,36 @@ ACTOR Future<Void> monitorEncryptKeyProxy(ClusterControllerData* self) {
|
||||||
when(wait(self->recruitEncryptKeyProxy.onChange())) {}
|
when(wait(self->recruitEncryptKeyProxy.onChange())) {}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
wait(startEncryptKeyProxy(self));
|
wait(startEncryptKeyProxy(self, recruitThrottler.newRecruitment()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> startBlobManager(ClusterControllerData* self) {
|
// Acquires the BM lock by getting the next epoch no.
|
||||||
wait(delay(0.0)); // If master fails at the same time, give it a chance to clear master PID.
|
ACTOR Future<int64_t> getNextBMEpoch(ClusterControllerData* self) {
	// Reads the value stored under blobManagerEpochKey, writes back that epoch plus one (or 1 when nothing is
	// stored), commits, and returns the epoch that was written. Errors are passed to tr->onError() and the loop
	// runs again once that wait completes.
	state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(self->cx);

	loop {
		tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
		tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
		try {
			Optional<Value> storedEpoch = wait(tr->get(blobManagerEpochKey));
			// Start from the first epoch and bump past the stored one if it exists.
			state int64_t newEpoch = 1;
			if (storedEpoch.present()) {
				newEpoch = decodeBlobManagerEpochValue(storedEpoch.get()) + 1;
			}
			tr->set(blobManagerEpochKey, blobManagerEpochValueFor(newEpoch));

			wait(tr->commit());
			TraceEvent(SevDebug, "CCNextBlobManagerEpoch", self->id).detail("Epoch", newEpoch);
			return newEpoch;
		} catch (Error& e) {
			wait(tr->onError(e));
		}
	}
}
|
||||||
|
|
||||||
|
ACTOR Future<Void> startBlobManager(ClusterControllerData* self, double waitTime) {
|
||||||
|
// If master fails at the same time, give it a chance to clear master PID.
|
||||||
|
// Also wait to avoid too many consecutive recruits in a small time window.
|
||||||
|
wait(delay(waitTime));
|
||||||
|
|
||||||
TraceEvent("CCStartBlobManager", self->id).log();
|
TraceEvent("CCStartBlobManager", self->id).log();
|
||||||
loop {
|
loop {
|
||||||
|
@ -2322,6 +2347,7 @@ ACTOR Future<Void> watchBlobGranulesConfigKey(ClusterControllerData* self) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
|
ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
|
||||||
|
state SingletonRecruitThrottler recruitThrottler;
|
||||||
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
while (self->db.serverInfo->get().recoveryState < RecoveryState::ACCEPTING_COMMITS) {
|
||||||
wait(self->db.serverInfo->onChange());
|
wait(self->db.serverInfo->onChange());
|
||||||
}
|
}
|
||||||
|
@ -2352,7 +2378,7 @@ ACTOR Future<Void> monitorBlobManager(ClusterControllerData* self) {
|
||||||
}
|
}
|
||||||
} else if (self->db.blobGranulesEnabled.get()) {
|
} else if (self->db.blobGranulesEnabled.get()) {
|
||||||
// if there is no blob manager present but blob granules are now enabled, recruit a BM
|
// if there is no blob manager present but blob granules are now enabled, recruit a BM
|
||||||
wait(startBlobManager(self));
|
wait(startBlobManager(self, recruitThrottler.newRecruitment()));
|
||||||
} else {
|
} else {
|
||||||
// if there is no blob manager present and blob granules are disabled, wait for a config change
|
// if there is no blob manager present and blob granules are disabled, wait for a config change
|
||||||
wait(self->db.blobGranulesEnabled.onChange());
|
wait(self->db.blobGranulesEnabled.onChange());
|
||||||
|
@ -2481,12 +2507,11 @@ ACTOR Future<Void> workerHealthMonitor(ClusterControllerData* self) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> clusterControllerCore(Reference<IClusterConnectionRecord> connRecord,
|
ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
|
||||||
ClusterControllerFullInterface interf,
|
|
||||||
Future<Void> leaderFail,
|
Future<Void> leaderFail,
|
||||||
|
ServerCoordinators coordinators,
|
||||||
LocalityData locality,
|
LocalityData locality,
|
||||||
ConfigDBType configDBType) {
|
ConfigDBType configDBType) {
|
||||||
state ServerCoordinators coordinators(connRecord);
|
|
||||||
state ClusterControllerData self(interf, locality, coordinators);
|
state ClusterControllerData self(interf, locality, coordinators);
|
||||||
state ConfigBroadcaster configBroadcaster(coordinators, configDBType);
|
state ConfigBroadcaster configBroadcaster(coordinators, configDBType);
|
||||||
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
|
state Future<Void> coordinationPingDelay = delay(SERVER_KNOBS->WORKER_COORDINATION_PING_DELAY);
|
||||||
|
@ -2621,7 +2646,7 @@ ACTOR Future<Void> replaceInterface(ClusterControllerFullInterface interf) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRecord,
|
ACTOR Future<Void> clusterController(ServerCoordinators coordinators,
|
||||||
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
|
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> currentCC,
|
||||||
bool hasConnected,
|
bool hasConnected,
|
||||||
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
|
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo,
|
||||||
|
@ -2632,10 +2657,9 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
|
||||||
state bool inRole = false;
|
state bool inRole = false;
|
||||||
cci.initEndpoints();
|
cci.initEndpoints();
|
||||||
try {
|
try {
|
||||||
wait(connRecord->resolveHostnames());
|
|
||||||
// Register as a possible leader; wait to be elected
|
// Register as a possible leader; wait to be elected
|
||||||
state Future<Void> leaderFail =
|
state Future<Void> leaderFail =
|
||||||
tryBecomeLeader(connRecord, cci, currentCC, hasConnected, asyncPriorityInfo);
|
tryBecomeLeader(coordinators, cci, currentCC, hasConnected, asyncPriorityInfo);
|
||||||
state Future<Void> shouldReplace = replaceInterface(cci);
|
state Future<Void> shouldReplace = replaceInterface(cci);
|
||||||
|
|
||||||
while (!currentCC->get().present() || currentCC->get().get() != cci) {
|
while (!currentCC->get().present() || currentCC->get().get() != cci) {
|
||||||
|
@ -2654,7 +2678,7 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
|
||||||
startRole(Role::CLUSTER_CONTROLLER, cci.id(), UID());
|
startRole(Role::CLUSTER_CONTROLLER, cci.id(), UID());
|
||||||
inRole = true;
|
inRole = true;
|
||||||
|
|
||||||
wait(clusterControllerCore(connRecord, cci, leaderFail, locality, configDBType));
|
wait(clusterControllerCore(cci, leaderFail, coordinators, locality, configDBType));
|
||||||
}
|
}
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (inRole)
|
if (inRole)
|
||||||
|
@ -2683,7 +2707,8 @@ ACTOR Future<Void> clusterController(Reference<IClusterConnectionRecord> connRec
|
||||||
state bool hasConnected = false;
|
state bool hasConnected = false;
|
||||||
loop {
|
loop {
|
||||||
try {
|
try {
|
||||||
wait(clusterController(connRecord, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
|
ServerCoordinators coordinators(connRecord);
|
||||||
|
wait(clusterController(coordinators, currentCC, hasConnected, asyncPriorityInfo, locality, configDBType));
|
||||||
hasConnected = true;
|
hasConnected = true;
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() != error_code_coordinators_changed)
|
if (e.code() != error_code_coordinators_changed)
|
||||||
|
|
|
@ -537,8 +537,7 @@ ACTOR Future<Void> changeCoordinators(Reference<ClusterRecoveryData> self) {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
state ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString());
|
ClusterConnectionString conn(changeCoordinatorsRequest.newConnectionString.toString());
|
||||||
wait(conn.resolveHostnames());
|
|
||||||
wait(self->cstate.move(conn));
|
wait(self->cstate.move(conn));
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() != error_code_actor_cancelled)
|
if (e.code() != error_code_actor_cancelled)
|
||||||
|
|
|
@ -236,6 +236,105 @@ struct ResolutionRequestBuilder {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Looks up 'tenant' in commitData->tenantMap.
//   - No tenant given: returns an empty Optional (not an error).
//   - Tenant missing from the map, or 'tenantId' given and different from the mapped entry's id: returns
//     unknown_tenant(), emitting a SevWarn trace event first when 'logOnFailure' is set.
//   - Otherwise: returns the mapped entry.
ErrorOr<Optional<TenantMapEntry>> getTenantEntry(ProxyCommitData* commitData,
                                                 Optional<TenantNameRef> tenant,
                                                 Optional<int64_t> tenantId,
                                                 bool logOnFailure) {
	if (!tenant.present()) {
		return Optional<TenantMapEntry>();
	}

	auto entryItr = commitData->tenantMap.find(tenant.get());
	if (entryItr == commitData->tenantMap.end()) {
		if (logOnFailure) {
			TraceEvent(SevWarn, "CommitProxyUnknownTenant", commitData->dbgid).detail("Tenant", tenant.get());
		}

		return unknown_tenant();
	}

	if (tenantId.present() && tenantId.get() != entryItr->second.id) {
		if (logOnFailure) {
			TraceEvent(SevWarn, "CommitProxyTenantIdMismatch", commitData->dbgid)
			    .detail("Tenant", tenant.get())
			    .detail("TenantId", tenantId)
			    .detail("ExistingId", entryItr->second.id);
		}

		return unknown_tenant();
	}

	return ErrorOr<Optional<TenantMapEntry>>(Optional<TenantMapEntry>(entryItr->second));
}
|
||||||
|
|
||||||
|
// Checks that every key touched by 'req' lies inside the prefix of the tenant named in req.tenantInfo.
//
// Returns true when the request names no tenant, when the tenant lookup fails (this function does not report lookup
// errors), or when all mutations and conflict ranges are inside the tenant prefix. Returns false, after emitting a
// SevWarnAlways trace event, on the first key outside the prefix. Keys equal to metadataVersionKey are exempt.
bool verifyTenantPrefix(ProxyCommitData* const commitData, const CommitTransactionRequest& req) {
	ErrorOr<Optional<TenantMapEntry>> tenantEntry =
	    getTenantEntry(commitData, req.tenantInfo.name.castTo<TenantNameRef>(), req.tenantInfo.tenantId, true);

	if (tenantEntry.isError()) {
		return true;
	}

	if (tenantEntry.get().present()) {
		Key tenantPrefix = tenantEntry.get().get().prefix;
		for (auto& m : req.transaction.mutations) {
			if (m.param1 != metadataVersionKey) {
				if (!m.param1.startsWith(tenantPrefix)) {
					TraceEvent(SevWarnAlways, "TenantPrefixMismatch")
					    .suppressFor(60)
					    .detail("Prefix", tenantPrefix.toHexString())
					    .detail("Key", m.param1.toHexString());
					return false;
				}

				if (m.type == MutationRef::ClearRange && !m.param2.startsWith(tenantPrefix)) {
					TraceEvent(SevWarnAlways, "TenantClearRangePrefixMismatch")
					    .suppressFor(60)
					    .detail("Prefix", tenantPrefix.toHexString())
					    .detail("Key", m.param2.toHexString());
					return false;
				} else if (m.type == MutationRef::SetVersionstampedKey) {
					ASSERT(m.param1.size() >= 4);
					// The last four bytes of the key hold the versionstamp offset. Assemble them byte by byte
					// (least-significant byte first) instead of dereferencing a reinterpret_cast'ed int*: the
					// suffix sits at an arbitrary byte position, so a direct int load is unaligned and violates
					// aliasing rules. On little-endian hosts this yields the same value as a native read.
					const uint8_t* offsetBytes = m.param1.begin() + (m.param1.size() - 4);
					const int32_t offset = static_cast<int32_t>(
					    static_cast<uint32_t>(offsetBytes[0]) | (static_cast<uint32_t>(offsetBytes[1]) << 8) |
					    (static_cast<uint32_t>(offsetBytes[2]) << 16) | (static_cast<uint32_t>(offsetBytes[3]) << 24));
					// The versionstamp must not be placed inside the tenant prefix.
					if (offset < tenantPrefix.size()) {
						TraceEvent(SevWarnAlways, "TenantVersionstampInvalidOffset")
						    .suppressFor(60)
						    .detail("Prefix", tenantPrefix.toHexString())
						    .detail("Key", m.param1.toHexString())
						    .detail("Offset", offset);
						return false;
					}
				}
			}
		}

		for (auto& rc : req.transaction.read_conflict_ranges) {
			if (rc.begin != metadataVersionKey &&
			    (!rc.begin.startsWith(tenantPrefix) || !rc.end.startsWith(tenantPrefix))) {
				TraceEvent(SevWarnAlways, "TenantReadConflictPrefixMismatch")
				    .suppressFor(60)
				    .detail("Prefix", tenantPrefix.toHexString())
				    .detail("BeginKey", rc.begin.toHexString())
				    .detail("EndKey", rc.end.toHexString());
				return false;
			}
		}

		for (auto& wc : req.transaction.write_conflict_ranges) {
			if (wc.begin != metadataVersionKey &&
			    (!wc.begin.startsWith(tenantPrefix) || !wc.end.startsWith(tenantPrefix))) {
				TraceEvent(SevWarnAlways, "TenantWriteConflictPrefixMismatch")
				    .suppressFor(60)
				    .detail("Prefix", tenantPrefix.toHexString())
				    .detail("BeginKey", wc.begin.toHexString())
				    .detail("EndKey", wc.end.toHexString());
				return false;
			}
		}
	}

	return true;
}
|
||||||
|
|
||||||
ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
|
ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
|
||||||
PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int>> out,
|
PromiseStream<std::pair<std::vector<CommitTransactionRequest>, int>> out,
|
||||||
FutureStream<CommitTransactionRequest> in,
|
FutureStream<CommitTransactionRequest> in,
|
||||||
|
@ -282,6 +381,13 @@ ACTOR Future<Void> commitBatcher(ProxyCommitData* commitData,
|
||||||
.detail("Size", bytes)
|
.detail("Size", bytes)
|
||||||
.detail("Client", req.reply.getEndpoint().getPrimaryAddress());
|
.detail("Client", req.reply.getEndpoint().getPrimaryAddress());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!verifyTenantPrefix(commitData, req)) {
|
||||||
|
++commitData->stats.txnCommitErrors;
|
||||||
|
req.reply.sendError(illegal_tenant_access());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
++commitData->stats.txnCommitIn;
|
++commitData->stats.txnCommitIn;
|
||||||
|
|
||||||
if (req.debugID.present()) {
|
if (req.debugID.present()) {
|
||||||
|
@ -450,35 +556,6 @@ ACTOR static Future<ResolveTransactionBatchReply> trackResolutionMetrics(Referen
|
||||||
return reply;
|
return reply;
|
||||||
}
|
}
|
||||||
|
|
||||||
ErrorOr<Optional<TenantMapEntry>> getTenantEntry(ProxyCommitData* commitData,
|
|
||||||
Optional<TenantNameRef> tenant,
|
|
||||||
Optional<int64_t> tenantId,
|
|
||||||
bool logOnFailure) {
|
|
||||||
if (tenant.present()) {
|
|
||||||
auto itr = commitData->tenantMap.find(tenant.get());
|
|
||||||
if (itr == commitData->tenantMap.end()) {
|
|
||||||
if (logOnFailure) {
|
|
||||||
TraceEvent(SevWarn, "CommitProxyUnknownTenant", commitData->dbgid).detail("Tenant", tenant.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
return unknown_tenant();
|
|
||||||
} else if (tenantId.present() && tenantId.get() != itr->second.id) {
|
|
||||||
if (logOnFailure) {
|
|
||||||
TraceEvent(SevWarn, "CommitProxyTenantIdMismatch", commitData->dbgid)
|
|
||||||
.detail("Tenant", tenant.get())
|
|
||||||
.detail("TenantId", tenantId)
|
|
||||||
.detail("ExistingId", itr->second.id);
|
|
||||||
}
|
|
||||||
|
|
||||||
return unknown_tenant();
|
|
||||||
}
|
|
||||||
|
|
||||||
return ErrorOr<Optional<TenantMapEntry>>(Optional<TenantMapEntry>(itr->second));
|
|
||||||
}
|
|
||||||
|
|
||||||
return Optional<TenantMapEntry>();
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace CommitBatch {
|
namespace CommitBatch {
|
||||||
|
|
||||||
struct CommitBatchContext {
|
struct CommitBatchContext {
|
||||||
|
@ -685,6 +762,11 @@ bool canReject(const std::vector<CommitTransactionRequest>& trs) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Estimated compute time for the batch: batchOperations multiplied by the per-operation compute estimate stored
// for 'latencyBucket', capped at SERVER_KNOBS->MAX_PROXY_COMPUTE.
double computeReleaseDelay(CommitBatchContext* self, double latencyBucket) {
	// NOTE(review): 'latencyBucket' is a double but is used as the subscript below -- confirm that is intended.
	const auto estimatedCompute =
	    self->batchOperations * self->pProxyCommitData->commitComputePerOperation[latencyBucket];
	return std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE, estimatedCompute);
}
|
||||||
|
|
||||||
ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
|
ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
|
||||||
|
|
||||||
state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData;
|
state ProxyCommitData* const pProxyCommitData = self->pProxyCommitData;
|
||||||
|
@ -708,6 +790,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
|
||||||
// Pre-resolution the commits
|
// Pre-resolution the commits
|
||||||
TEST(pProxyCommitData->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); // Wait for local batch
|
TEST(pProxyCommitData->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); // Wait for local batch
|
||||||
wait(pProxyCommitData->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber - 1));
|
wait(pProxyCommitData->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber - 1));
|
||||||
|
pProxyCommitData->stats.computeLatency.addMeasurement(now() - timeStart);
|
||||||
double queuingDelay = g_network->now() - timeStart;
|
double queuingDelay = g_network->now() - timeStart;
|
||||||
pProxyCommitData->stats.commitBatchQueuingDist->sampleSeconds(queuingDelay);
|
pProxyCommitData->stats.commitBatchQueuingDist->sampleSeconds(queuingDelay);
|
||||||
if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND ||
|
if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND ||
|
||||||
|
@ -736,10 +819,7 @@ ACTOR Future<Void> preresolutionProcessing(CommitBatchContext* self) {
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
self->releaseDelay =
|
self->releaseDelay = delay(computeReleaseDelay(self, latencyBucket), TaskPriority::ProxyMasterVersionReply);
|
||||||
delay(std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE,
|
|
||||||
self->batchOperations * pProxyCommitData->commitComputePerOperation[latencyBucket]),
|
|
||||||
TaskPriority::ProxyMasterVersionReply);
|
|
||||||
|
|
||||||
if (debugID.present()) {
|
if (debugID.present()) {
|
||||||
g_traceBatch.addEvent(
|
g_traceBatch.addEvent(
|
||||||
|
@ -1385,8 +1465,10 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
|
||||||
|
|
||||||
self->computeDuration += g_network->timer() - self->computeStart;
|
self->computeDuration += g_network->timer() - self->computeStart;
|
||||||
if (self->batchOperations > 0) {
|
if (self->batchOperations > 0) {
|
||||||
|
double estimatedDelay = computeReleaseDelay(self, self->latencyBucket);
|
||||||
double computePerOperation =
|
double computePerOperation =
|
||||||
std::min(SERVER_KNOBS->MAX_COMPUTE_PER_OPERATION, self->computeDuration / self->batchOperations);
|
std::min(SERVER_KNOBS->MAX_COMPUTE_PER_OPERATION, self->computeDuration / self->batchOperations);
|
||||||
|
|
||||||
if (computePerOperation <= pProxyCommitData->commitComputePerOperation[self->latencyBucket]) {
|
if (computePerOperation <= pProxyCommitData->commitComputePerOperation[self->latencyBucket]) {
|
||||||
pProxyCommitData->commitComputePerOperation[self->latencyBucket] = computePerOperation;
|
pProxyCommitData->commitComputePerOperation[self->latencyBucket] = computePerOperation;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1401,6 +1483,20 @@ ACTOR Future<Void> postResolution(CommitBatchContext* self) {
|
||||||
pProxyCommitData->stats.minComputeNS =
|
pProxyCommitData->stats.minComputeNS =
|
||||||
std::min<int64_t>(pProxyCommitData->stats.minComputeNS,
|
std::min<int64_t>(pProxyCommitData->stats.minComputeNS,
|
||||||
1e9 * pProxyCommitData->commitComputePerOperation[self->latencyBucket]);
|
1e9 * pProxyCommitData->commitComputePerOperation[self->latencyBucket]);
|
||||||
|
|
||||||
|
if (estimatedDelay >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF ||
|
||||||
|
self->computeDuration >= SERVER_KNOBS->MAX_COMPUTE_DURATION_LOG_CUTOFF) {
|
||||||
|
TraceEvent(SevInfo, "LongComputeDuration", pProxyCommitData->dbgid)
|
||||||
|
.suppressFor(10.0)
|
||||||
|
.detail("EstimatedComputeDuration", estimatedDelay)
|
||||||
|
.detail("ComputeDuration", self->computeDuration)
|
||||||
|
.detail("ComputePerOperation", computePerOperation)
|
||||||
|
.detail("LatencyBucket", self->latencyBucket)
|
||||||
|
.detail("UpdatedComputePerOperationEstimate",
|
||||||
|
pProxyCommitData->commitComputePerOperation[self->latencyBucket])
|
||||||
|
.detail("BatchBytes", self->batchBytes)
|
||||||
|
.detail("BatchOperations", self->batchOperations);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pProxyCommitData->stats.processingMutationDist->sampleSeconds(now() - postResolutionQueuing);
|
pProxyCommitData->stats.processingMutationDist->sampleSeconds(now() - postResolutionQueuing);
|
||||||
|
|
|
@ -26,21 +26,29 @@
|
||||||
#include "fdbserver/LeaderElection.h"
|
#include "fdbserver/LeaderElection.h"
|
||||||
#include "flow/actorcompiler.h" // has to be last include
|
#include "flow/actorcompiler.h" // has to be last include
|
||||||
|
|
||||||
ACTOR Future<GenerationRegReadReply> waitAndSendRead(RequestStream<GenerationRegReadRequest> to,
|
ACTOR Future<GenerationRegReadReply> waitAndSendRead(GenerationRegInterface stateServer, GenerationRegReadRequest req) {
|
||||||
GenerationRegReadRequest req) {
|
|
||||||
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
|
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
|
||||||
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
|
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
|
||||||
state GenerationRegReadReply reply = wait(retryBrokenPromise(to, req));
|
state GenerationRegReadReply reply;
|
||||||
|
if (stateServer.hostname.present()) {
|
||||||
|
wait(store(reply, retryGetReplyFromHostname(req, stateServer.hostname.get(), WLTOKEN_GENERATIONREG_READ)));
|
||||||
|
} else {
|
||||||
|
wait(store(reply, retryBrokenPromise(stateServer.read, req)));
|
||||||
|
}
|
||||||
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
|
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
|
||||||
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
|
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
|
||||||
return reply;
|
return reply;
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<UniqueGeneration> waitAndSendWrite(RequestStream<GenerationRegWriteRequest> to,
|
ACTOR Future<UniqueGeneration> waitAndSendWrite(GenerationRegInterface stateServer, GenerationRegWriteRequest req) {
|
||||||
GenerationRegWriteRequest req) {
|
|
||||||
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
|
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
|
||||||
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
|
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
|
||||||
state UniqueGeneration reply = wait(retryBrokenPromise(to, req));
|
state UniqueGeneration reply;
|
||||||
|
if (stateServer.hostname.present()) {
|
||||||
|
wait(store(reply, retryGetReplyFromHostname(req, stateServer.hostname.get(), WLTOKEN_GENERATIONREG_WRITE)));
|
||||||
|
} else {
|
||||||
|
wait(store(reply, retryBrokenPromise(stateServer.write, req)));
|
||||||
|
}
|
||||||
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
|
if (SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY)
|
||||||
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
|
wait(delay(SERVER_KNOBS->BUGGIFIED_EVENTUAL_CONSISTENCY * deterministicRandom()->random01()));
|
||||||
return reply;
|
return reply;
|
||||||
|
@ -152,7 +160,7 @@ struct CoordinatedStateImpl {
|
||||||
state std::vector<Future<GenerationRegReadReply>> rep_reply;
|
state std::vector<Future<GenerationRegReadReply>> rep_reply;
|
||||||
for (int i = 0; i < replicas.size(); i++) {
|
for (int i = 0; i < replicas.size(); i++) {
|
||||||
Future<GenerationRegReadReply> reply =
|
Future<GenerationRegReadReply> reply =
|
||||||
waitAndSendRead(replicas[i].read, GenerationRegReadRequest(req.key, req.gen));
|
waitAndSendRead(replicas[i], GenerationRegReadRequest(req.key, req.gen));
|
||||||
rep_empty_reply.push_back(nonemptyToNever(reply));
|
rep_empty_reply.push_back(nonemptyToNever(reply));
|
||||||
rep_reply.push_back(emptyToNever(reply));
|
rep_reply.push_back(emptyToNever(reply));
|
||||||
self->ac.add(success(reply));
|
self->ac.add(success(reply));
|
||||||
|
@ -192,8 +200,7 @@ struct CoordinatedStateImpl {
|
||||||
state std::vector<GenerationRegInterface>& replicas = self->coordinators.stateServers;
|
state std::vector<GenerationRegInterface>& replicas = self->coordinators.stateServers;
|
||||||
state std::vector<Future<UniqueGeneration>> wrep_reply;
|
state std::vector<Future<UniqueGeneration>> wrep_reply;
|
||||||
for (int i = 0; i < replicas.size(); i++) {
|
for (int i = 0; i < replicas.size(); i++) {
|
||||||
Future<UniqueGeneration> reply =
|
Future<UniqueGeneration> reply = waitAndSendWrite(replicas[i], GenerationRegWriteRequest(req.kv, req.gen));
|
||||||
waitAndSendWrite(replicas[i].write, GenerationRegWriteRequest(req.kv, req.gen));
|
|
||||||
wrep_reply.push_back(reply);
|
wrep_reply.push_back(reply);
|
||||||
self->ac.add(success(reply));
|
self->ac.add(success(reply));
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,12 +98,16 @@ LeaderElectionRegInterface::LeaderElectionRegInterface(INetwork* local) : Client
|
||||||
}
|
}
|
||||||
|
|
||||||
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
|
ServerCoordinators::ServerCoordinators(Reference<IClusterConnectionRecord> ccr) : ClientCoordinators(ccr) {
|
||||||
ASSERT(ccr->connectionStringStatus() == ClusterConnectionString::RESOLVED);
|
|
||||||
ClusterConnectionString cs = ccr->getConnectionString();
|
ClusterConnectionString cs = ccr->getConnectionString();
|
||||||
for (auto s = cs.coordinators().begin(); s != cs.coordinators().end(); ++s) {
|
for (auto h : cs.hostnames) {
|
||||||
leaderElectionServers.emplace_back(*s);
|
leaderElectionServers.emplace_back(h);
|
||||||
stateServers.emplace_back(*s);
|
stateServers.emplace_back(h);
|
||||||
configServers.emplace_back(*s);
|
configServers.emplace_back(h);
|
||||||
|
}
|
||||||
|
for (auto s : cs.coordinators()) {
|
||||||
|
leaderElectionServers.emplace_back(s);
|
||||||
|
stateServers.emplace_back(s);
|
||||||
|
configServers.emplace_back(s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,10 +212,8 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
||||||
int* clientCount,
|
int* clientCount,
|
||||||
Reference<AsyncVar<bool>> hasConnectedClients,
|
Reference<AsyncVar<bool>> hasConnectedClients,
|
||||||
OpenDatabaseCoordRequest req,
|
OpenDatabaseCoordRequest req,
|
||||||
Future<Void> checkStuck,
|
Future<Void> checkStuck) {
|
||||||
Reference<AsyncVar<Void>> coordinatorsChanged) {
|
|
||||||
state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents;
|
state ErrorOr<CachedSerialization<ClientDBInfo>> replyContents;
|
||||||
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
|
||||||
state Future<Void> clientInfoOnChange = db->clientInfo->onChange();
|
state Future<Void> clientInfoOnChange = db->clientInfo->onChange();
|
||||||
|
|
||||||
++(*clientCount);
|
++(*clientCount);
|
||||||
|
@ -233,11 +235,6 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
||||||
clientInfoOnChange = db->clientInfo->onChange();
|
clientInfoOnChange = db->clientInfo->onChange();
|
||||||
replyContents = db->clientInfo->get();
|
replyContents = db->clientInfo->get();
|
||||||
}
|
}
|
||||||
when(wait(coordinatorsChangedOnChange)) {
|
|
||||||
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
|
||||||
replyContents = coordinators_changed();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) {
|
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) {
|
||||||
if (db->clientInfo->get().read().id.isValid()) {
|
if (db->clientInfo->get().read().id.isValid()) {
|
||||||
replyContents = db->clientInfo->get();
|
replyContents = db->clientInfo->get();
|
||||||
|
@ -268,10 +265,7 @@ ACTOR Future<Void> openDatabase(ClientData* db,
|
||||||
ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
|
ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
|
||||||
Reference<AsyncVar<bool>> hasConnectedClients,
|
Reference<AsyncVar<bool>> hasConnectedClients,
|
||||||
Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader,
|
Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader,
|
||||||
ElectionResultRequest req,
|
ElectionResultRequest req) {
|
||||||
Reference<AsyncVar<Void>> coordinatorsChanged) {
|
|
||||||
state bool coordinatorsChangeDetected = false;
|
|
||||||
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
|
||||||
state Future<Void> currentElectedLeaderOnChange = currentElectedLeader->onChange();
|
state Future<Void> currentElectedLeaderOnChange = currentElectedLeader->onChange();
|
||||||
++(*clientCount);
|
++(*clientCount);
|
||||||
hasConnectedClients->set(true);
|
hasConnectedClients->set(true);
|
||||||
|
@ -281,20 +275,11 @@ ACTOR Future<Void> remoteMonitorLeader(int* clientCount,
|
||||||
when(wait(yieldedFuture(currentElectedLeaderOnChange))) {
|
when(wait(yieldedFuture(currentElectedLeaderOnChange))) {
|
||||||
currentElectedLeaderOnChange = currentElectedLeader->onChange();
|
currentElectedLeaderOnChange = currentElectedLeader->onChange();
|
||||||
}
|
}
|
||||||
when(wait(coordinatorsChangedOnChange)) {
|
|
||||||
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
|
||||||
coordinatorsChangeDetected = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; }
|
when(wait(delayJittered(SERVER_KNOBS->CLIENT_REGISTER_INTERVAL))) { break; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (coordinatorsChangeDetected) {
|
req.reply.send(currentElectedLeader->get());
|
||||||
req.reply.sendError(coordinators_changed());
|
|
||||||
} else {
|
|
||||||
req.reply.send(currentElectedLeader->get());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (--(*clientCount) == 0) {
|
if (--(*clientCount) == 0) {
|
||||||
hasConnectedClients->set(false);
|
hasConnectedClients->set(false);
|
||||||
|
@ -325,8 +310,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader =
|
state Reference<AsyncVar<Optional<LeaderInfo>>> currentElectedLeader =
|
||||||
makeReference<AsyncVar<Optional<LeaderInfo>>>();
|
makeReference<AsyncVar<Optional<LeaderInfo>>>();
|
||||||
state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT);
|
state LivenessChecker canConnectToLeader(SERVER_KNOBS->COORDINATOR_LEADER_CONNECTION_TIMEOUT);
|
||||||
state Reference<AsyncVar<Void>> coordinatorsChanged = makeReference<AsyncVar<Void>>();
|
|
||||||
state Future<Void> coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
|
||||||
state Future<Void> hasConnectedClientsOnChange = hasConnectedClients->onChange();
|
state Future<Void> hasConnectedClientsOnChange = hasConnectedClients->onChange();
|
||||||
|
|
||||||
loop choose {
|
loop choose {
|
||||||
|
@ -338,14 +321,10 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
} else {
|
} else {
|
||||||
if (!leaderMon.isValid()) {
|
if (!leaderMon.isValid()) {
|
||||||
leaderMon = monitorLeaderAndGetClientInfo(
|
leaderMon = monitorLeaderAndGetClientInfo(
|
||||||
req.clusterKey, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
|
req.clusterKey, req.hostnames, req.coordinators, &clientData, currentElectedLeader);
|
||||||
}
|
}
|
||||||
actors.add(openDatabase(&clientData,
|
actors.add(
|
||||||
&clientCount,
|
openDatabase(&clientData, &clientCount, hasConnectedClients, req, canConnectToLeader.checkStuck()));
|
||||||
hasConnectedClients,
|
|
||||||
req,
|
|
||||||
canConnectToLeader.checkStuck(),
|
|
||||||
coordinatorsChanged));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
|
when(ElectionResultRequest req = waitNext(interf.electionResult.getFuture())) {
|
||||||
|
@ -355,10 +334,9 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
} else {
|
} else {
|
||||||
if (!leaderMon.isValid()) {
|
if (!leaderMon.isValid()) {
|
||||||
leaderMon = monitorLeaderAndGetClientInfo(
|
leaderMon = monitorLeaderAndGetClientInfo(
|
||||||
req.key, req.coordinators, &clientData, currentElectedLeader, coordinatorsChanged);
|
req.key, req.hostnames, req.coordinators, &clientData, currentElectedLeader);
|
||||||
}
|
}
|
||||||
actors.add(remoteMonitorLeader(
|
actors.add(remoteMonitorLeader(&clientCount, hasConnectedClients, currentElectedLeader, req));
|
||||||
&clientCount, hasConnectedClients, currentElectedLeader, req, coordinatorsChanged));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
|
when(GetLeaderRequest req = waitNext(interf.getLeader.getFuture())) {
|
||||||
|
@ -499,10 +477,6 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
when(wait(actors.getResult())) {}
|
when(wait(actors.getResult())) {}
|
||||||
when(wait(coordinatorsChangedOnChange)) {
|
|
||||||
leaderMon = Future<Void>();
|
|
||||||
coordinatorsChangedOnChange = coordinatorsChanged->onChange();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -153,17 +153,21 @@ struct CandidacyRequest {
|
||||||
struct ElectionResultRequest {
|
struct ElectionResultRequest {
|
||||||
constexpr static FileIdentifier file_identifier = 11815465;
|
constexpr static FileIdentifier file_identifier = 11815465;
|
||||||
Key key;
|
Key key;
|
||||||
|
std::vector<Hostname> hostnames;
|
||||||
std::vector<NetworkAddress> coordinators;
|
std::vector<NetworkAddress> coordinators;
|
||||||
UID knownLeader;
|
UID knownLeader;
|
||||||
ReplyPromise<Optional<LeaderInfo>> reply;
|
ReplyPromise<Optional<LeaderInfo>> reply;
|
||||||
|
|
||||||
ElectionResultRequest() = default;
|
ElectionResultRequest() = default;
|
||||||
ElectionResultRequest(Key key, std::vector<NetworkAddress> coordinators, UID knownLeader)
|
ElectionResultRequest(Key key,
|
||||||
: key(key), coordinators(std::move(coordinators)), knownLeader(knownLeader) {}
|
std::vector<Hostname> hostnames,
|
||||||
|
std::vector<NetworkAddress> coordinators,
|
||||||
|
UID knownLeader)
|
||||||
|
: key(key), hostnames(std::move(hostnames)), coordinators(std::move(coordinators)), knownLeader(knownLeader) {}
|
||||||
|
|
||||||
template <class Ar>
|
template <class Ar>
|
||||||
void serialize(Ar& ar) {
|
void serialize(Ar& ar) {
|
||||||
serializer(ar, key, coordinators, knownLeader, reply);
|
serializer(ar, key, hostnames, coordinators, knownLeader, reply);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -718,18 +718,19 @@ public:
|
||||||
bool recheck = !healthy && (lastReady != self->initialFailureReactionDelay.isReady() ||
|
bool recheck = !healthy && (lastReady != self->initialFailureReactionDelay.isReady() ||
|
||||||
(lastZeroHealthy && !self->zeroHealthyTeams->get()) || containsFailed);
|
(lastZeroHealthy && !self->zeroHealthyTeams->get()) || containsFailed);
|
||||||
|
|
||||||
// TraceEvent("TeamHealthChangeDetected", self->distributorId)
|
//TraceEvent("TeamHealthChangeDetected", self->distributorId)
|
||||||
// .detail("Team", team->getDesc())
|
// .detail("Team", team->getDesc())
|
||||||
// .detail("ServersLeft", serversLeft)
|
// .detail("ServersLeft", serversLeft)
|
||||||
// .detail("LastServersLeft", lastServersLeft)
|
// .detail("LastServersLeft", lastServersLeft)
|
||||||
// .detail("AnyUndesired", anyUndesired)
|
// .detail("AnyUndesired", anyUndesired)
|
||||||
// .detail("LastAnyUndesired", lastAnyUndesired)
|
// .detail("LastAnyUndesired", lastAnyUndesired)
|
||||||
// .detail("AnyWrongConfiguration", anyWrongConfiguration)
|
// .detail("AnyWrongConfiguration", anyWrongConfiguration)
|
||||||
// .detail("LastWrongConfiguration", lastWrongConfiguration)
|
// .detail("LastWrongConfiguration", lastWrongConfiguration)
|
||||||
// .detail("Recheck", recheck)
|
// .detail("ContainsWigglingServer", anyWigglingServer)
|
||||||
// .detail("BadTeam", badTeam)
|
// .detail("Recheck", recheck)
|
||||||
// .detail("LastZeroHealthy", lastZeroHealthy)
|
// .detail("BadTeam", badTeam)
|
||||||
// .detail("ZeroHealthyTeam", self->zeroHealthyTeams->get());
|
// .detail("LastZeroHealthy", lastZeroHealthy)
|
||||||
|
// .detail("ZeroHealthyTeam", self->zeroHealthyTeams->get());
|
||||||
|
|
||||||
lastReady = self->initialFailureReactionDelay.isReady();
|
lastReady = self->initialFailureReactionDelay.isReady();
|
||||||
lastZeroHealthy = self->zeroHealthyTeams->get();
|
lastZeroHealthy = self->zeroHealthyTeams->get();
|
||||||
|
@ -1103,9 +1104,8 @@ public:
|
||||||
if (worstStatus == DDTeamCollection::Status::WIGGLING && invalidWiggleServer(worstAddr, self, server)) {
|
if (worstStatus == DDTeamCollection::Status::WIGGLING && invalidWiggleServer(worstAddr, self, server)) {
|
||||||
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
|
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
|
||||||
.detail("Address", worstAddr.toString())
|
.detail("Address", worstAddr.toString())
|
||||||
.detail("ProcessId", server->getLastKnownInterface().locality.processId())
|
.detail("ServerId", server->getId())
|
||||||
.detail("WigglingId", self->wigglingId.present());
|
.detail("WigglingId", self->wigglingId.present() ? self->wigglingId.get().toString() : "");
|
||||||
self->excludedServers.set(worstAddr, DDTeamCollection::Status::NONE);
|
|
||||||
worstStatus = DDTeamCollection::Status::NONE;
|
worstStatus = DDTeamCollection::Status::NONE;
|
||||||
}
|
}
|
||||||
otherChanges.push_back(self->excludedServers.onChange(worstAddr));
|
otherChanges.push_back(self->excludedServers.onChange(worstAddr));
|
||||||
|
@ -1127,10 +1127,9 @@ public:
|
||||||
if (testStatus == DDTeamCollection::Status::WIGGLING &&
|
if (testStatus == DDTeamCollection::Status::WIGGLING &&
|
||||||
invalidWiggleServer(testAddr, self, server)) {
|
invalidWiggleServer(testAddr, self, server)) {
|
||||||
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
|
TraceEvent(SevInfo, "InvalidWiggleServer", self->distributorId)
|
||||||
.detail("Address", testAddr.toString())
|
.detail("Address", worstAddr.toString())
|
||||||
.detail("ProcessId", server->getLastKnownInterface().locality.processId())
|
.detail("ServerId", server->getId())
|
||||||
.detail("ValidWigglingId", self->wigglingId.present());
|
.detail("WigglingId", self->wigglingId.present() ? self->wigglingId.get().toString() : "");
|
||||||
self->excludedServers.set(testAddr, DDTeamCollection::Status::NONE);
|
|
||||||
testStatus = DDTeamCollection::Status::NONE;
|
testStatus = DDTeamCollection::Status::NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2052,7 +2051,7 @@ public:
|
||||||
"PerpetualStorageWigglePause",
|
"PerpetualStorageWigglePause",
|
||||||
self->distributorId)
|
self->distributorId)
|
||||||
.detail("Primary", self->primary)
|
.detail("Primary", self->primary)
|
||||||
.detail("ProcessId", id)
|
.detail("ServerId", id)
|
||||||
.detail("BestTeamKeepStuckCount", self->bestTeamKeepStuckCount)
|
.detail("BestTeamKeepStuckCount", self->bestTeamKeepStuckCount)
|
||||||
.detail("ExtraHealthyTeamCount", extraTeamCount)
|
.detail("ExtraHealthyTeamCount", extraTeamCount)
|
||||||
.detail("HealthyTeamCount", self->healthyTeamCount);
|
.detail("HealthyTeamCount", self->healthyTeamCount);
|
||||||
|
@ -2065,7 +2064,7 @@ public:
|
||||||
moveFinishFuture = fv;
|
moveFinishFuture = fv;
|
||||||
TraceEvent("PerpetualStorageWiggleStart", self->distributorId)
|
TraceEvent("PerpetualStorageWiggleStart", self->distributorId)
|
||||||
.detail("Primary", self->primary)
|
.detail("Primary", self->primary)
|
||||||
.detail("ProcessId", id)
|
.detail("ServerId", id)
|
||||||
.detail("ExtraHealthyTeamCount", extraTeamCount)
|
.detail("ExtraHealthyTeamCount", extraTeamCount)
|
||||||
.detail("HealthyTeamCount", self->healthyTeamCount);
|
.detail("HealthyTeamCount", self->healthyTeamCount);
|
||||||
}
|
}
|
||||||
|
@ -2091,7 +2090,7 @@ public:
|
||||||
self->includeStorageServersForWiggle();
|
self->includeStorageServersForWiggle();
|
||||||
TraceEvent("PerpetualStorageWiggleFinish", self->distributorId)
|
TraceEvent("PerpetualStorageWiggleFinish", self->distributorId)
|
||||||
.detail("Primary", self->primary)
|
.detail("Primary", self->primary)
|
||||||
.detail("ProcessId", self->wigglingId.get());
|
.detail("ServerId", self->wigglingId.get());
|
||||||
|
|
||||||
wait(self->eraseStorageWiggleMap(&metadataMap, self->wigglingId.get()) &&
|
wait(self->eraseStorageWiggleMap(&metadataMap, self->wigglingId.get()) &&
|
||||||
self->storageWiggler->finishWiggle());
|
self->storageWiggler->finishWiggle());
|
||||||
|
@ -2112,7 +2111,7 @@ public:
|
||||||
self->includeStorageServersForWiggle();
|
self->includeStorageServersForWiggle();
|
||||||
TraceEvent("PerpetualStorageWiggleExitingPause", self->distributorId)
|
TraceEvent("PerpetualStorageWiggleExitingPause", self->distributorId)
|
||||||
.detail("Primary", self->primary)
|
.detail("Primary", self->primary)
|
||||||
.detail("ProcessId", self->wigglingId.get());
|
.detail("ServerId", self->wigglingId.get());
|
||||||
self->wigglingId.reset();
|
self->wigglingId.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,44 +27,29 @@
|
||||||
|
|
||||||
// Keep trying to become a leader by submitting itself to all coordinators.
|
// Keep trying to become a leader by submitting itself to all coordinators.
|
||||||
// Monitor the health of all coordinators at the same time.
|
// Monitor the health of all coordinators at the same time.
|
||||||
// Note: for coordinators whose NetworkAddress is parsed out of a hostname, a connection failure will cause this actor
|
|
||||||
// to throw `coordinators_changed()` error
|
|
||||||
ACTOR Future<Void> submitCandidacy(Key key,
|
ACTOR Future<Void> submitCandidacy(Key key,
|
||||||
LeaderElectionRegInterface coord,
|
LeaderElectionRegInterface coord,
|
||||||
LeaderInfo myInfo,
|
LeaderInfo myInfo,
|
||||||
UID prevChangeID,
|
UID prevChangeID,
|
||||||
AsyncTrigger* nomineeChange,
|
AsyncTrigger* nomineeChange,
|
||||||
Optional<LeaderInfo>* nominee,
|
Optional<LeaderInfo>* nominee) {
|
||||||
Optional<Hostname> hostname = Optional<Hostname>()) {
|
|
||||||
loop {
|
loop {
|
||||||
state Optional<LeaderInfo> li;
|
state Optional<LeaderInfo> li;
|
||||||
|
if (coord.hostname.present()) {
|
||||||
if (coord.candidacy.getEndpoint().getPrimaryAddress().fromHostname) {
|
wait(store(
|
||||||
state ErrorOr<Optional<LeaderInfo>> rep = wait(coord.candidacy.tryGetReply(
|
li,
|
||||||
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
|
retryGetReplyFromHostname(
|
||||||
TaskPriority::CoordinationReply));
|
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
|
||||||
if (rep.isError()) {
|
coord.hostname.get(),
|
||||||
// Connecting to nominee failed, most likely due to connection failed.
|
WLTOKEN_LEADERELECTIONREG_CANDIDACY,
|
||||||
TraceEvent("SubmitCandadicyError")
|
TaskPriority::CoordinationReply)));
|
||||||
.error(rep.getError())
|
|
||||||
.detail("Hostname", hostname.present() ? hostname.get().toString() : "UnknownHostname")
|
|
||||||
.detail("OldAddr", coord.candidacy.getEndpoint().getPrimaryAddress().toString());
|
|
||||||
if (rep.getError().code() == error_code_request_maybe_delivered) {
|
|
||||||
// Delay to prevent tight resolving loop due to outdated DNS cache
|
|
||||||
wait(delay(FLOW_KNOBS->HOSTNAME_RECONNECT_INIT_INTERVAL));
|
|
||||||
throw coordinators_changed();
|
|
||||||
} else {
|
|
||||||
throw rep.getError();
|
|
||||||
}
|
|
||||||
} else if (rep.present()) {
|
|
||||||
li = rep.get();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
Optional<LeaderInfo> tmp = wait(retryBrokenPromise(
|
wait(store(
|
||||||
coord.candidacy,
|
li,
|
||||||
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
|
retryBrokenPromise(
|
||||||
TaskPriority::CoordinationReply));
|
coord.candidacy,
|
||||||
li = tmp;
|
CandidacyRequest(key, myInfo, nominee->present() ? nominee->get().changeID : UID(), prevChangeID),
|
||||||
|
TaskPriority::CoordinationReply)));
|
||||||
}
|
}
|
||||||
|
|
||||||
wait(Future<Void>(Void())); // Make sure we weren't cancelled
|
wait(Future<Void>(Void())); // Make sure we weren't cancelled
|
||||||
|
@ -104,20 +89,26 @@ Future<Void> buggifyDelayedAsyncVar(Reference<AsyncVar<T>>& var) {
|
||||||
ACTOR Future<Void> changeLeaderCoordinators(ServerCoordinators coordinators, Value forwardingInfo) {
|
ACTOR Future<Void> changeLeaderCoordinators(ServerCoordinators coordinators, Value forwardingInfo) {
|
||||||
std::vector<Future<Void>> forwardRequests;
|
std::vector<Future<Void>> forwardRequests;
|
||||||
forwardRequests.reserve(coordinators.leaderElectionServers.size());
|
forwardRequests.reserve(coordinators.leaderElectionServers.size());
|
||||||
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++)
|
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
|
||||||
forwardRequests.push_back(retryBrokenPromise(coordinators.leaderElectionServers[i].forward,
|
if (coordinators.leaderElectionServers[i].hostname.present()) {
|
||||||
ForwardRequest(coordinators.clusterKey, forwardingInfo)));
|
forwardRequests.push_back(retryGetReplyFromHostname(ForwardRequest(coordinators.clusterKey, forwardingInfo),
|
||||||
|
coordinators.leaderElectionServers[i].hostname.get(),
|
||||||
|
WLTOKEN_LEADERELECTIONREG_FORWARD));
|
||||||
|
} else {
|
||||||
|
forwardRequests.push_back(retryBrokenPromise(coordinators.leaderElectionServers[i].forward,
|
||||||
|
ForwardRequest(coordinators.clusterKey, forwardingInfo)));
|
||||||
|
}
|
||||||
|
}
|
||||||
int quorum_size = forwardRequests.size() / 2 + 1;
|
int quorum_size = forwardRequests.size() / 2 + 1;
|
||||||
wait(quorum(forwardRequests, quorum_size));
|
wait(quorum(forwardRequests, quorum_size));
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> connRecord,
|
ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators,
|
||||||
Value proposedSerializedInterface,
|
Value proposedSerializedInterface,
|
||||||
Reference<AsyncVar<Value>> outSerializedLeader,
|
Reference<AsyncVar<Value>> outSerializedLeader,
|
||||||
bool hasConnected,
|
bool hasConnected,
|
||||||
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo) {
|
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo) {
|
||||||
state ServerCoordinators coordinators(connRecord);
|
|
||||||
state AsyncTrigger nomineeChange;
|
state AsyncTrigger nomineeChange;
|
||||||
state std::vector<Optional<LeaderInfo>> nominees;
|
state std::vector<Optional<LeaderInfo>> nominees;
|
||||||
state LeaderInfo myInfo;
|
state LeaderInfo myInfo;
|
||||||
|
@ -134,6 +125,8 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
|
||||||
wait(delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
|
wait(delay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nominees.resize(coordinators.leaderElectionServers.size());
|
||||||
|
|
||||||
myInfo.serializedInfo = proposedSerializedInterface;
|
myInfo.serializedInfo = proposedSerializedInterface;
|
||||||
outSerializedLeader->set(Value());
|
outSerializedLeader->set(Value());
|
||||||
|
|
||||||
|
@ -141,9 +134,6 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
|
||||||
(SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) ? buggifyDelayedAsyncVar(outSerializedLeader) : Void();
|
(SERVER_KNOBS->BUGGIFY_ALL_COORDINATION || BUGGIFY) ? buggifyDelayedAsyncVar(outSerializedLeader) : Void();
|
||||||
|
|
||||||
while (!iAmLeader) {
|
while (!iAmLeader) {
|
||||||
wait(connRecord->resolveHostnames());
|
|
||||||
coordinators = ServerCoordinators(connRecord);
|
|
||||||
nominees.resize(coordinators.leaderElectionServers.size());
|
|
||||||
state Future<Void> badCandidateTimeout;
|
state Future<Void> badCandidateTimeout;
|
||||||
|
|
||||||
myInfo.changeID = deterministicRandom()->randomUniqueID();
|
myInfo.changeID = deterministicRandom()->randomUniqueID();
|
||||||
|
@ -153,19 +143,12 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
|
||||||
std::vector<Future<Void>> cand;
|
std::vector<Future<Void>> cand;
|
||||||
cand.reserve(coordinators.leaderElectionServers.size());
|
cand.reserve(coordinators.leaderElectionServers.size());
|
||||||
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
|
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
|
||||||
Optional<Hostname> hostname;
|
|
||||||
auto r = connRecord->getConnectionString().networkAddressToHostname.find(
|
|
||||||
coordinators.leaderElectionServers[i].candidacy.getEndpoint().getPrimaryAddress());
|
|
||||||
if (r != connRecord->getConnectionString().networkAddressToHostname.end()) {
|
|
||||||
hostname = r->second;
|
|
||||||
}
|
|
||||||
cand.push_back(submitCandidacy(coordinators.clusterKey,
|
cand.push_back(submitCandidacy(coordinators.clusterKey,
|
||||||
coordinators.leaderElectionServers[i],
|
coordinators.leaderElectionServers[i],
|
||||||
myInfo,
|
myInfo,
|
||||||
prevChangeID,
|
prevChangeID,
|
||||||
&nomineeChange,
|
&nomineeChange,
|
||||||
&nominees[i],
|
&nominees[i]));
|
||||||
hostname));
|
|
||||||
}
|
}
|
||||||
candidacies = waitForAll(cand);
|
candidacies = waitForAll(cand);
|
||||||
|
|
||||||
|
@ -220,24 +203,15 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
|
||||||
} else
|
} else
|
||||||
badCandidateTimeout = Future<Void>();
|
badCandidateTimeout = Future<Void>();
|
||||||
|
|
||||||
try {
|
choose {
|
||||||
choose {
|
when(wait(nomineeChange.onTrigger())) {}
|
||||||
when(wait(nomineeChange.onTrigger())) {}
|
when(wait(badCandidateTimeout.isValid() ? badCandidateTimeout : Never())) {
|
||||||
when(wait(badCandidateTimeout.isValid() ? badCandidateTimeout : Never())) {
|
TEST(true); // Bad candidate timeout
|
||||||
TEST(true); // Bad candidate timeout
|
TraceEvent("LeaderBadCandidateTimeout", myInfo.changeID).log();
|
||||||
TraceEvent("LeaderBadCandidateTimeout", myInfo.changeID).log();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
when(wait(candidacies)) { ASSERT(false); }
|
|
||||||
when(wait(asyncPriorityInfo->onChange())) { break; }
|
|
||||||
}
|
|
||||||
} catch (Error& e) {
|
|
||||||
if (e.code() == error_code_coordinators_changed) {
|
|
||||||
connRecord->getConnectionString().resetToUnresolved();
|
|
||||||
break;
|
break;
|
||||||
} else {
|
|
||||||
throw e;
|
|
||||||
}
|
}
|
||||||
|
when(wait(candidacies)) { ASSERT(false); }
|
||||||
|
when(wait(asyncPriorityInfo->onChange())) { break; }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -258,10 +232,17 @@ ACTOR Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> c
|
||||||
state std::vector<Future<Void>> true_heartbeats;
|
state std::vector<Future<Void>> true_heartbeats;
|
||||||
state std::vector<Future<Void>> false_heartbeats;
|
state std::vector<Future<Void>> false_heartbeats;
|
||||||
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
|
for (int i = 0; i < coordinators.leaderElectionServers.size(); i++) {
|
||||||
Future<LeaderHeartbeatReply> hb =
|
Future<LeaderHeartbeatReply> hb;
|
||||||
retryBrokenPromise(coordinators.leaderElectionServers[i].leaderHeartbeat,
|
if (coordinators.leaderElectionServers[i].hostname.present()) {
|
||||||
LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
|
hb = retryGetReplyFromHostname(LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
|
||||||
TaskPriority::CoordinationReply);
|
coordinators.leaderElectionServers[i].hostname.get(),
|
||||||
|
WLTOKEN_LEADERELECTIONREG_LEADERHEARTBEAT,
|
||||||
|
TaskPriority::CoordinationReply);
|
||||||
|
} else {
|
||||||
|
hb = retryBrokenPromise(coordinators.leaderElectionServers[i].leaderHeartbeat,
|
||||||
|
LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID),
|
||||||
|
TaskPriority::CoordinationReply);
|
||||||
|
}
|
||||||
true_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ true }));
|
true_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ true }));
|
||||||
false_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ false }));
|
false_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ false }));
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ class ServerCoordinators;
|
||||||
// eventually be set. If the return value is cancelled, the candidacy or leadership of the proposedInterface
|
// eventually be set. If the return value is cancelled, the candidacy or leadership of the proposedInterface
|
||||||
// will eventually end.
|
// will eventually end.
|
||||||
template <class LeaderInterface>
|
template <class LeaderInterface>
|
||||||
Future<Void> tryBecomeLeader(Reference<IClusterConnectionRecord> const& connRecord,
|
Future<Void> tryBecomeLeader(ServerCoordinators const& coordinators,
|
||||||
LeaderInterface const& proposedInterface,
|
LeaderInterface const& proposedInterface,
|
||||||
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
|
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
|
||||||
bool hasConnected,
|
bool hasConnected,
|
||||||
|
@ -50,20 +50,20 @@ Future<Void> changeLeaderCoordinators(ServerCoordinators const& coordinators, Va
|
||||||
#pragma region Implementation
|
#pragma region Implementation
|
||||||
#endif // __INTEL_COMPILER
|
#endif // __INTEL_COMPILER
|
||||||
|
|
||||||
Future<Void> tryBecomeLeaderInternal(Reference<IClusterConnectionRecord> const& connRecord,
|
Future<Void> tryBecomeLeaderInternal(ServerCoordinators const& coordinators,
|
||||||
Value const& proposedSerializedInterface,
|
Value const& proposedSerializedInterface,
|
||||||
Reference<AsyncVar<Value>> const& outSerializedLeader,
|
Reference<AsyncVar<Value>> const& outSerializedLeader,
|
||||||
bool const& hasConnected,
|
bool const& hasConnected,
|
||||||
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo);
|
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo);
|
||||||
|
|
||||||
template <class LeaderInterface>
|
template <class LeaderInterface>
|
||||||
Future<Void> tryBecomeLeader(Reference<IClusterConnectionRecord> const& connRecord,
|
Future<Void> tryBecomeLeader(ServerCoordinators const& coordinators,
|
||||||
LeaderInterface const& proposedInterface,
|
LeaderInterface const& proposedInterface,
|
||||||
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
|
Reference<AsyncVar<Optional<LeaderInterface>>> const& outKnownLeader,
|
||||||
bool hasConnected,
|
bool hasConnected,
|
||||||
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo) {
|
Reference<AsyncVar<ClusterControllerPriorityInfo>> const& asyncPriorityInfo) {
|
||||||
auto serializedInfo = makeReference<AsyncVar<Value>>();
|
auto serializedInfo = makeReference<AsyncVar<Value>>();
|
||||||
Future<Void> m = tryBecomeLeaderInternal(connRecord,
|
Future<Void> m = tryBecomeLeaderInternal(coordinators,
|
||||||
ObjectWriter::toValue(proposedInterface, IncludeVersion()),
|
ObjectWriter::toValue(proposedInterface, IncludeVersion()),
|
||||||
serializedInfo,
|
serializedInfo,
|
||||||
hasConnected,
|
hasConnected,
|
||||||
|
|
|
@ -99,8 +99,17 @@ class GetCommittedVersionQuorum {
|
||||||
|
|
||||||
// Now roll node forward to match the largest committed version of
|
// Now roll node forward to match the largest committed version of
|
||||||
// the replies.
|
// the replies.
|
||||||
state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(self->replies[target]));
|
|
||||||
try {
|
try {
|
||||||
|
state std::vector<ConfigFollowerInterface> interfs = self->replies[target];
|
||||||
|
std::vector<Future<Void>> fs;
|
||||||
|
for (ConfigFollowerInterface& interf : interfs) {
|
||||||
|
if (interf.hostname.present()) {
|
||||||
|
fs.push_back(tryInitializeRequestStream(
|
||||||
|
&interf.getChanges, interf.hostname.get(), WLTOKEN_CONFIGFOLLOWER_GETCHANGES));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
state Reference<ConfigFollowerInfo> quorumCfi(new ConfigFollowerInfo(interfs));
|
||||||
state Version lastSeenVersion = std::max(
|
state Version lastSeenVersion = std::max(
|
||||||
rollback.present() ? rollback.get() : nodeVersion.lastCommitted, self->largestCompactedResponse);
|
rollback.present() ? rollback.get() : nodeVersion.lastCommitted, self->largestCompactedResponse);
|
||||||
ConfigFollowerGetChangesReply reply =
|
ConfigFollowerGetChangesReply reply =
|
||||||
|
@ -108,9 +117,21 @@ class GetCommittedVersionQuorum {
|
||||||
&ConfigFollowerInterface::getChanges,
|
&ConfigFollowerInterface::getChanges,
|
||||||
ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
|
ConfigFollowerGetChangesRequest{ lastSeenVersion, target }),
|
||||||
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
||||||
wait(timeoutError(cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
|
|
||||||
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }),
|
if (cfi.hostname.present()) {
|
||||||
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
wait(timeoutError(
|
||||||
|
retryGetReplyFromHostname(
|
||||||
|
ConfigFollowerRollforwardRequest{
|
||||||
|
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations },
|
||||||
|
cfi.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_ROLLFORWARD),
|
||||||
|
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
||||||
|
} else {
|
||||||
|
wait(timeoutError(
|
||||||
|
cfi.rollforward.getReply(ConfigFollowerRollforwardRequest{
|
||||||
|
rollback, nodeVersion.lastCommitted, target, reply.changes, reply.annotations }),
|
||||||
|
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
||||||
|
}
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() == error_code_transaction_too_old) {
|
if (e.code() == error_code_transaction_too_old) {
|
||||||
// Seeing this trace is not necessarily a problem. There
|
// Seeing this trace is not necessarily a problem. There
|
||||||
|
@ -129,9 +150,18 @@ class GetCommittedVersionQuorum {
|
||||||
|
|
||||||
ACTOR static Future<Void> getCommittedVersionActor(GetCommittedVersionQuorum* self, ConfigFollowerInterface cfi) {
|
ACTOR static Future<Void> getCommittedVersionActor(GetCommittedVersionQuorum* self, ConfigFollowerInterface cfi) {
|
||||||
try {
|
try {
|
||||||
ConfigFollowerGetCommittedVersionReply reply =
|
state ConfigFollowerGetCommittedVersionReply reply;
|
||||||
wait(timeoutError(cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{}),
|
if (cfi.hostname.present()) {
|
||||||
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
wait(timeoutError(store(reply,
|
||||||
|
retryGetReplyFromHostname(ConfigFollowerGetCommittedVersionRequest{},
|
||||||
|
cfi.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)),
|
||||||
|
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
||||||
|
} else {
|
||||||
|
wait(timeoutError(
|
||||||
|
store(reply, cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})),
|
||||||
|
SERVER_KNOBS->GET_COMMITTED_VERSION_TIMEOUT));
|
||||||
|
}
|
||||||
|
|
||||||
++self->totalRepliesReceived;
|
++self->totalRepliesReceived;
|
||||||
self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted);
|
self->largestCompactedResponse = std::max(self->largestCompactedResponse, reply.lastCompacted);
|
||||||
|
@ -279,7 +309,15 @@ class PaxosConfigConsumerImpl {
|
||||||
std::vector<Future<Void>> compactionRequests;
|
std::vector<Future<Void>> compactionRequests;
|
||||||
compactionRequests.reserve(compactionRequests.size());
|
compactionRequests.reserve(compactionRequests.size());
|
||||||
for (const auto& cfi : self->cfis) {
|
for (const auto& cfi : self->cfis) {
|
||||||
compactionRequests.push_back(cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
|
if (cfi.hostname.present()) {
|
||||||
|
compactionRequests.push_back(
|
||||||
|
retryGetReplyFromHostname(ConfigFollowerCompactRequest{ compactionVersion },
|
||||||
|
cfi.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_COMPACT));
|
||||||
|
} else {
|
||||||
|
compactionRequests.push_back(
|
||||||
|
cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
wait(timeoutError(waitForAll(compactionRequests), 1.0));
|
wait(timeoutError(waitForAll(compactionRequests), 1.0));
|
||||||
|
@ -294,8 +332,18 @@ class PaxosConfigConsumerImpl {
|
||||||
self->resetCommittedVersionQuorum(); // TODO: This seems to fix a segfault, investigate more
|
self->resetCommittedVersionQuorum(); // TODO: This seems to fix a segfault, investigate more
|
||||||
try {
|
try {
|
||||||
state Version committedVersion = wait(getCommittedVersion(self));
|
state Version committedVersion = wait(getCommittedVersion(self));
|
||||||
state Reference<ConfigFollowerInfo> configNodes(
|
state std::vector<ConfigFollowerInterface> readReplicas =
|
||||||
new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas()));
|
self->getCommittedVersionQuorum.getReadReplicas();
|
||||||
|
std::vector<Future<Void>> fs;
|
||||||
|
for (ConfigFollowerInterface& readReplica : readReplicas) {
|
||||||
|
if (readReplica.hostname.present()) {
|
||||||
|
fs.push_back(tryInitializeRequestStream(&readReplica.getSnapshotAndChanges,
|
||||||
|
readReplica.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_GETSNAPSHOTANDCHANGES));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
state Reference<ConfigFollowerInfo> configNodes(new ConfigFollowerInfo(readReplicas));
|
||||||
ConfigFollowerGetSnapshotAndChangesReply reply =
|
ConfigFollowerGetSnapshotAndChangesReply reply =
|
||||||
wait(timeoutError(basicLoadBalance(configNodes,
|
wait(timeoutError(basicLoadBalance(configNodes,
|
||||||
&ConfigFollowerInterface::getSnapshotAndChanges,
|
&ConfigFollowerInterface::getSnapshotAndChanges,
|
||||||
|
@ -349,8 +397,18 @@ class PaxosConfigConsumerImpl {
|
||||||
// returned would be 1.
|
// returned would be 1.
|
||||||
if (committedVersion > self->lastSeenVersion) {
|
if (committedVersion > self->lastSeenVersion) {
|
||||||
ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1);
|
ASSERT(self->getCommittedVersionQuorum.getReadReplicas().size() >= self->cfis.size() / 2 + 1);
|
||||||
state Reference<ConfigFollowerInfo> configNodes(
|
state std::vector<ConfigFollowerInterface> readReplicas =
|
||||||
new ConfigFollowerInfo(self->getCommittedVersionQuorum.getReadReplicas()));
|
self->getCommittedVersionQuorum.getReadReplicas();
|
||||||
|
std::vector<Future<Void>> fs;
|
||||||
|
for (ConfigFollowerInterface& readReplica : readReplicas) {
|
||||||
|
if (readReplica.hostname.present()) {
|
||||||
|
fs.push_back(tryInitializeRequestStream(&readReplica.getChanges,
|
||||||
|
readReplica.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_GETCHANGES));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait(waitForAll(fs));
|
||||||
|
state Reference<ConfigFollowerInfo> configNodes(new ConfigFollowerInfo(readReplicas));
|
||||||
ConfigFollowerGetChangesReply reply = wait(timeoutError(
|
ConfigFollowerGetChangesReply reply = wait(timeoutError(
|
||||||
basicLoadBalance(configNodes,
|
basicLoadBalance(configNodes,
|
||||||
&ConfigFollowerInterface::getChanges,
|
&ConfigFollowerInterface::getChanges,
|
||||||
|
|
|
@ -73,6 +73,8 @@ struct ProxyStats {
|
||||||
|
|
||||||
LatencySample commitBatchingWindowSize;
|
LatencySample commitBatchingWindowSize;
|
||||||
|
|
||||||
|
LatencySample computeLatency;
|
||||||
|
|
||||||
Future<Void> logger;
|
Future<Void> logger;
|
||||||
|
|
||||||
int64_t maxComputeNS;
|
int64_t maxComputeNS;
|
||||||
|
@ -126,6 +128,10 @@ struct ProxyStats {
|
||||||
id,
|
id,
|
||||||
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
|
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
|
||||||
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
|
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
|
||||||
|
computeLatency("ComputeLatency",
|
||||||
|
id,
|
||||||
|
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
|
||||||
|
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
|
||||||
maxComputeNS(0), minComputeNS(1e12),
|
maxComputeNS(0), minComputeNS(1e12),
|
||||||
commitBatchQueuingDist(Histogram::getHistogram(LiteralStringRef("CommitProxy"),
|
commitBatchQueuingDist(Histogram::getHistogram(LiteralStringRef("CommitProxy"),
|
||||||
LiteralStringRef("CommitBatchQueuing"),
|
LiteralStringRef("CommitBatchQueuing"),
|
||||||
|
|
|
@ -161,9 +161,8 @@ ACTOR Future<std::vector<WorkerInterface>> getCoordWorkers(Database cx,
|
||||||
if (!coordinators.present()) {
|
if (!coordinators.present()) {
|
||||||
throw operation_failed();
|
throw operation_failed();
|
||||||
}
|
}
|
||||||
state ClusterConnectionString ccs(coordinators.get().toString());
|
ClusterConnectionString ccs(coordinators.get().toString());
|
||||||
wait(ccs.resolveHostnames());
|
std::vector<NetworkAddress> coordinatorsAddr = wait(ccs.tryResolveHostnames());
|
||||||
std::vector<NetworkAddress> coordinatorsAddr = ccs.coordinators();
|
|
||||||
std::set<NetworkAddress> coordinatorsAddrSet;
|
std::set<NetworkAddress> coordinatorsAddrSet;
|
||||||
for (const auto& addr : coordinatorsAddr) {
|
for (const auto& addr : coordinatorsAddr) {
|
||||||
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);
|
TraceEvent(SevDebug, "CoordinatorAddress").detail("Addr", addr);
|
||||||
|
|
|
@ -44,15 +44,29 @@ class SimpleConfigConsumerImpl {
|
||||||
loop {
|
loop {
|
||||||
state Version compactionVersion = self->lastSeenVersion;
|
state Version compactionVersion = self->lastSeenVersion;
|
||||||
wait(delayJittered(self->compactionInterval.get()));
|
wait(delayJittered(self->compactionInterval.get()));
|
||||||
wait(self->cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
|
if (self->cfi.hostname.present()) {
|
||||||
|
wait(retryGetReplyFromHostname(ConfigFollowerCompactRequest{ compactionVersion },
|
||||||
|
self->cfi.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_COMPACT));
|
||||||
|
} else {
|
||||||
|
wait(self->cfi.compact.getReply(ConfigFollowerCompactRequest{ compactionVersion }));
|
||||||
|
}
|
||||||
++self->compactRequest;
|
++self->compactRequest;
|
||||||
broadcaster->compact(compactionVersion);
|
broadcaster->compact(compactionVersion);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTOR static Future<Version> getCommittedVersion(SimpleConfigConsumerImpl* self) {
|
ACTOR static Future<Version> getCommittedVersion(SimpleConfigConsumerImpl* self) {
|
||||||
ConfigFollowerGetCommittedVersionReply committedVersionReply =
|
state ConfigFollowerGetCommittedVersionReply committedVersionReply;
|
||||||
wait(self->cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{}));
|
if (self->cfi.hostname.present()) {
|
||||||
|
wait(store(committedVersionReply,
|
||||||
|
retryGetReplyFromHostname(ConfigFollowerGetCommittedVersionRequest{},
|
||||||
|
self->cfi.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_GETCOMMITTEDVERSION)));
|
||||||
|
} else {
|
||||||
|
wait(store(committedVersionReply,
|
||||||
|
self->cfi.getCommittedVersion.getReply(ConfigFollowerGetCommittedVersionRequest{})));
|
||||||
|
}
|
||||||
return committedVersionReply.lastCommitted;
|
return committedVersionReply.lastCommitted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,8 +77,18 @@ class SimpleConfigConsumerImpl {
|
||||||
state Version committedVersion = wait(getCommittedVersion(self));
|
state Version committedVersion = wait(getCommittedVersion(self));
|
||||||
ASSERT_GE(committedVersion, self->lastSeenVersion);
|
ASSERT_GE(committedVersion, self->lastSeenVersion);
|
||||||
if (committedVersion > self->lastSeenVersion) {
|
if (committedVersion > self->lastSeenVersion) {
|
||||||
ConfigFollowerGetChangesReply reply = wait(self->cfi.getChanges.getReply(
|
state ConfigFollowerGetChangesReply reply;
|
||||||
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion }));
|
if (self->cfi.hostname.present()) {
|
||||||
|
wait(store(reply,
|
||||||
|
retryGetReplyFromHostname(
|
||||||
|
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion },
|
||||||
|
self->cfi.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_GETCHANGES)));
|
||||||
|
} else {
|
||||||
|
wait(store(reply,
|
||||||
|
self->cfi.getChanges.getReply(
|
||||||
|
ConfigFollowerGetChangesRequest{ self->lastSeenVersion, committedVersion })));
|
||||||
|
}
|
||||||
++self->successfulChangeRequest;
|
++self->successfulChangeRequest;
|
||||||
for (const auto& versionedMutation : reply.changes) {
|
for (const auto& versionedMutation : reply.changes) {
|
||||||
TraceEvent te(SevDebug, "ConsumerFetchedMutation", self->id);
|
TraceEvent te(SevDebug, "ConsumerFetchedMutation", self->id);
|
||||||
|
@ -96,8 +120,17 @@ class SimpleConfigConsumerImpl {
|
||||||
|
|
||||||
ACTOR static Future<Void> getSnapshotAndChanges(SimpleConfigConsumerImpl* self, ConfigBroadcaster* broadcaster) {
|
ACTOR static Future<Void> getSnapshotAndChanges(SimpleConfigConsumerImpl* self, ConfigBroadcaster* broadcaster) {
|
||||||
state Version committedVersion = wait(getCommittedVersion(self));
|
state Version committedVersion = wait(getCommittedVersion(self));
|
||||||
ConfigFollowerGetSnapshotAndChangesReply reply = wait(
|
state ConfigFollowerGetSnapshotAndChangesReply reply;
|
||||||
self->cfi.getSnapshotAndChanges.getReply(ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion }));
|
if (self->cfi.hostname.present()) {
|
||||||
|
wait(store(reply,
|
||||||
|
retryGetReplyFromHostname(ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion },
|
||||||
|
self->cfi.hostname.get(),
|
||||||
|
WLTOKEN_CONFIGFOLLOWER_GETSNAPSHOTANDCHANGES)));
|
||||||
|
} else {
|
||||||
|
wait(store(reply,
|
||||||
|
self->cfi.getSnapshotAndChanges.getReply(
|
||||||
|
ConfigFollowerGetSnapshotAndChangesRequest{ committedVersion })));
|
||||||
|
}
|
||||||
++self->snapshotRequest;
|
++self->snapshotRequest;
|
||||||
TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id)
|
TraceEvent(SevDebug, "ConfigConsumerGotSnapshotAndChanges", self->id)
|
||||||
.detail("SnapshotVersion", reply.snapshotVersion)
|
.detail("SnapshotVersion", reply.snapshotVersion)
|
||||||
|
|
|
@ -1980,8 +1980,8 @@ void setupSimulatedSystem(std::vector<Future<Void>>* systemActors,
|
||||||
TEST(useIPv6); // Use IPv6
|
TEST(useIPv6); // Use IPv6
|
||||||
TEST(!useIPv6); // Use IPv4
|
TEST(!useIPv6); // Use IPv4
|
||||||
|
|
||||||
// TODO(renxuan): Use hostname 25% of the time, unless it is disabled
|
// Use hostname 25% of the time, unless it is disabled
|
||||||
bool useHostname = false; // !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
|
bool useHostname = !testConfig.disableHostname && deterministicRandom()->random01() < 0.25;
|
||||||
TEST(useHostname); // Use hostname
|
TEST(useHostname); // Use hostname
|
||||||
TEST(!useHostname); // Use IP address
|
TEST(!useHostname); // Use IP address
|
||||||
NetworkAddressFromHostname fromHostname =
|
NetworkAddressFromHostname fromHostname =
|
||||||
|
|
|
@ -831,7 +831,8 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) {
|
std::vector<NetworkAddress> addressVec = wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
|
||||||
|
for (const auto& coordinator : addressVec) {
|
||||||
roles.addCoordinatorRole(coordinator);
|
roles.addCoordinatorRole(coordinator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1689,8 +1690,7 @@ static JsonBuilderObject configurationFetcher(Optional<DatabaseConfiguration> co
|
||||||
}
|
}
|
||||||
statusObj["excluded_servers"] = excludedServersArr;
|
statusObj["excluded_servers"] = excludedServersArr;
|
||||||
}
|
}
|
||||||
std::vector<ClientLeaderRegInterface> coordinatorLeaderServers = coordinators.clientLeaderServers;
|
int count = coordinators.clientLeaderServers.size();
|
||||||
int count = coordinatorLeaderServers.size();
|
|
||||||
statusObj["coordinators_count"] = count;
|
statusObj["coordinators_count"] = count;
|
||||||
} catch (Error&) {
|
} catch (Error&) {
|
||||||
incomplete_reasons->insert("Could not retrieve all configuration status information.");
|
incomplete_reasons->insert("Could not retrieve all configuration status information.");
|
||||||
|
@ -2505,7 +2505,8 @@ static JsonBuilderArray tlogFetcher(int* logFaultTolerance,
|
||||||
|
|
||||||
static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration,
|
static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration configuration,
|
||||||
ServerCoordinators coordinators,
|
ServerCoordinators coordinators,
|
||||||
std::vector<WorkerDetails>& workers,
|
const std::vector<NetworkAddress>& coordinatorAddresses,
|
||||||
|
const std::vector<WorkerDetails>& workers,
|
||||||
int extraTlogEligibleZones,
|
int extraTlogEligibleZones,
|
||||||
int minStorageReplicasRemaining,
|
int minStorageReplicasRemaining,
|
||||||
int oldLogFaultTolerance,
|
int oldLogFaultTolerance,
|
||||||
|
@ -2521,11 +2522,11 @@ static JsonBuilderObject faultToleranceStatusFetcher(DatabaseConfiguration confi
|
||||||
int maxCoordinatorFailures = (coordinators.clientLeaderServers.size() - 1) / 2;
|
int maxCoordinatorFailures = (coordinators.clientLeaderServers.size() - 1) / 2;
|
||||||
|
|
||||||
std::map<NetworkAddress, StringRef> workerZones;
|
std::map<NetworkAddress, StringRef> workerZones;
|
||||||
for (auto& worker : workers) {
|
for (const auto& worker : workers) {
|
||||||
workerZones[worker.interf.address()] = worker.interf.locality.zoneId().orDefault(LiteralStringRef(""));
|
workerZones[worker.interf.address()] = worker.interf.locality.zoneId().orDefault(LiteralStringRef(""));
|
||||||
}
|
}
|
||||||
std::map<StringRef, int> coordinatorZoneCounts;
|
std::map<StringRef, int> coordinatorZoneCounts;
|
||||||
for (auto& coordinator : coordinators.ccr->getConnectionString().coordinators()) {
|
for (const auto& coordinator : coordinatorAddresses) {
|
||||||
auto zone = workerZones[coordinator];
|
auto zone = workerZones[coordinator];
|
||||||
coordinatorZoneCounts[zone] += 1;
|
coordinatorZoneCounts[zone] += 1;
|
||||||
}
|
}
|
||||||
|
@ -3061,6 +3062,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
||||||
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
|
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
|
||||||
wait(success(primaryDCFO));
|
wait(success(primaryDCFO));
|
||||||
|
|
||||||
|
std::vector<NetworkAddress> coordinatorAddresses =
|
||||||
|
wait(coordinators.ccr->getConnectionString().tryResolveHostnames());
|
||||||
|
|
||||||
int logFaultTolerance = 100;
|
int logFaultTolerance = 100;
|
||||||
if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
|
if (db->get().recoveryState >= RecoveryState::ACCEPTING_COMMITS) {
|
||||||
statusObj["logs"] = tlogFetcher(&logFaultTolerance, db, address_workers);
|
statusObj["logs"] = tlogFetcher(&logFaultTolerance, db, address_workers);
|
||||||
|
@ -3070,6 +3074,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
|
||||||
statusObj["fault_tolerance"] =
|
statusObj["fault_tolerance"] =
|
||||||
faultToleranceStatusFetcher(configuration.get(),
|
faultToleranceStatusFetcher(configuration.get(),
|
||||||
coordinators,
|
coordinators,
|
||||||
|
coordinatorAddresses,
|
||||||
workers,
|
workers,
|
||||||
extraTlogEligibleZones,
|
extraTlogEligibleZones,
|
||||||
minStorageReplicasRemaining,
|
minStorageReplicasRemaining,
|
||||||
|
|
|
@ -859,9 +859,9 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
|
||||||
NetworkAddressList publicNetworkAddresses;
|
NetworkAddressList publicNetworkAddresses;
|
||||||
NetworkAddressList listenNetworkAddresses;
|
NetworkAddressList listenNetworkAddresses;
|
||||||
|
|
||||||
connectionRecord.resolveHostnamesBlocking();
|
std::vector<Hostname>& hostnames = connectionRecord.getConnectionString().hostnames;
|
||||||
auto& coordinators = connectionRecord.getConnectionString().coordinators();
|
const std::vector<NetworkAddress>& coords = connectionRecord.getConnectionString().coordinators();
|
||||||
ASSERT(coordinators.size() > 0);
|
ASSERT(hostnames.size() + coords.size() > 0);
|
||||||
|
|
||||||
for (int ii = 0; ii < publicAddressStrs.size(); ++ii) {
|
for (int ii = 0; ii < publicAddressStrs.size(); ++ii) {
|
||||||
const std::string& publicAddressStr = publicAddressStrs[ii];
|
const std::string& publicAddressStr = publicAddressStrs[ii];
|
||||||
|
@ -930,13 +930,26 @@ std::pair<NetworkAddressList, NetworkAddressList> buildNetworkAddresses(
|
||||||
listenNetworkAddresses.secondaryAddress = currentListenAddress;
|
listenNetworkAddresses.secondaryAddress = currentListenAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasSameCoord = std::all_of(coordinators.begin(), coordinators.end(), [&](const NetworkAddress& address) {
|
bool matchCoordinatorsTls = std::all_of(coords.begin(), coords.end(), [&](const NetworkAddress& address) {
|
||||||
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
|
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
|
||||||
return address.isTLS() == currentPublicAddress.isTLS();
|
return address.isTLS() == currentPublicAddress.isTLS();
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
if (!hasSameCoord) {
|
// If true, further check hostnames.
|
||||||
|
if (matchCoordinatorsTls) {
|
||||||
|
matchCoordinatorsTls = std::all_of(hostnames.begin(), hostnames.end(), [&](Hostname& hostname) {
|
||||||
|
Optional<NetworkAddress> resolvedAddress = hostname.resolveBlocking();
|
||||||
|
if (resolvedAddress.present()) {
|
||||||
|
NetworkAddress address = resolvedAddress.get();
|
||||||
|
if (address.ip == currentPublicAddress.ip && address.port == currentPublicAddress.port) {
|
||||||
|
return address.isTLS() == currentPublicAddress.isTLS();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (!matchCoordinatorsTls) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"ERROR: TLS state of public address %s does not match in coordinator list.\n",
|
"ERROR: TLS state of public address %s does not match in coordinator list.\n",
|
||||||
publicAddressStr.c_str());
|
publicAddressStr.c_str());
|
||||||
|
|
|
@ -3455,7 +3455,8 @@ ACTOR Future<GetRangeReqAndResultRef> quickGetKeyValues(
|
||||||
tr.setVersion(version);
|
tr.setVersion(version);
|
||||||
// TODO: is DefaultPromiseEndpoint the best priority for this?
|
// TODO: is DefaultPromiseEndpoint the best priority for this?
|
||||||
tr.trState->taskID = TaskPriority::DefaultPromiseEndpoint;
|
tr.trState->taskID = TaskPriority::DefaultPromiseEndpoint;
|
||||||
Future<RangeResult> rangeResultFuture = tr.getRange(prefixRange(prefix), Snapshot::True);
|
Future<RangeResult> rangeResultFuture =
|
||||||
|
tr.getRange(prefixRange(prefix), GetRangeLimits::ROW_LIMIT_UNLIMITED, Snapshot::True);
|
||||||
// TODO: async in case it needs to read from other servers.
|
// TODO: async in case it needs to read from other servers.
|
||||||
RangeResult rangeResult = wait(rangeResultFuture);
|
RangeResult rangeResult = wait(rangeResultFuture);
|
||||||
a->dependsOn(rangeResult.arena());
|
a->dependsOn(rangeResult.arena());
|
||||||
|
|
|
@ -2977,21 +2977,40 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
|
||||||
Reference<IClusterConnectionRecord> connRecord,
|
Reference<IClusterConnectionRecord> connRecord,
|
||||||
Reference<AsyncVar<Value>> result,
|
Reference<AsyncVar<Value>> result,
|
||||||
MonitorLeaderInfo info) {
|
MonitorLeaderInfo info) {
|
||||||
state ClusterConnectionString ccf = info.intermediateConnRecord->getConnectionString();
|
ClusterConnectionString cs = info.intermediateConnRecord->getConnectionString();
|
||||||
state std::vector<NetworkAddress> addrs = ccf.coordinators();
|
state int coordinatorsSize = cs.hostnames.size() + cs.coordinators().size();
|
||||||
state ElectionResultRequest request;
|
state ElectionResultRequest request;
|
||||||
state int index = 0;
|
state int index = 0;
|
||||||
state int successIndex = 0;
|
state int successIndex = 0;
|
||||||
request.key = ccf.clusterKey();
|
state std::vector<LeaderElectionRegInterface> leaderElectionServers;
|
||||||
request.coordinators = ccf.coordinators();
|
|
||||||
|
|
||||||
deterministicRandom()->randomShuffle(addrs);
|
leaderElectionServers.reserve(coordinatorsSize);
|
||||||
|
for (const auto& h : cs.hostnames) {
|
||||||
|
leaderElectionServers.push_back(LeaderElectionRegInterface(h));
|
||||||
|
}
|
||||||
|
for (const auto& c : cs.coordinators()) {
|
||||||
|
leaderElectionServers.push_back(LeaderElectionRegInterface(c));
|
||||||
|
}
|
||||||
|
deterministicRandom()->randomShuffle(leaderElectionServers);
|
||||||
|
|
||||||
|
request.key = cs.clusterKey();
|
||||||
|
request.hostnames = cs.hostnames;
|
||||||
|
request.coordinators = cs.coordinators();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
LeaderElectionRegInterface interf(addrs[index]);
|
LeaderElectionRegInterface interf = leaderElectionServers[index];
|
||||||
|
bool usingHostname = interf.hostname.present();
|
||||||
request.reply = ReplyPromise<Optional<LeaderInfo>>();
|
request.reply = ReplyPromise<Optional<LeaderInfo>>();
|
||||||
|
|
||||||
ErrorOr<Optional<LeaderInfo>> leader = wait(interf.electionResult.tryGetReply(request));
|
state ErrorOr<Optional<LeaderInfo>> leader;
|
||||||
|
if (usingHostname) {
|
||||||
|
wait(store(
|
||||||
|
leader,
|
||||||
|
tryGetReplyFromHostname(request, interf.hostname.get(), WLTOKEN_LEADERELECTIONREG_ELECTIONRESULT)));
|
||||||
|
} else {
|
||||||
|
wait(store(leader, interf.electionResult.tryGetReply(request)));
|
||||||
|
}
|
||||||
|
|
||||||
if (leader.present()) {
|
if (leader.present()) {
|
||||||
if (leader.get().present()) {
|
if (leader.get().present()) {
|
||||||
if (leader.get().get().forward) {
|
if (leader.get().get().forward) {
|
||||||
|
@ -3027,14 +3046,9 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
|
||||||
}
|
}
|
||||||
successIndex = index;
|
successIndex = index;
|
||||||
} else {
|
} else {
|
||||||
if (leader.isError() && leader.getError().code() == error_code_coordinators_changed) {
|
index = (index + 1) % coordinatorsSize;
|
||||||
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
|
|
||||||
throw coordinators_changed();
|
|
||||||
}
|
|
||||||
index = (index + 1) % addrs.size();
|
|
||||||
if (index == successIndex) {
|
if (index == successIndex) {
|
||||||
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
|
wait(delay(CLIENT_KNOBS->COORDINATOR_RECONNECTION_DELAY));
|
||||||
throw coordinators_changed();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3042,22 +3056,11 @@ ACTOR Future<MonitorLeaderInfo> monitorLeaderWithDelayedCandidacyImplOneGenerati
|
||||||
|
|
||||||
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord,
|
ACTOR Future<Void> monitorLeaderWithDelayedCandidacyImplInternal(Reference<IClusterConnectionRecord> connRecord,
|
||||||
Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
|
Reference<AsyncVar<Value>> outSerializedLeaderInfo) {
|
||||||
wait(connRecord->resolveHostnames());
|
|
||||||
state MonitorLeaderInfo info(connRecord);
|
state MonitorLeaderInfo info(connRecord);
|
||||||
loop {
|
loop {
|
||||||
try {
|
MonitorLeaderInfo _info =
|
||||||
wait(info.intermediateConnRecord->resolveHostnames());
|
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
|
||||||
MonitorLeaderInfo _info =
|
info = _info;
|
||||||
wait(monitorLeaderWithDelayedCandidacyImplOneGeneration(connRecord, outSerializedLeaderInfo, info));
|
|
||||||
info = _info;
|
|
||||||
} catch (Error& e) {
|
|
||||||
if (e.code() == error_code_coordinators_changed) {
|
|
||||||
TraceEvent("MonitorLeaderWithDelayedCandidacyCoordinatorsChanged").suppressFor(1.0);
|
|
||||||
info.intermediateConnRecord->getConnectionString().resetToUnresolved();
|
|
||||||
} else {
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3191,6 +3194,7 @@ ACTOR Future<Void> fdbd(Reference<IClusterConnectionRecord> connRecord,
|
||||||
actors.push_back(serveProcess());
|
actors.push_back(serveProcess());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
ServerCoordinators coordinators(connRecord);
|
||||||
if (g_network->isSimulated()) {
|
if (g_network->isSimulated()) {
|
||||||
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
|
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
|
||||||
}
|
}
|
||||||
|
|
|
@ -2096,7 +2096,8 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
state ClusterConnectionString old(currentKey.get().toString());
|
ClusterConnectionString old(currentKey.get().toString());
|
||||||
|
state std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
|
||||||
|
|
||||||
std::vector<ProcessData> workers = wait(::getWorkers(&tr));
|
std::vector<ProcessData> workers = wait(::getWorkers(&tr));
|
||||||
|
|
||||||
|
@ -2106,7 +2107,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::set<Optional<Standalone<StringRef>>> checkDuplicates;
|
std::set<Optional<Standalone<StringRef>>> checkDuplicates;
|
||||||
for (const auto& addr : old.coordinators()) {
|
for (const auto& addr : oldCoordinators) {
|
||||||
auto findResult = addr_locality.find(addr);
|
auto findResult = addr_locality.find(addr);
|
||||||
if (findResult != addr_locality.end()) {
|
if (findResult != addr_locality.end()) {
|
||||||
if (checkDuplicates.count(findResult->second.zoneId())) {
|
if (checkDuplicates.count(findResult->second.zoneId())) {
|
||||||
|
|
|
@ -106,6 +106,7 @@ struct CycleWorkload : TestWorkload {
|
||||||
state Transaction tr(cx);
|
state Transaction tr(cx);
|
||||||
if (deterministicRandom()->random01() >= self->traceParentProbability) {
|
if (deterministicRandom()->random01() >= self->traceParentProbability) {
|
||||||
state Span span("CycleClient"_loc);
|
state Span span("CycleClient"_loc);
|
||||||
|
// TraceEvent("CycleTracingTransaction", span.context).log();
|
||||||
TraceEvent("CycleTracingTransaction", span.context).log();
|
TraceEvent("CycleTracingTransaction", span.context).log();
|
||||||
tr.setOption(FDBTransactionOptions::SPAN_PARENT,
|
tr.setOption(FDBTransactionOptions::SPAN_PARENT,
|
||||||
BinaryWriter::toValue(span.context, Unversioned()));
|
BinaryWriter::toValue(span.context, Unversioned()));
|
||||||
|
|
|
@ -132,7 +132,7 @@ struct DataLossRecoveryWorkload : TestWorkload {
|
||||||
} else {
|
} else {
|
||||||
tr.clear(key);
|
tr.clear(key);
|
||||||
}
|
}
|
||||||
wait(timeoutError(tr.commit(), 30.0));
|
wait(tr.commit());
|
||||||
break;
|
break;
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
wait(tr.onError(e));
|
wait(tr.onError(e));
|
||||||
|
|
|
@ -329,9 +329,7 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
|
||||||
for (int j = i; j < end; j++) {
|
for (int j = i; j < end; j++) {
|
||||||
if (deterministicRandom()->random01() < self->initialKeyDensity) {
|
if (deterministicRandom()->random01() < self->initialKeyDensity) {
|
||||||
Key key = self->getKeyForIndex(tenantNum, j);
|
Key key = self->getKeyForIndex(tenantNum, j);
|
||||||
if (key.size() <= (key.startsWith(systemKeys.begin)
|
if (key.size() <= getMaxWriteKeySize(key, false)) {
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
|
|
||||||
Value value = self->getRandomValue();
|
Value value = self->getRandomValue();
|
||||||
value = value.substr(
|
value = value.substr(
|
||||||
0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
|
0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
|
||||||
|
@ -1091,24 +1089,22 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
|
||||||
pos = littleEndian32(*(int32_t*)&value.end()[-4]);
|
pos = littleEndian32(*(int32_t*)&value.end()[-4]);
|
||||||
}
|
}
|
||||||
|
|
||||||
contract = {
|
contract = { std::make_pair(error_code_key_too_large,
|
||||||
std::make_pair(error_code_key_too_large,
|
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
|
||||||
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
|
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
: ExceptionContract::Never),
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
|
std::make_pair(error_code_value_too_large,
|
||||||
std::make_pair(error_code_value_too_large,
|
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
|
||||||
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
|
std::make_pair(error_code_invalid_mutation_type,
|
||||||
std::make_pair(
|
ExceptionContract::requiredIf(!isValidMutationType(op) ||
|
||||||
error_code_invalid_mutation_type,
|
!isAtomicOp((MutationRef::Type)op))),
|
||||||
ExceptionContract::requiredIf(!isValidMutationType(op) || !isAtomicOp((MutationRef::Type)op))),
|
std::make_pair(error_code_key_outside_legal_range,
|
||||||
std::make_pair(error_code_key_outside_legal_range,
|
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),
|
||||||
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),
|
std::make_pair(error_code_client_invalid_operation,
|
||||||
std::make_pair(
|
ExceptionContract::requiredIf((op == MutationRef::SetVersionstampedKey &&
|
||||||
error_code_client_invalid_operation,
|
(pos < 0 || pos + 10 > key.size() - 4)) ||
|
||||||
ExceptionContract::requiredIf(
|
(op == MutationRef::SetVersionstampedValue &&
|
||||||
(op == MutationRef::SetVersionstampedKey && (pos < 0 || pos + 10 > key.size() - 4)) ||
|
(pos < 0 || pos + 10 > value.size() - 4)))) };
|
||||||
(op == MutationRef::SetVersionstampedValue && (pos < 0 || pos + 10 > value.size() - 4))))
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void callback(Reference<ITransaction> tr) override { tr->atomicOp(key, value, (FDBMutationTypes::Option)op); }
|
void callback(Reference<ITransaction> tr) override { tr->atomicOp(key, value, (FDBMutationTypes::Option)op); }
|
||||||
|
@ -1131,11 +1127,10 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
|
||||||
key = makeKey();
|
key = makeKey();
|
||||||
}
|
}
|
||||||
value = makeValue();
|
value = makeValue();
|
||||||
contract = { std::make_pair(
|
contract = { std::make_pair(error_code_key_too_large,
|
||||||
error_code_key_too_large,
|
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
|
||||||
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
|
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
: ExceptionContract::Never),
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
|
|
||||||
std::make_pair(error_code_value_too_large,
|
std::make_pair(error_code_value_too_large,
|
||||||
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
|
ExceptionContract::requiredIf(value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)),
|
||||||
std::make_pair(error_code_key_outside_legal_range,
|
std::make_pair(error_code_key_outside_legal_range,
|
||||||
|
@ -1268,11 +1263,11 @@ struct FuzzApiCorrectnessWorkload : TestWorkload {
|
||||||
TestWatch(unsigned int id, FuzzApiCorrectnessWorkload* workload, Reference<ITransaction> tr)
|
TestWatch(unsigned int id, FuzzApiCorrectnessWorkload* workload, Reference<ITransaction> tr)
|
||||||
: BaseTest(id, workload, "TestWatch") {
|
: BaseTest(id, workload, "TestWatch") {
|
||||||
key = makeKey();
|
key = makeKey();
|
||||||
contract = { std::make_pair(
|
printf("Watching: %d %s\n", key.size(), printable(key.substr(0, std::min(key.size(), 20))).c_str());
|
||||||
error_code_key_too_large,
|
contract = { std::make_pair(error_code_key_too_large,
|
||||||
ExceptionContract::requiredIf(key.size() > (key.startsWith(systemKeys.begin)
|
key.size() > getMaxWriteKeySize(key, true) ? ExceptionContract::Always
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
: key.size() > getMaxWriteKeySize(key, false) ? ExceptionContract::Possible
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT))),
|
: ExceptionContract::Never),
|
||||||
std::make_pair(error_code_watches_disabled, ExceptionContract::Possible),
|
std::make_pair(error_code_watches_disabled, ExceptionContract::Possible),
|
||||||
std::make_pair(error_code_key_outside_legal_range,
|
std::make_pair(error_code_key_outside_legal_range,
|
||||||
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),
|
ExceptionContract::requiredIf((key >= workload->getMaxKey(tr)))),
|
||||||
|
|
|
@ -541,7 +541,12 @@ struct RemoveServersSafelyWorkload : TestWorkload {
|
||||||
state AddressExclusion coordExcl;
|
state AddressExclusion coordExcl;
|
||||||
// Exclude a coordinator under buggify, but only if fault tolerance is > 0 and kill set is non-empty already
|
// Exclude a coordinator under buggify, but only if fault tolerance is > 0 and kill set is non-empty already
|
||||||
if (BUGGIFY && toKill.size()) {
|
if (BUGGIFY && toKill.size()) {
|
||||||
std::vector<NetworkAddress> coordinators = wait(getCoordinators(cx));
|
Optional<ClusterConnectionString> csOptional = wait(getConnectionString(cx));
|
||||||
|
state std::vector<NetworkAddress> coordinators;
|
||||||
|
if (csOptional.present()) {
|
||||||
|
ClusterConnectionString cs = csOptional.get();
|
||||||
|
wait(store(coordinators, cs.tryResolveHostnames()));
|
||||||
|
}
|
||||||
if (coordinators.size() > 2) {
|
if (coordinators.size() > 2) {
|
||||||
auto randomCoordinator = deterministicRandom()->randomChoice(coordinators);
|
auto randomCoordinator = deterministicRandom()->randomChoice(coordinators);
|
||||||
coordExcl = AddressExclusion(randomCoordinator.ip, randomCoordinator.port);
|
coordExcl = AddressExclusion(randomCoordinator.ip, randomCoordinator.port);
|
||||||
|
|
|
@ -957,9 +957,9 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
||||||
boost::split(
|
boost::split(
|
||||||
process_addresses, coordinator_processes_key.get().toString(), [](char c) { return c == ','; });
|
process_addresses, coordinator_processes_key.get().toString(), [](char c) { return c == ','; });
|
||||||
ASSERT(process_addresses.size() == cs.coordinators().size() + cs.hostnames.size());
|
ASSERT(process_addresses.size() == cs.coordinators().size() + cs.hostnames.size());
|
||||||
wait(cs.resolveHostnames());
|
|
||||||
// compare the coordinator process network addresses one by one
|
// compare the coordinator process network addresses one by one
|
||||||
for (const auto& network_address : cs.coordinators()) {
|
std::vector<NetworkAddress> coordinators = wait(cs.tryResolveHostnames());
|
||||||
|
for (const auto& network_address : coordinators) {
|
||||||
ASSERT(std::find(process_addresses.begin(), process_addresses.end(), network_address.toString()) !=
|
ASSERT(std::find(process_addresses.begin(), process_addresses.end(), network_address.toString()) !=
|
||||||
process_addresses.end());
|
process_addresses.end());
|
||||||
}
|
}
|
||||||
|
@ -1077,19 +1077,20 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload {
|
||||||
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
tx->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||||
Optional<Value> res = wait(tx->get(coordinatorsKey));
|
Optional<Value> res = wait(tx->get(coordinatorsKey));
|
||||||
ASSERT(res.present()); // Otherwise, database is in a bad state
|
ASSERT(res.present()); // Otherwise, database is in a bad state
|
||||||
state ClusterConnectionString csNew(res.get().toString());
|
ClusterConnectionString csNew(res.get().toString());
|
||||||
wait(csNew.resolveHostnames());
|
// verify the cluster decription
|
||||||
ASSERT(csNew.coordinators().size() == old_coordinators_processes.size() + 1);
|
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
|
||||||
|
ASSERT(csNew.hostnames.size() + csNew.coordinators().size() ==
|
||||||
|
old_coordinators_processes.size() + 1);
|
||||||
|
std::vector<NetworkAddress> newCoordinators = wait(csNew.tryResolveHostnames());
|
||||||
// verify the coordinators' addresses
|
// verify the coordinators' addresses
|
||||||
for (const auto& network_address : csNew.coordinators()) {
|
for (const auto& network_address : newCoordinators) {
|
||||||
std::string address_str = network_address.toString();
|
std::string address_str = network_address.toString();
|
||||||
ASSERT(std::find(old_coordinators_processes.begin(),
|
ASSERT(std::find(old_coordinators_processes.begin(),
|
||||||
old_coordinators_processes.end(),
|
old_coordinators_processes.end(),
|
||||||
address_str) != old_coordinators_processes.end() ||
|
address_str) != old_coordinators_processes.end() ||
|
||||||
new_coordinator_process == address_str);
|
new_coordinator_process == address_str);
|
||||||
}
|
}
|
||||||
// verify the cluster decription
|
|
||||||
ASSERT(new_cluster_description == csNew.clusterKeyName().toString());
|
|
||||||
tx->reset();
|
tx->reset();
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
wait(tx->onError(e));
|
wait(tx->onError(e));
|
||||||
|
|
|
@ -30,7 +30,7 @@ void forceLinkMemcpyTests();
|
||||||
void forceLinkMemcpyPerfTests();
|
void forceLinkMemcpyPerfTests();
|
||||||
#if (!defined(TLS_DISABLED) && !defined(_WIN32))
|
#if (!defined(TLS_DISABLED) && !defined(_WIN32))
|
||||||
void forceLinkStreamCipherTests();
|
void forceLinkStreamCipherTests();
|
||||||
void forceLinkBLockCiherTests();
|
void forceLinkBlobCipherTests();
|
||||||
#endif
|
#endif
|
||||||
void forceLinkParallelStreamTests();
|
void forceLinkParallelStreamTests();
|
||||||
void forceLinkSimExternalConnectionTests();
|
void forceLinkSimExternalConnectionTests();
|
||||||
|
@ -39,6 +39,8 @@ void forceLinkSimKmsConnectorTests();
|
||||||
void forceLinkIThreadPoolTests();
|
void forceLinkIThreadPoolTests();
|
||||||
void forceLinkTokenSignTests();
|
void forceLinkTokenSignTests();
|
||||||
void forceLinkVersionVectorTests();
|
void forceLinkVersionVectorTests();
|
||||||
|
void forceLinkRESTClientTests();
|
||||||
|
void forceLinkRESTUtilsTests();
|
||||||
|
|
||||||
struct UnitTestWorkload : TestWorkload {
|
struct UnitTestWorkload : TestWorkload {
|
||||||
bool enabled;
|
bool enabled;
|
||||||
|
@ -88,6 +90,8 @@ struct UnitTestWorkload : TestWorkload {
|
||||||
forceLinkIThreadPoolTests();
|
forceLinkIThreadPoolTests();
|
||||||
forceLinkTokenSignTests();
|
forceLinkTokenSignTests();
|
||||||
forceLinkVersionVectorTests();
|
forceLinkVersionVectorTests();
|
||||||
|
forceLinkRESTClientTests();
|
||||||
|
forceLinkRESTUtilsTests();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string description() const override { return "UnitTests"; }
|
std::string description() const override { return "UnitTests"; }
|
||||||
|
|
|
@ -653,9 +653,7 @@ struct WriteDuringReadWorkload : TestWorkload {
|
||||||
for (int j = i; j < end; j++) {
|
for (int j = i; j < end; j++) {
|
||||||
if (deterministicRandom()->random01() < self->initialKeyDensity) {
|
if (deterministicRandom()->random01() < self->initialKeyDensity) {
|
||||||
Key key = self->getKeyForIndex(j);
|
Key key = self->getKeyForIndex(j);
|
||||||
if (key.size() <= (key.startsWith(systemKeys.begin)
|
if (key.size() <= getMaxWriteKeySize(key, false)) {
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
|
|
||||||
Value value = self->getRandomValue();
|
Value value = self->getRandomValue();
|
||||||
value =
|
value =
|
||||||
value.substr(0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
|
value.substr(0, std::min<int>(value.size(), CLIENT_KNOBS->VALUE_SIZE_LIMIT));
|
||||||
|
@ -898,18 +896,10 @@ struct WriteDuringReadWorkload : TestWorkload {
|
||||||
tr.clear(range);
|
tr.clear(range);
|
||||||
if (!noConflict) {
|
if (!noConflict) {
|
||||||
KeyRangeRef conflict(
|
KeyRangeRef conflict(
|
||||||
range.begin.substr(0,
|
range.begin.substr(
|
||||||
std::min<int>(range.begin.size(),
|
0, std::min<int>(range.begin.size(), getMaxClearKeySize(range.begin) + 1)),
|
||||||
(range.begin.startsWith(systemKeys.begin)
|
range.end.substr(
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
0, std::min<int>(range.end.size(), getMaxClearKeySize(range.end) + 1)));
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1)),
|
|
||||||
range.end.substr(0,
|
|
||||||
std::min<int>(range.end.size(),
|
|
||||||
(range.end.startsWith(systemKeys.begin)
|
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1)));
|
|
||||||
self->addedConflicts.insert(conflict, true);
|
self->addedConflicts.insert(conflict, true);
|
||||||
}
|
}
|
||||||
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
|
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
|
||||||
|
@ -922,9 +912,7 @@ struct WriteDuringReadWorkload : TestWorkload {
|
||||||
if (noConflict)
|
if (noConflict)
|
||||||
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
|
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
|
||||||
tr.clear(key);
|
tr.clear(key);
|
||||||
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
|
if (!noConflict && key.size() <= getMaxClearKeySize(key)) {
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT)) {
|
|
||||||
self->addedConflicts.insert(key, true);
|
self->addedConflicts.insert(key, true);
|
||||||
}
|
}
|
||||||
self->memoryDatabase.erase(key);
|
self->memoryDatabase.erase(key);
|
||||||
|
@ -936,18 +924,9 @@ struct WriteDuringReadWorkload : TestWorkload {
|
||||||
//TraceEvent("WDRAddWriteConflict").detail("Range", range);
|
//TraceEvent("WDRAddWriteConflict").detail("Range", range);
|
||||||
tr.addWriteConflictRange(range);
|
tr.addWriteConflictRange(range);
|
||||||
KeyRangeRef conflict(
|
KeyRangeRef conflict(
|
||||||
range.begin.substr(0,
|
range.begin.substr(
|
||||||
std::min<int>(range.begin.size(),
|
0, std::min<int>(range.begin.size(), getMaxKeySize(range.begin) + 1)),
|
||||||
(range.begin.startsWith(systemKeys.begin)
|
range.end.substr(0, std::min<int>(range.end.size(), getMaxKeySize(range.end) + 1)));
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1)),
|
|
||||||
range.end.substr(0,
|
|
||||||
std::min<int>(range.end.size(),
|
|
||||||
(range.end.startsWith(systemKeys.begin)
|
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1)));
|
|
||||||
self->addedConflicts.insert(conflict, true);
|
self->addedConflicts.insert(conflict, true);
|
||||||
} else if (operationType == 8 && !disableDelay) {
|
} else if (operationType == 8 && !disableDelay) {
|
||||||
double maxTime = 6.0;
|
double maxTime = 6.0;
|
||||||
|
@ -991,18 +970,10 @@ struct WriteDuringReadWorkload : TestWorkload {
|
||||||
tr.atomicOp(versionStampKey, value, MutationRef::SetVersionstampedKey);
|
tr.atomicOp(versionStampKey, value, MutationRef::SetVersionstampedKey);
|
||||||
tr.clear(range);
|
tr.clear(range);
|
||||||
KeyRangeRef conflict(
|
KeyRangeRef conflict(
|
||||||
range.begin.substr(0,
|
range.begin.substr(
|
||||||
std::min<int>(range.begin.size(),
|
0, std::min<int>(range.begin.size(), getMaxClearKeySize(range.begin) + 1)),
|
||||||
(range.begin.startsWith(systemKeys.begin)
|
range.end.substr(
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
0, std::min<int>(range.end.size(), getMaxClearKeySize(range.end) + 1)));
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1)),
|
|
||||||
range.end.substr(0,
|
|
||||||
std::min<int>(range.end.size(),
|
|
||||||
(range.end.startsWith(systemKeys.begin)
|
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT) +
|
|
||||||
1)));
|
|
||||||
self->addedConflicts.insert(conflict, true);
|
self->addedConflicts.insert(conflict, true);
|
||||||
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
|
self->memoryDatabase.erase(self->memoryDatabase.lower_bound(range.begin),
|
||||||
self->memoryDatabase.lower_bound(range.end));
|
self->memoryDatabase.lower_bound(range.end));
|
||||||
|
@ -1043,10 +1014,9 @@ struct WriteDuringReadWorkload : TestWorkload {
|
||||||
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
|
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
|
||||||
tr.atomicOp(key, value, opType);
|
tr.atomicOp(key, value, opType);
|
||||||
//TraceEvent("WDRAtomicOpSuccess").detail("Key", key).detail("Value", value.size());
|
//TraceEvent("WDRAtomicOpSuccess").detail("Key", key).detail("Value", value.size());
|
||||||
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
|
if (!noConflict && key.size() <= getMaxWriteKeySize(key, false)) {
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
self->addedConflicts.insert(key, true);
|
self->addedConflicts.insert(key, true);
|
||||||
|
}
|
||||||
Optional<Value> existing = self->memoryGet(&self->memoryDatabase, key);
|
Optional<Value> existing = self->memoryGet(&self->memoryDatabase, key);
|
||||||
self->memoryDatabase[key] =
|
self->memoryDatabase[key] =
|
||||||
self->applyAtomicOp(existing.present() ? Optional<StringRef>(existing.get())
|
self->applyAtomicOp(existing.present() ? Optional<StringRef>(existing.get())
|
||||||
|
@ -1063,10 +1033,9 @@ struct WriteDuringReadWorkload : TestWorkload {
|
||||||
if (noConflict)
|
if (noConflict)
|
||||||
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
|
tr.setOption(FDBTransactionOptions::NEXT_WRITE_NO_WRITE_CONFLICT_RANGE);
|
||||||
tr.set(key, value);
|
tr.set(key, value);
|
||||||
if (!noConflict && key.size() <= (key.startsWith(systemKeys.begin)
|
if (!noConflict && key.size() <= getMaxWriteKeySize(key, false)) {
|
||||||
? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT
|
|
||||||
: CLIENT_KNOBS->KEY_SIZE_LIMIT))
|
|
||||||
self->addedConflicts.insert(key, true);
|
self->addedConflicts.insert(key, true);
|
||||||
|
}
|
||||||
//TraceEvent("WDRSetSuccess").detail("Key", key).detail("Value", value.size());
|
//TraceEvent("WDRSetSuccess").detail("Key", key).detail("Value", value.size());
|
||||||
self->memoryDatabase[key] = value;
|
self->memoryDatabase[key] = value;
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,9 @@
|
||||||
#include "flow/flow.h"
|
#include "flow/flow.h"
|
||||||
#include "flow/genericactors.actor.h"
|
#include "flow/genericactors.actor.h"
|
||||||
|
|
||||||
|
#if defined(HAVE_WOLFSSL)
|
||||||
|
#include <wolfssl/options.h>
|
||||||
|
#endif
|
||||||
#include <openssl/aes.h>
|
#include <openssl/aes.h>
|
||||||
#include <openssl/engine.h>
|
#include <openssl/engine.h>
|
||||||
#include <openssl/evp.h>
|
#include <openssl/evp.h>
|
||||||
|
|
|
@ -84,6 +84,10 @@ set(FLOW_SRCS
|
||||||
actorcompiler.h
|
actorcompiler.h
|
||||||
crc32c.h
|
crc32c.h
|
||||||
crc32c.cpp
|
crc32c.cpp
|
||||||
|
ppc-asm.h
|
||||||
|
crc32.S
|
||||||
|
crc32_wrapper.h
|
||||||
|
crc32_wrapper.c
|
||||||
error_definitions.h
|
error_definitions.h
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h
|
${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h
|
||||||
flat_buffers.cpp
|
flat_buffers.cpp
|
||||||
|
@ -172,6 +176,10 @@ if(NOT WITH_TLS)
|
||||||
else()
|
else()
|
||||||
target_link_libraries(flow PUBLIC OpenSSL::SSL)
|
target_link_libraries(flow PUBLIC OpenSSL::SSL)
|
||||||
target_link_libraries(flow_sampling PUBLIC OpenSSL::SSL)
|
target_link_libraries(flow_sampling PUBLIC OpenSSL::SSL)
|
||||||
|
if(USE_WOLFSSL)
|
||||||
|
target_include_directories(flow SYSTEM BEFORE PUBLIC ${WOLFSSL_INCLUDE_DIR}/wolfssl)
|
||||||
|
target_include_directories(flow_sampling SYSTEM BEFORE PUBLIC ${WOLFSSL_INCLUDE_DIR}/wolfssl)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(flow PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
|
target_link_libraries(flow PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
|
||||||
target_link_libraries(flow_sampling PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
|
target_link_libraries(flow_sampling PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue