Merge remote-tracking branch 'upstream/master' into fdbcli-hints
commit 9545091899
@@ -179,9 +179,6 @@ set(SEED "0x${SEED_}" CACHE STRING "Random seed for testing")
################################################################################
include(CompileBoost)
if(WITH_TLS)
add_subdirectory(FDBLibTLS)
endif()
add_subdirectory(flow)
add_subdirectory(fdbrpc)
add_subdirectory(fdbclient)
Makefile (11 changed lines)
@@ -51,7 +51,7 @@ ifeq ($(PLATFORM),Linux)
CXXFLAGS += -std=c++17
BOOST_BASEDIR ?= /opt
TLS_LIBDIR ?= /usr/local/lib
TLS_LIBDIR ?= /usr/local/lib64
DLEXT := so
java_DLEXT := so
TARGET_LIBC_VERSION ?= 2.11
@@ -67,7 +67,7 @@ else ifeq ($(PLATFORM),Darwin)
.LIBPATTERNS := lib%.dylib lib%.a
BOOST_BASEDIR ?= ${HOME}
TLS_LIBDIR ?= /usr/local/lib
TLS_LIBDIR ?= /usr/local/lib64
DLEXT := dylib
java_DLEXT := jnilib
else
@@ -112,8 +112,8 @@ CFLAGS += -DTLS_DISABLED
FDB_TLS_LIB :=
TLS_LIBS :=
else
FDB_TLS_LIB := lib/libFDBLibTLS.a
TLS_LIBS += $(addprefix $(TLS_LIBDIR)/,libtls.a libssl.a libcrypto.a)
FDB_TLS_LIB :=
TLS_LIBS += $(addprefix $(TLS_LIBDIR)/,libssl.a libcrypto.a)
endif
CXXFLAGS += -Wno-deprecated -DBOOST_ERROR_CODE_HEADER_ONLY -DBOOST_SYSTEM_NO_DEPRECATED
@@ -126,9 +126,6 @@ VPATH += $(addprefix :,$(filter-out lib,$(patsubst -L%,%,$(filter -L%,$(LDFLAGS)
CS_PROJECTS := flow/actorcompiler flow/coveragetool fdbclient/vexillographer
CPP_PROJECTS := flow fdbrpc fdbclient fdbbackup fdbserver fdbcli bindings/c bindings/java fdbmonitor bindings/flow/tester bindings/flow
ifndef TLS_DISABLED
CPP_PROJECTS += FDBLibTLS
endif
OTHER_PROJECTS := bindings/python bindings/ruby bindings/go
CS_MK_GENERATED := $(CS_PROJECTS:=/generated.mk)
@@ -38,6 +38,21 @@ else()
endif()
add_dependencies(fdb_c fdb_c_generated fdb_c_options)
target_link_libraries(fdb_c PUBLIC $<BUILD_INTERFACE:fdbclient>)
if(APPLE)
set(symbols ${CMAKE_CURRENT_BINARY_DIR}/fdb_c.symbols)
add_custom_command(OUTPUT ${symbols}
COMMAND $<TARGET_FILE:Python::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/symbolify.py
${CMAKE_CURRENT_SOURCE_DIR}/foundationdb/fdb_c.h
${symbols}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/symbolify.py ${CMAKE_CURRENT_SOURCE_DIR}/foundationdb/fdb_c.h
COMMENT "Generate exported_symbols_list")
add_custom_target(exported_symbols_list DEPENDS ${symbols})
add_dependencies(fdb_c exported_symbols_list)
target_link_options(fdb_c PRIVATE "LINKER:-no_weak_exports,-exported_symbols_list,${symbols}")
elseif(WIN32)
else()
target_link_options(fdb_c PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/fdb_c.map,-z,nodelete")
endif()
target_include_directories(fdb_c PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
@@ -107,7 +107,12 @@ fdb_error_t fdb_network_set_option( FDBNetworkOption option,
}
fdb_error_t fdb_setup_network_impl() {
CATCH_AND_RETURN( API->setupNetwork(); );
CATCH_AND_RETURN(
try {
API->setupNetwork();
} catch (boost::system::system_error& e) {
return error_code_tls_error;
} );
}
fdb_error_t fdb_setup_network_v13( const char* localAddress ) {
@@ -0,0 +1,10 @@
if __name__ == '__main__':
    import re
    import sys
    r = re.compile('DLLEXPORT[^(]*(fdb_[^(]*)[(]')
    (fdb_c_h, symbols_file) = sys.argv[1:]
    with open(fdb_c_h, 'r') as f:
        symbols = sorted(set('_' + m.group(1) for m in r.finditer(f.read())))
    with open(symbols_file, 'w') as f:
        f.write('\n'.join(symbols))
        f.write('\n')
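The new script relies on greedy backtracking so that the captured group starts at the exported function name rather than at the fdb_error_t return type. A minimal sketch of that behaviour (the header line below is illustrative, not copied from fdb_c.h):

    import re
    r = re.compile('DLLEXPORT[^(]*(fdb_[^(]*)[(]')
    line = 'DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_setup_network();'
    print(['_' + m.group(1) for m in r.finditer(line)])  # ['_fdb_setup_network']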
@@ -82,7 +82,7 @@ void fdb_flow_test() {
fdb->setupNetwork();
startThread(networkThread, fdb);
g_network = newNet2( false );
g_network = newNet2(false);
openTraceFile(NetworkAddress(), 1000000, 1000000, ".");
systemMonitor();
@@ -23,6 +23,7 @@
fdb_flow_tester_CFLAGS := -Ibindings/c $(fdbrpc_CFLAGS)
fdb_flow_tester_LDFLAGS := -Llib $(fdbrpc_LDFLAGS) -lfdb_c
fdb_flow_tester_LIBS := lib/libfdb_flow.a lib/libflow.a lib/libfdb_c.$(DLEXT)
fdb_flow_tester_STATIC_LIBS := $(TLS_LIBS)
fdb_flow_tester: lib/libfdb_c.$(DLEXT)
@mkdir -p bindings/flow/bin
@@ -54,7 +54,8 @@ type RangeOptions struct {
// Reverse indicates that the read should be performed in lexicographic
// (false) or reverse lexicographic (true) order. When Reverse is true and
// Limit is non-zero, the last Limit key-value pairs in the range are
// returned.
// returned. Reading ranges in reverse is supported natively by the
// database and should have minimal extra cost.
Reverse bool
}
@@ -184,7 +184,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order
* @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
*
* @return a handle to access the results of the asynchronous call
*/
@@ -205,7 +207,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order
* @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* @param mode provide a hint about how the results are to be used. This
* can provide speed improvements or efficiency gains based on the caller's
* knowledge of the upcoming access pattern.
@@ -272,7 +276,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order
* @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
*
* @return a handle to access the results of the asynchronous call
*/
@@ -293,7 +299,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order
* @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* @param mode provide a hint about how the results are to be used. This
* can provide speed improvements or efficiency gains based on the caller's
* knowledge of the upcoming access pattern.
@@ -369,7 +377,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order
* @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
*
* @return a handle to access the results of the asynchronous call
*/
@@ -393,7 +403,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order
* @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* @param mode provide a hint about how the results are to be used. This
* can provide speed improvements or efficiency gains based on the caller's
* knowledge of the upcoming access pattern.
@@ -817,9 +817,9 @@ public class DirectoryLayer implements Directory {
private static long unpackLittleEndian(byte[] bytes) {
assert bytes.length == 8;
int value = 0;
long value = 0;
for(int i = 0; i < 8; ++i) {
value += (bytes[i] << (i * 8));
value += (Byte.toUnsignedLong(bytes[i]) << (i * 8));
}
return value;
}
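The corrected version widens the accumulator to long and treats each byte as unsigned before shifting; with an int accumulator and signed bytes, any byte of 0x80 or above gets sign-extended and the upper shifts overflow. A rough Python illustration of the intended little-endian decoding (the byte values are made up for the example):

    raw = bytes([0x2A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])  # 42, little-endian
    value = sum(raw[i] << (i * 8) for i in range(8))  # unsigned bytes, no sign extension
    assert value == int.from_bytes(raw, 'little') == 42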
@@ -1,6 +1,4 @@
FROM centos:6
LABEL version=0.1.9
ENV DOCKER_IMAGEVER=0.1.9
# Install dependencies for developer tools, bindings,\
# documentation, actorcompiler, and packaging tools\
@@ -8,9 +6,10 @@ RUN yum install -y yum-utils &&\
yum-config-manager --enable rhel-server-rhscl-7-rpms &&\
yum -y install centos-release-scl epel-release &&\
yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \
devtoolset-8-gcc-8.3.1-3.1.el6 devtoolset-8-gcc-c++-8.3.1-3.1.el6 \
rh-python36-python-devel devtoolset-8-valgrind-devel \
mono-core rh-ruby24 golang python27 rpm-build debbuild \
python-pip npm dos2unix valgrind-devel ccache distcc devtoolset-8-libubsan-devel libubsan-devel &&\
python-pip dos2unix valgrind-devel ccache distcc devtoolset-8-libubsan-devel libubsan-devel &&\
pip install boto3==1.1.1
USER root
@@ -19,32 +18,42 @@ RUN adduser --comment '' fdb && chown fdb /opt
# wget of bintray without forcing UTF-8 encoding results in 403 Forbidden
RUN cd /opt/ &&\
curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 > boost_1_67_0.tar.bz2 &&\
echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha.txt &&\
sha256sum -c boost-sha.txt &&\
curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 &&\
echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha-67.txt &&\
sha256sum -c boost-sha-67.txt &&\
tar -xjf boost_1_67_0.tar.bz2 &&\
rm -rf boost_1_67_0.tar.bz2 boost-sha.txt boost_1_67_0/libs
rm -rf boost_1_67_0.tar.bz2 boost-sha-67.txt boost_1_67_0/libs &&\
curl -L https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 &&\
echo "59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722 boost_1_72_0.tar.bz2" > boost-sha-72.txt &&\
sha256sum -c boost-sha-72.txt &&\
tar -xjf boost_1_72_0.tar.bz2 &&\
rm -rf boost_1_72_0.tar.bz2 boost-sha-72.txt boost_1_72_0/libs
# install cmake
RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz > /tmp/cmake.tar.gz &&\
RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz -o /tmp/cmake.tar.gz &&\
echo "563a39e0a7c7368f81bfa1c3aff8b590a0617cdfe51177ddc808f66cc0866c76 /tmp/cmake.tar.gz" > /tmp/cmake-sha.txt &&\
sha256sum -c /tmp/cmake-sha.txt &&\
cd /tmp && tar xf cmake.tar.gz &&\
cp -r cmake-3.13.4-Linux-x86_64/* /usr/local/ &&\
rm -rf cmake.tar.gz cmake-3.13.4-Linux-x86_64 cmake-sha.txt
# install LibreSSL
RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip > ninja.zip &&\
# install Ninja
RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -o ninja.zip &&\
unzip ninja.zip && cd ninja-1.9.0 && scl enable devtoolset-8 -- ./configure.py --bootstrap && cp ninja /usr/bin &&\
cd .. && rm -rf ninja-1.9.0 ninja.zip &&\
curl -L https://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.8.2.tar.gz > /tmp/libressl.tar.gz &&\
cd /tmp && echo "b8cb31e59f1294557bfc80f2a662969bc064e83006ceef0574e2553a1c254fd5 libressl.tar.gz" > libressl-sha.txt &&\
sha256sum -c libressl-sha.txt && tar xf libressl.tar.gz &&\
cd libressl-2.8.2 && cd /tmp/libressl-2.8.2 && scl enable devtoolset-8 -- ./configure --prefix=/usr/local/stow/libressl CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
cd /tmp/libressl-2.8.2 && scl enable devtoolset-8 -- make -j`nproc` install &&\
rm -rf /tmp/libressl-2.8.2 /tmp/libressl.tar.gz
cd .. && rm -rf ninja-1.9.0 ninja.zip
# install openssl
RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1d.tar.gz -o openssl.tar.gz &&\
echo "1e3a91bc1f9dfce01af26026f856e064eab4c8ee0a8f457b5ae30b40b8b711f2 openssl.tar.gz" > openssl-sha.txt &&\
sha256sum -c openssl-sha.txt && tar -xzf openssl.tar.gz &&\
cd openssl-1.1.1d && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
scl enable devtoolset-8 -- make -j`nproc` && scl enable devtoolset-8 -- make -j1 install &&\
ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\
cd /tmp/ && rm -rf /tmp/openssl-1.1.1d /tmp/openssl.tar.gz
LABEL version=0.1.12
ENV DOCKER_IMAGEVER=0.1.12
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
CMD scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash
CMD scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash
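Every download in the Dockerfile follows the same fetch-then-verify pattern: record the expected SHA-256 next to the artifact and fail the build if sha256sum -c disagrees. A minimal sketch of the same check in Python (file name reuses the boost digest above; the helper name is made up):

    import hashlib

    def verify_sha256(path, expected_hex):
        # Hash the downloaded file and reject it on any mismatch.
        with open(path, 'rb') as f:
            digest = hashlib.sha256(f.read()).hexdigest()
        if digest != expected_hex:
            raise ValueError('checksum mismatch for ' + path)

    verify_sha256('boost_1_72_0.tar.bz2',
                  '59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722')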
@@ -2,7 +2,7 @@ version: "3"
services:
common: &common
image: foundationdb/foundationdb-build:0.1.9
image: foundationdb/foundationdb-build:0.1.12
build-setup: &build-setup
<<: *common
@@ -36,11 +36,11 @@ services:
release-packages: &release-packages
<<: *release-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages'
snapshot-packages: &snapshot-packages
<<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages'
prb-packages:
<<: *snapshot-packages
@@ -48,11 +48,11 @@ services:
release-bindings: &release-bindings
<<: *release-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings'
snapshot-bindings: &snapshot-bindings
<<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings'
prb-bindings:
<<: *snapshot-bindings
@@ -60,7 +60,7 @@ services:
snapshot-cmake: &snapshot-cmake
<<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DUSE_WERROR=1 -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack'
prb-cmake:
<<: *snapshot-cmake
@@ -68,7 +68,7 @@ services:
snapshot-ctest: &snapshot-ctest
<<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DUSE_WERROR=1 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
prb-ctest:
<<: *snapshot-ctest
@@ -76,7 +76,7 @@ services:
snapshot-correctness: &snapshot-correctness
<<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DUSE_WERROR=1 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure'
prb-correctness:
<<: *snapshot-correctness
@@ -9,21 +9,32 @@ if(USE_VALGRIND)
endif()
################################################################################
# LibreSSL
# SSL
################################################################################
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find LibreSSL and always build without TLS support")
if(DISABLE_TLS)
set(WITH_TLS OFF)
else()
set(LIBRESSL_USE_STATIC_LIBS TRUE)
find_package(LibreSSL)
if(LibreSSL_FOUND)
set(OPENSSL_USE_STATIC_LIBS TRUE)
find_package(OpenSSL)
if(NOT OPENSSL_FOUND)
set(LIBRESSL_USE_STATIC_LIBS TRUE)
find_package(LibreSSL)
if (LIBRESSL_FOUND)
add_library(OpenSSL::SSL ALIAS LibreSSL)
endif()
endif()
if(OPENSSL_FOUND OR LIBRESSL_FOUND)
set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL)
else()
message(STATUS "LibreSSL NOT Found - Will compile without TLS Support")
message(STATUS "You can set LibreSSL_ROOT to the LibreSSL install directory to help cmake find it")
message(STATUS "Neither OpenSSL nor LibreSSL were found - Will compile without TLS Support")
message(STATUS "You can set OPENSSL_ROOT_DIR or LibreSSL_ROOT to the LibreSSL install directory to help cmake find it")
set(WITH_TLS OFF)
endif()
if(WIN32)
message(STATUS "TLS is temporarilty disabled on macOS while libressl -> openssl transition happens")
set(WITH_TLS OFF)
endif()
endif()
@@ -59,8 +70,8 @@ endif()
# Pip
################################################################################
find_package(Virtualenv)
if (Virtualenv_FOUND)
find_package(Python3 COMPONENTS Interpreter)
if (Python3_Interpreter_FOUND)
set(WITH_DOCUMENTATION ON)
else()
set(WITH_DOCUMENTATION OFF)
@@ -1,20 +0,0 @@
find_program(_VIRTUALENV_EXE virtualenv)
# get version and test that program actually works
if(_VIRTUALENV_EXE)
execute_process(
COMMAND ${_VIRTUALENV_EXE} --version
RESULT_VARIABLE ret_code
OUTPUT_VARIABLE version_string
ERROR_VARIABLE error_output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(ret_code EQUAL 0 AND NOT ERROR_VARIABLE)
# we found a working virtualenv
set(VIRTUALENV_EXE ${_VIRTUALENV_EXE})
set(VIRTUALENV_VERSION version_string)
endif()
endif()
find_package_handle_standard_args(Virtualenv
REQUIRED_VARS VIRTUALENV_EXE
VERSION_VAR ${VIRTUALENV_VERSION})
@@ -10,7 +10,7 @@ set(pip_command ${venv_dir}/bin/pip${EXE_SUFFIX})
set(python_command ${venv_dir}/bin/python${EXE_SUFFIX})
add_custom_command(OUTPUT ${venv_dir}/venv_setup
COMMAND ${VIRTUALENV_EXE} venv &&
COMMAND ${Python3_EXECUTABLE} -m venv venv &&
${CMAKE_COMMAND} -E copy ${sphinx_dir}/.pip.conf ${venv_dir}/pip.conf &&
. ${venv_dir}/bin/activate &&
${pip_command} install --upgrade pip &&
@@ -86,7 +86,7 @@ else()
endif()
add_custom_target(docpreview
COMMAND ${python_command} -m SimpleHTTPServer ${port}
COMMAND ${python_command} -m http.server ${port}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html
USES_TERMINAL)
add_dependencies(docpreview html)
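The docpreview target now serves the generated html directory with Python 3's http.server module instead of the Python 2-only SimpleHTTPServer. A minimal sketch of an equivalent standalone preview server, assuming Python 3.7+ (directory and port are illustrative):

    import functools
    import http.server

    handler = functools.partial(http.server.SimpleHTTPRequestHandler, directory='html')
    http.server.ThreadingHTTPServer(('127.0.0.1', 8000), handler).serve_forever()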
@@ -693,12 +693,18 @@ Upgrades from 6.1.x will keep all your old data and configuration settings. Data
Upgrading from 6.0.x
--------------------
Upgrades from 6.0.x will keep all your old data and configuration settings. Data distribution will slowly reorganize how data is spread across storage servers.
Upgrades from 6.0.x will keep all your old data and configuration settings.
Upgrading from 5.2.x
--------------------
Upgrades from 5.2.x will keep all your old data and configuration settings.
Upgrades from 5.2.x will keep all your old data and configuration settings. Some affinities that certain roles have for running on processes that haven't set a process class have changed, which may result in these processes running in different locations after upgrading. To avoid this, set process classes as needed. The following changes were made:
* The proxies and master no longer prefer ``resolution`` or ``transaction`` class processes to processes with unset class.
* The resolver no longer prefers ``transaction`` class processes to processes with unset class.
* The cluster controller no longer prefers ``master``, ``resolution`` or ``proxy`` class processes to processes with unset class.
See :ref:`guidelines-process-class-config` for recommendations on setting process classes. All of the above roles will prefer ``stateless`` class processes to ones that don't set a class.
Upgrading from 5.0.x - 5.1.x
----------------------------
@@ -528,8 +528,7 @@ Applications must provide error handling and an appropriate retry loop around th
|snapshot|
``reverse``
If non-zero, key-value pairs will be returned in reverse lexicographical order beginning at the end of the range.
If non-zero, key-value pairs will be returned in reverse lexicographical order beginning at the end of the range. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
.. type:: FDBStreamingMode
@@ -293,7 +293,7 @@ A |database-blurb1| |database-blurb2|
If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned.
If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order.
If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how to retrieve the specified range. This option should generally not be specified, allowing FoundationDB to retrieve the full range very efficiently.
@@ -505,7 +505,7 @@ Reading data
If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned.
If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order.
If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how the returned container is likely to be used. The default is :data:`StreamingMode.iterator`.
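Since several bindings gain the same note about reverse reads being served natively, a short Python-binding sketch of reading the last few keys of a range (key names, limit, and the API version are illustrative):

    import fdb
    fdb.api_version(610)
    db = fdb.open()

    # Last 10 key-value pairs in ['a', 'b'): returned in reverse (descending key) order.
    for k, v in db.get_range(b'a', b'b', limit=10, reverse=True):
        print(k, v)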
@@ -287,7 +287,7 @@ A |database-blurb1| |database-blurb2|
Only the first ``limit`` keys (and their values) in the range will be returned.
``:reverse``
If ``true``, then the keys in the range will be returned in reverse order.
If ``true``, then the keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order.
@@ -461,7 +461,7 @@ Reading data
Only the first ``limit`` keys (and their values) in the range will be returned.
``:reverse``
If true, then the keys in the range will be returned in reverse order.
If ``true``, then the keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order.
@@ -27,11 +27,11 @@ System requirements
* Or, an unsupported Linux distribution with:
* Kernel version between 2.6.33 and 3.0.x (inclusive) or 3.7 or greater
* Works with .deb or .rpm packages
* Preferably .deb or .rpm package support
* Or, macOS 10.7 or later
.. warning:: The macOS version of the FoundationDB server is intended for use on locally accessible development machines only. Other uses are not supported.
.. warning:: The macOS and Windows versions of the FoundationDB server are intended for use on locally accessible development machines only. Other uses are not supported.
* 4GB **ECC** RAM (per fdbserver process)
* Storage
@@ -387,6 +387,8 @@ FoundationDB will never use processes on the same machine for the replication of
FoundationDB replicates data to three machines, and at least three available machines are required to make progress. This is the recommended mode for a cluster of five or more machines in a single datacenter.
.. note:: When running in cloud environments with managed disks that are already replicated and persistent, ``double`` replication may still be considered for 5+ machine clusters. This will result in lower availability fault tolerance for planned or unplanned failures and lower total read throughput, but offers a reasonable tradeoff for cost.
``three_data_hall`` mode
FoundationDB stores data in triplicate, with one copy on a storage server in each of three data halls. The transaction logs are replicated four times, with two data halls containing two replicas apiece. Four available machines (two in each of two data halls) are therefore required to make progress. This configuration enables the cluster to remain available after losing a single data hall and one machine in another data hall.
@@ -768,14 +770,12 @@ Region configuration is better in almost all ways than the ``three_datacenter``
Known limitations
-----------------
The 6.0 release still has a number of rough edges related to region configuration. This is a collection of all the issues that have been pointed out in the sections above. These issues should be significantly improved in future releases of FoundationDB:
The 6.2 release still has a number of rough edges related to region configuration. This is a collection of all the issues that have been pointed out in the sections above. These issues should be significantly improved in future releases of FoundationDB:
* FoundationDB supports replicating data to at most two regions.
* ``two_satellite_fast`` does not hide latency properly when configured with more than 4 satellite transaction logs.
* While a datacenter has failed, the maximum write throughput of the cluster will be roughly 1/3 of normal performance.
.. _guidelines-process-class-config:
Guidelines for setting process class
@@ -156,6 +156,7 @@ Other Changes
* Does not support upgrades from any version older than 5.0.
* Normalized the capitalization of trace event names and attributes. `(PR #455) <https://github.com/apple/foundationdb/pull/455>`_
* Various stateless processes now have a higher affinity for running on processes with unset process class, which may result in those roles changing location upon upgrade. See :ref:`version-specific-upgrading` for details. `(PR #526) <https://github.com/apple/foundationdb/pull/526>`_
* Increased the memory requirements of the transaction log by 400MB. [6.0.5] `(PR #673) <https://github.com/apple/foundationdb/pull/673>`_
Earlier release notes
@ -37,7 +37,6 @@
|
|||
#include "fdbclient/json_spirit/json_spirit_writer_template.h"
|
||||
|
||||
#include "fdbrpc/Platform.h"
|
||||
#include "fdbrpc/TLSConnection.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
|
@ -3225,22 +3224,22 @@ int main(int argc, char* argv[]) {
|
|||
blobCredentials.push_back(args->OptionArg());
|
||||
break;
|
||||
#ifndef TLS_DISABLED
|
||||
case TLSOptions::OPT_TLS_PLUGIN:
|
||||
case TLSParams::OPT_TLS_PLUGIN:
|
||||
args->OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CERTIFICATES:
|
||||
case TLSParams::OPT_TLS_CERTIFICATES:
|
||||
tlsCertPath = args->OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_PASSWORD:
|
||||
case TLSParams::OPT_TLS_PASSWORD:
|
||||
tlsPassword = args->OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CA_FILE:
|
||||
case TLSParams::OPT_TLS_CA_FILE:
|
||||
tlsCAPath = args->OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_KEY:
|
||||
case TLSParams::OPT_TLS_KEY:
|
||||
tlsKeyPath = args->OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_VERIFY_PEERS:
|
||||
case TLSParams::OPT_TLS_VERIFY_PEERS:
|
||||
tlsVerifyPeers = args->OptionArg();
|
||||
break;
|
||||
#endif
|
||||
|
@ -3855,6 +3854,13 @@ int main(int argc, char* argv[]) {
|
|||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "MainError").error(e);
|
||||
status = FDB_EXIT_MAIN_ERROR;
|
||||
} catch (boost::system::system_error& e) {
|
||||
if (g_network) {
|
||||
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
|
||||
} else {
|
||||
fprintf(stderr, "ERROR: %s (%d)\n", e.what(), e.code().value());
|
||||
}
|
||||
status = FDB_EXIT_MAIN_EXCEPTION;
|
||||
} catch (std::exception& e) {
|
||||
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
|
||||
status = FDB_EXIT_MAIN_EXCEPTION;
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
#include "fdbclient/FDBOptions.g.h"
|
||||
|
||||
#include "flow/DeterministicRandom.h"
|
||||
#include "fdbrpc/TLSConnection.h"
|
||||
#include "fdbrpc/Platform.h"
|
||||
|
||||
#include "flow/SimpleOpt.h"
|
||||
|
@ -1602,9 +1601,9 @@ ACTOR Future<Void> timeWarning( double when, const char* msg ) {
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> checkStatus(Future<Void> f, Reference<ClusterConnectionFile> clusterFile, bool displayDatabaseAvailable = true) {
|
||||
ACTOR Future<Void> checkStatus(Future<Void> f, Database db, bool displayDatabaseAvailable = true) {
|
||||
wait(f);
|
||||
StatusObject s = wait(StatusClient::statusFetcher(clusterFile));
|
||||
StatusObject s = wait(StatusClient::statusFetcher(db));
|
||||
printf("\n");
|
||||
printStatus(s, StatusClient::MINIMAL, displayDatabaseAvailable);
|
||||
printf("\n");
|
||||
|
@ -1646,7 +1645,7 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
|
|||
|
||||
state Optional<ConfigureAutoResult> conf;
|
||||
if( tokens[startToken] == LiteralStringRef("auto") ) {
|
||||
StatusObject s = wait( makeInterruptable(StatusClient::statusFetcher( ccf )) );
|
||||
StatusObject s = wait( makeInterruptable(StatusClient::statusFetcher( db )) );
|
||||
if(warn.isValid())
|
||||
warn.cancel();
|
||||
|
||||
|
@ -1776,6 +1775,10 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
|
|||
printf("Configuration changed\n");
|
||||
ret=false;
|
||||
break;
|
||||
case ConfigurationResult::LOCKED_NOT_NEW:
|
||||
printf("ERROR: `only new databases can be configured as locked`\n");
|
||||
ret = true;
|
||||
break;
|
||||
default:
|
||||
ASSERT(false);
|
||||
ret=true;
|
||||
|
@ -2091,7 +2094,7 @@ ACTOR Future<bool> exclude( Database db, std::vector<StringRef> tokens, Referenc
|
|||
return true;
|
||||
}
|
||||
}
|
||||
StatusObject status = wait( makeInterruptable( StatusClient::statusFetcher( ccf ) ) );
|
||||
StatusObject status = wait( makeInterruptable( StatusClient::statusFetcher( db ) ) );
|
||||
|
||||
state std::string errorString = "ERROR: Could not calculate the impact of this exclude on the total free space in the cluster.\n"
|
||||
"Please try the exclude again in 30 seconds.\n"
|
||||
|
@ -2537,22 +2540,22 @@ struct CLIOptions {
|
|||
|
||||
#ifndef TLS_DISABLED
|
||||
// TLS Options
|
||||
case TLSOptions::OPT_TLS_PLUGIN:
|
||||
case TLSParams::OPT_TLS_PLUGIN:
|
||||
args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CERTIFICATES:
|
||||
case TLSParams::OPT_TLS_CERTIFICATES:
|
||||
tlsCertPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CA_FILE:
|
||||
case TLSParams::OPT_TLS_CA_FILE:
|
||||
tlsCAPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_KEY:
|
||||
case TLSParams::OPT_TLS_KEY:
|
||||
tlsKeyPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_PASSWORD:
|
||||
case TLSParams::OPT_TLS_PASSWORD:
|
||||
tlsPassword = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_VERIFY_PEERS:
|
||||
case TLSParams::OPT_TLS_VERIFY_PEERS:
|
||||
tlsVerifyPeers = args.OptionArg();
|
||||
break;
|
||||
#endif
|
||||
|
@ -2603,7 +2606,7 @@ ACTOR Future<Void> addInterface( std::map<Key,std::pair<Value,ClientLeaderRegInt
|
|||
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
|
||||
}
|
||||
}
|
||||
when( wait(delay(1.0)) ) {}
|
||||
when( wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT)) ) {}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
@ -2666,7 +2669,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
|
||||
if (!opt.exec.present()) {
|
||||
if(opt.initialStatusCheck) {
|
||||
Future<Void> checkStatusF = checkStatus(Void(), db->getConnectionFile());
|
||||
Future<Void> checkStatusF = checkStatus(Void(), db);
|
||||
wait(makeInterruptable(success(checkStatusF)));
|
||||
}
|
||||
else {
|
||||
|
@ -2704,7 +2707,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
linenoise.historyAdd(line);
|
||||
}
|
||||
|
||||
warn = checkStatus(timeWarning(5.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n"), db->getConnectionFile());
|
||||
warn = checkStatus(timeWarning(5.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n"), db);
|
||||
|
||||
try {
|
||||
state UID randomID = deterministicRandom()->randomUniqueID();
|
||||
|
@ -2849,7 +2852,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
continue;
|
||||
}
|
||||
|
||||
StatusObject s = wait(makeInterruptable(StatusClient::statusFetcher(db->getConnectionFile())));
|
||||
StatusObject s = wait(makeInterruptable(StatusClient::statusFetcher(db)));
|
||||
|
||||
if (!opt.exec.present()) printf("\n");
|
||||
printStatus(s, level);
|
||||
|
@ -3795,5 +3798,8 @@ int main(int argc, char **argv) {
|
|||
} catch (Error& e) {
|
||||
printf("ERROR: %s (%d)\n", e.what(), e.code());
|
||||
return 1;
|
||||
} catch (boost::system::system_error& e) {
|
||||
printf("ERROR: %s (%d)\n", e.what(), e.code().value());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -357,8 +357,23 @@ public:
return writeFile(snapshotFolderString(snapshotBeginVersion) + format("/%d/", snapshotFileCount / (BUGGIFY ? 1 : 5000)) + fileName);
}
// Find what should be the filename of a path by finding whatever is after the last forward or backward slash, or failing to find those, the whole string.
static std::string fileNameOnly(std::string path) {
// Find the last forward slash position, defaulting to 0 if not found
int pos = path.find_last_of('/');
if(pos == std::string::npos) {
pos = 0;
}
// Find the last backward slash position after pos, and update pos if found
int b = path.find_last_of('\\', pos);
if(b != std::string::npos) {
pos = b;
}
return path.substr(pos + 1);
}
static bool pathToRangeFile(RangeFile &out, std::string path, int64_t size) {
std::string name = basename(path);
std::string name = fileNameOnly(path);
RangeFile f;
f.fileName = path;
f.fileSize = size;
@@ -371,7 +386,7 @@ public:
}
static bool pathToLogFile(LogFile &out, std::string path, int64_t size) {
std::string name = basename(path);
std::string name = fileNameOnly(path);
LogFile f;
f.fileName = path;
f.fileSize = size;
@@ -389,7 +404,7 @@ public:
}
static bool pathToKeyspaceSnapshotFile(KeyspaceSnapshotFile &out, std::string path) {
std::string name = basename(path);
std::string name = fileNameOnly(path);
KeyspaceSnapshotFile f;
f.fileName = path;
int len;
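fileNameOnly replaces basename() so that the text after the last slash of either kind is treated as the file name, regardless of platform. A rough Python sketch of that intent, not a literal transcription of the C++ above (the sample paths are made up):

    def file_name_only(path):
        # Take whatever follows the last '/' or '\' (or the whole string if neither occurs).
        for sep in ('/', '\\'):
            path = path.rsplit(sep, 1)[-1]
        return path

    assert file_name_only('backups/snapshot/range.1') == 'range.1'
    assert file_name_only('backups\\snapshot\\range.1') == 'range.1'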
|
||||
|
|
|
@ -1959,8 +1959,8 @@ public:
|
|||
}
|
||||
|
||||
if (!g_network->isSimulated() && !forceAction) {
|
||||
state StatusObject srcStatus = wait(StatusClient::statusFetcher(backupAgent->taskBucket->src->getConnectionFile()));
|
||||
StatusObject destStatus = wait(StatusClient::statusFetcher(dest->getConnectionFile()));
|
||||
state StatusObject srcStatus = wait(StatusClient::statusFetcher(backupAgent->taskBucket->src));
|
||||
StatusObject destStatus = wait(StatusClient::statusFetcher(dest));
|
||||
checkAtomicSwitchOverConfig(srcStatus, destStatus, tagName);
|
||||
}
|
||||
|
||||
|
|
|
@ -192,6 +192,10 @@ public:
|
|||
Future<Void> clientInfoMonitor;
|
||||
Future<Void> connected;
|
||||
|
||||
Reference<AsyncVar<Optional<ClusterInterface>>> statusClusterInterface;
|
||||
Future<Void> statusLeaderMon;
|
||||
double lastStatusFetch;
|
||||
|
||||
int apiVersion;
|
||||
|
||||
int mvCacheInsertLocation;
|
||||
|
|
|
@ -46,6 +46,7 @@ ClientKnobs::ClientKnobs(bool randomize) {
|
|||
init( CLIENT_EXAMPLE_AMOUNT, 20 );
|
||||
init( MAX_CLIENT_STATUS_AGE, 1.0 );
|
||||
init( MAX_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_PROXY_CONNECTIONS = 1;
|
||||
init( STATUS_IDLE_TIMEOUT, 120.0 );
|
||||
|
||||
// wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin
|
||||
|
||||
|
@ -200,7 +201,8 @@ ClientKnobs::ClientKnobs(bool randomize) {
|
|||
|
||||
init( CONSISTENCY_CHECK_RATE_LIMIT_MAX, 50e6 ); // Limit in per sec
|
||||
init( CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME, 7 * 24 * 60 * 60 ); // 7 days
|
||||
|
||||
//fdbcli
|
||||
init( CLI_CONNECT_PARALLELISM, 10 );
|
||||
|
||||
//fdbcli
|
||||
init( CLI_CONNECT_PARALLELISM, 400 );
|
||||
init( CLI_CONNECT_TIMEOUT, 10.0 );
|
||||
}
|
||||
|
|
|
@ -45,6 +45,7 @@ public:
|
|||
int CLIENT_EXAMPLE_AMOUNT;
|
||||
double MAX_CLIENT_STATUS_AGE;
|
||||
int MAX_PROXY_CONNECTIONS;
|
||||
double STATUS_IDLE_TIMEOUT;
|
||||
|
||||
// wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin
|
||||
double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test)
|
||||
|
@ -190,10 +191,11 @@ public:
|
|||
|
||||
int CONSISTENCY_CHECK_RATE_LIMIT_MAX;
|
||||
int CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME;
|
||||
|
||||
//fdbcli
|
||||
int CLI_CONNECT_PARALLELISM;
|
||||
|
||||
// fdbcli
|
||||
int CLI_CONNECT_PARALLELISM;
|
||||
double CLI_CONNECT_TIMEOUT;
|
||||
|
||||
ClientKnobs(bool randomize = false);
|
||||
};
|
||||
|
||||
|
|
|
@ -52,6 +52,13 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
|
|||
return out;
|
||||
}
|
||||
|
||||
if (mode == "locked") {
|
||||
// Setting this key is interpreted as an instruction to use the normal version-stamp-based mechanism for locking
|
||||
// the database.
|
||||
out[databaseLockedKey.toString()] = deterministicRandom()->randomUniqueID().toString();
|
||||
return out;
|
||||
}
|
||||
|
||||
size_t pos;
|
||||
|
||||
// key:=value is unvalidated and unchecked
|
||||
|
@ -300,6 +307,17 @@ ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std:
|
|||
// make sure we have essential configuration options
|
||||
std::string initKey = configKeysPrefix.toString() + "initialized";
|
||||
state bool creating = m.count( initKey ) != 0;
|
||||
state Optional<UID> locked;
|
||||
{
|
||||
auto iter = m.find(databaseLockedKey.toString());
|
||||
if (iter != m.end()) {
|
||||
if (!creating) {
|
||||
return ConfigurationResult::LOCKED_NOT_NEW;
|
||||
}
|
||||
locked = UID::fromString(iter->second);
|
||||
m.erase(iter);
|
||||
}
|
||||
}
|
||||
if (creating) {
|
||||
m[initIdKey.toString()] = deterministicRandom()->randomUniqueID().toString();
|
||||
if (!isCompleteConfiguration(m)) {
|
||||
|
@ -486,6 +504,15 @@ ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std:
|
|||
tr.addReadConflictRange( singleKeyRange(m.begin()->first) );
|
||||
}
|
||||
|
||||
if (locked.present()) {
|
||||
ASSERT(creating);
|
||||
tr.atomicOp(databaseLockedKey,
|
||||
BinaryWriter::toValue(locked.get(), Unversioned())
|
||||
.withPrefix(LiteralStringRef("0123456789"))
|
||||
.withSuffix(LiteralStringRef("\x00\x00\x00\x00")),
|
||||
MutationRef::SetVersionstampedValue);
|
||||
}
|
||||
|
||||
for (auto i = m.begin(); i != m.end(); ++i) {
|
||||
tr.set( StringRef(i->first), StringRef(i->second) );
|
||||
}
|
||||
|
@ -958,9 +985,13 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
|
|||
|
||||
if(g_network->isSimulated()) {
|
||||
for(int i = 0; i < (desiredCoordinators.size()/2)+1; i++) {
|
||||
auto address = NetworkAddress(desiredCoordinators[i].ip,desiredCoordinators[i].port,true,false);
|
||||
g_simulator.protectedAddresses.insert(address);
|
||||
TraceEvent("ProtectCoordinator").detail("Address", address).backtrace();
|
||||
auto addresses = g_simulator.getProcessByAddress(desiredCoordinators[i])->addresses;
|
||||
|
||||
g_simulator.protectedAddresses.insert(addresses.address);
|
||||
if(addresses.secondaryAddress.present()) {
|
||||
g_simulator.protectedAddresses.insert(addresses.secondaryAddress.get());
|
||||
}
|
||||
TraceEvent("ProtectCoordinator").detail("Address", desiredCoordinators[i]).backtrace();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1119,8 +1150,7 @@ struct AutoQuorumChange : IQuorumChange {
|
|||
*err = CoordinatorsResult::NOT_ENOUGH_MACHINES;
|
||||
return vector<NetworkAddress>();
|
||||
}
|
||||
desiredCount = std::max(oldCoordinators.size(), (workers.size() - 1) | 1);
|
||||
chosen.resize(desiredCount);
|
||||
chosen.resize((chosen.size() - 1) | 1);
|
||||
}
|
||||
|
||||
return chosen;
|
||||
|
@ -1516,10 +1546,14 @@ ACTOR Future<std::set<NetworkAddress>> checkForExcludingServers(Database cx, vec
|
|||
state bool ok = true;
|
||||
inProgressExclusion.clear();
|
||||
for(auto& s : serverList) {
|
||||
auto addr = decodeServerListValue( s.value ).address();
|
||||
if ( addressExcluded(exclusions, addr) ) {
|
||||
auto addresses = decodeServerListValue( s.value ).getKeyValues.getEndpoint().addresses;
|
||||
if ( addressExcluded(exclusions, addresses.address) ) {
|
||||
ok = false;
|
||||
inProgressExclusion.insert(addr);
|
||||
inProgressExclusion.insert(addresses.address);
|
||||
}
|
||||
if ( addresses.secondaryAddress.present() && addressExcluded(exclusions, addresses.secondaryAddress.get()) ) {
|
||||
ok = false;
|
||||
inProgressExclusion.insert(addresses.secondaryAddress.get());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -61,7 +61,8 @@ public:
|
|||
NOT_ENOUGH_WORKERS,
|
||||
REGION_REPLICATION_MISMATCH,
|
||||
DCID_MISSING,
|
||||
SUCCESS
|
||||
LOCKED_NOT_NEW,
|
||||
SUCCESS,
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -37,12 +37,12 @@
|
|||
#include "fdbrpc/LoadBalance.h"
|
||||
#include "fdbrpc/Net2FileSystem.h"
|
||||
#include "fdbrpc/simulator.h"
|
||||
#include "fdbrpc/TLSConnection.h"
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/DeterministicRandom.h"
|
||||
#include "flow/Knobs.h"
|
||||
#include "flow/Platform.h"
|
||||
#include "flow/SystemMonitor.h"
|
||||
#include "flow/TLSPolicy.h"
|
||||
#include "flow/UnitTest.h"
|
||||
|
||||
#if defined(CMAKE_BUILD) || !defined(WIN32)
|
||||
|
@ -66,12 +66,15 @@ using std::min;
|
|||
using std::pair;
|
||||
|
||||
NetworkOptions networkOptions;
|
||||
Reference<TLSOptions> tlsOptions;
|
||||
TLSParams tlsParams;
|
||||
static Reference<TLSPolicy> tlsPolicy;
|
||||
|
||||
static void initTLSOptions() {
|
||||
if (!tlsOptions) {
|
||||
tlsOptions = Reference<TLSOptions>(new TLSOptions());
|
||||
static void initTLSPolicy() {
|
||||
#ifndef TLS_DISABLED
|
||||
if (!tlsPolicy) {
|
||||
tlsPolicy = Reference<TLSPolicy>(new TLSPolicy(TLSPolicy::Is::CLIENT));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/");
|
||||
|
@ -884,49 +887,46 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
|
|||
break;
|
||||
case FDBNetworkOptions::TLS_CERT_PATH:
|
||||
validateOptionValue(value, true);
|
||||
initTLSOptions();
|
||||
tlsOptions->set_cert_file( value.get().toString() );
|
||||
tlsParams.tlsCertPath = value.get().toString();
|
||||
break;
|
||||
case FDBNetworkOptions::TLS_CERT_BYTES:
|
||||
initTLSOptions();
|
||||
tlsOptions->set_cert_data( value.get().toString() );
|
||||
break;
|
||||
case FDBNetworkOptions::TLS_CA_PATH:
|
||||
case FDBNetworkOptions::TLS_CERT_BYTES: {
|
||||
validateOptionValue(value, true);
|
||||
initTLSOptions();
|
||||
tlsOptions->set_ca_file( value.get().toString() );
|
||||
tlsParams.tlsCertBytes = value.get().toString();
|
||||
break;
|
||||
case FDBNetworkOptions::TLS_CA_BYTES:
|
||||
}
|
||||
case FDBNetworkOptions::TLS_CA_PATH: {
|
||||
validateOptionValue(value, true);
|
||||
initTLSOptions();
|
||||
tlsOptions->set_ca_data(value.get().toString());
|
||||
tlsParams.tlsCAPath = value.get().toString();
|
||||
break;
|
||||
}
|
||||
case FDBNetworkOptions::TLS_CA_BYTES: {
|
||||
validateOptionValue(value, true);
|
||||
tlsParams.tlsCABytes = value.get().toString();
|
||||
break;
|
||||
}
|
||||
case FDBNetworkOptions::TLS_PASSWORD:
|
||||
validateOptionValue(value, true);
|
||||
initTLSOptions();
|
||||
tlsOptions->set_key_password(value.get().toString());
|
||||
tlsParams.tlsPassword = value.get().toString();
|
||||
break;
|
||||
case FDBNetworkOptions::TLS_KEY_PATH:
|
||||
validateOptionValue(value, true);
|
||||
initTLSOptions();
|
||||
tlsOptions->set_key_file( value.get().toString() );
|
||||
validateOptionValue(value, true);
|
||||
tlsParams.tlsKeyPath = value.get().toString();
|
||||
break;
|
||||
case FDBNetworkOptions::TLS_KEY_BYTES:
|
||||
case FDBNetworkOptions::TLS_KEY_BYTES: {
|
||||
validateOptionValue(value, true);
|
||||
initTLSOptions();
|
||||
tlsOptions->set_key_data( value.get().toString() );
|
||||
tlsParams.tlsKeyBytes = value.get().toString();
|
||||
break;
|
||||
}
|
||||
case FDBNetworkOptions::TLS_VERIFY_PEERS:
|
||||
validateOptionValue(value, true);
|
||||
initTLSOptions();
|
||||
try {
|
||||
tlsOptions->set_verify_peers({ value.get().toString() });
|
||||
} catch( Error& e ) {
|
||||
initTLSPolicy();
|
||||
#ifndef TLS_DISABLED
|
||||
if (!tlsPolicy->set_verify_peers({ value.get().toString() })) {
|
||||
TraceEvent(SevWarnAlways, "TLSValidationSetError")
|
||||
.error( e )
|
||||
.detail("Input", value.get().toString() );
|
||||
throw invalid_option_value();
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
case FDBNetworkOptions::CLIENT_BUGGIFY_ENABLE:
|
||||
enableBuggify(true, BuggifyType::Client);
|
||||
|
@ -984,15 +984,11 @@ void setupNetwork(uint64_t transportId, bool useMetrics) {
|
|||
if (!networkOptions.logClientInfo.present())
|
||||
networkOptions.logClientInfo = true;
|
||||
|
||||
g_network = newNet2(false, useMetrics || networkOptions.traceDirectory.present());
|
||||
initTLSPolicy();
|
||||
|
||||
g_network = newNet2(false, useMetrics || networkOptions.traceDirectory.present(), tlsPolicy, tlsParams);
|
||||
FlowTransport::createInstance(true, transportId);
|
||||
Net2FileSystem::newFileSystem();
|
||||
|
||||
initTLSOptions();
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
tlsOptions->register_network();
|
||||
#endif
|
||||
}
|
||||
|
||||
void runNetwork() {
|
||||
|
@ -2550,8 +2546,8 @@ ACTOR void checkWrites( Database cx, Future<Void> committed, Promise<Void> outCo
|
|||
} else {
|
||||
Optional<Value> val = wait( tr.get( it->range().begin ) );
|
||||
if( !val.present() || val.get() != m.setValue ) {
|
||||
TraceEvent evt = TraceEvent(SevError, "CheckWritesFailed")
|
||||
.detail("Class", "Set")
|
||||
TraceEvent evt(SevError, "CheckWritesFailed");
|
||||
evt.detail("Class", "Set")
|
||||
.detail("Key", it->range().begin)
|
||||
.detail("Expected", m.setValue);
|
||||
if( !val.present() )
|
||||
|
|
|
@ -1165,8 +1165,8 @@ Optional<Value> getValueFromJSON(StatusObject statusObj) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Optional<Value>> getJSON(Reference<ClusterConnectionFile> clusterFile) {
|
||||
StatusObject statusObj = wait(StatusClient::statusFetcher(clusterFile));
|
||||
ACTOR Future<Optional<Value>> getJSON(Database db) {
|
||||
StatusObject statusObj = wait(StatusClient::statusFetcher(db));
|
||||
return getValueFromJSON(statusObj);
|
||||
}
|
||||
|
||||
|
@ -1194,7 +1194,7 @@ Future< Optional<Value> > ReadYourWritesTransaction::get( const Key& key, bool s
|
|||
|
||||
if (key == LiteralStringRef("\xff\xff/status/json")){
|
||||
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) {
|
||||
return getJSON(tr.getDatabase()->getConnectionFile());
|
||||
return getJSON(tr.getDatabase());
|
||||
}
|
||||
else {
|
||||
return Optional<Value>();
|
||||
|
|
|
@ -451,7 +451,7 @@ StatusObject getClientDatabaseStatus(StatusObjectReader client, StatusObjectRead
|
|||
return databaseStatus;
|
||||
}
|
||||
|
||||
ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f ) {
|
||||
ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f, Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface) {
|
||||
if (!g_network) throw network_not_setup();
|
||||
|
||||
state StatusObject statusObj;
|
||||
|
@ -461,13 +461,10 @@ ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f
|
|||
// This could be read from the JSON but doing so safely is ugly so using a real var.
|
||||
state bool quorum_reachable = false;
|
||||
state int coordinatorsFaultTolerance = 0;
|
||||
state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(new AsyncVar<Optional<ClusterInterface>>);
|
||||
|
||||
try {
|
||||
state int64_t clientTime = time(0);
|
||||
|
||||
state Future<Void> leaderMon = monitorLeader<ClusterInterface>(f, clusterInterface);
|
||||
|
||||
StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance));
|
||||
statusObjClient = _statusObjClient;
|
||||
|
||||
|
@ -547,6 +544,23 @@ ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f
|
|||
return statusObj;
|
||||
}
|
||||
|
||||
Future<StatusObject> StatusClient::statusFetcher( Reference<ClusterConnectionFile> clusterFile ) {
|
||||
return statusFetcherImpl(clusterFile);
|
||||
ACTOR Future<Void> timeoutMonitorLeader(Database db) {
|
||||
state Future<Void> leadMon = monitorLeader<ClusterInterface>(db->getConnectionFile(), db->statusClusterInterface);
|
||||
loop {
|
||||
wait(delay(CLIENT_KNOBS->STATUS_IDLE_TIMEOUT + 0.00001 + db->lastStatusFetch - now()));
|
||||
if(now() - db->lastStatusFetch > CLIENT_KNOBS->STATUS_IDLE_TIMEOUT) {
|
||||
db->statusClusterInterface = Reference<AsyncVar<Optional<ClusterInterface>>>();
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Future<StatusObject> StatusClient::statusFetcher( Database db ) {
|
||||
db->lastStatusFetch = now();
|
||||
if(!db->statusClusterInterface) {
|
||||
db->statusClusterInterface = Reference<AsyncVar<Optional<ClusterInterface>>>(new AsyncVar<Optional<ClusterInterface>>);
|
||||
db->statusLeaderMon = timeoutMonitorLeader(db);
|
||||
}
|
||||
|
||||
return statusFetcherImpl(db->getConnectionFile(), db->statusClusterInterface);
|
||||
}
|
||||
|
|
|
@ -23,11 +23,12 @@
|
|||
|
||||
#include "flow/flow.h"
|
||||
#include "fdbclient/Status.h"
|
||||
#include "fdbclient/DatabaseContext.h"
|
||||
|
||||
class StatusClient {
|
||||
public:
|
||||
enum StatusLevel { MINIMAL = 0, NORMAL = 1, DETAILED = 2, JSON = 3 };
|
||||
static Future<StatusObject> statusFetcher(Reference<ClusterConnectionFile> clusterFile);
|
||||
static Future<StatusObject> statusFetcher(Database db);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -390,7 +390,7 @@ struct SplitMetricsRequest {
|
|||
struct GetStorageMetricsReply {
|
||||
constexpr static FileIdentifier file_identifier = 15491478;
|
||||
StorageMetrics load;
|
||||
StorageMetrics free;
|
||||
StorageMetrics available;
|
||||
StorageMetrics capacity;
|
||||
double bytesInputRate;
|
||||
|
||||
|
@ -398,7 +398,7 @@ struct GetStorageMetricsReply {
|
|||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, load, free, capacity, bytesInputRate);
|
||||
serializer(ar, load, available, capacity, bytesInputRate);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
* compile-time configuration.
|
||||
*/
|
||||
|
||||
#ifndef HAVE_OPENSSL
|
||||
#if !defined(HAVE_OPENSSL) || defined(TLS_DISABLED)
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
* See md5.c for more information.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_OPENSSL
|
||||
#if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED)
|
||||
#include <openssl/md5.h>
|
||||
#elif !defined(_MD5_H)
|
||||
#define _MD5_H
|
||||
|
|
|
@ -161,7 +161,7 @@ description is not currently required but encouraged.
|
|||
defaultFor="500"/>
|
||||
<Option name="transaction_retry_limit" code="501"
|
||||
paramType="Int" paramDescription="number of times to retry"
|
||||
description="Set a timeout in milliseconds which, when elapsed, will cause a transaction automatically to be cancelled. This sets the ``retry_limit`` option of each transaction created by this database. See the transaction option description for more information."
|
||||
description="Set a maximum number of retries after which additional calls to ``onError`` will throw the most recently seen error code. This sets the ``retry_limit`` option of each transaction created by this database. See the transaction option description for more information."
|
||||
defaultFor="501"/>
|
||||
<Option name="transaction_max_retry_delay" code="502"
|
||||
paramType="Int" paramDescription="value in milliseconds of maximum delay"
|
||||
|
@ -220,7 +220,7 @@ description is not currently required but encouraged.
|
|||
<Option name="debug_transaction_identifier" code="403" paramType="String" paramDescription="String identifier to be used when tracing or profiling this transaction. The identifier must not exceed 100 characters."
|
||||
description="Sets a client provided identifier for the transaction that will be used in scenarios like tracing or profiling. Client trace logging or transaction profiling must be separately enabled." />
|
||||
<Option name="log_transaction" code="404"
|
||||
description="Enables tracing for this transaction and logs results to the client trace logs. The DEBUG_TRANSACTION_IDENTIFIER option must be set before using this option, and client trace logging must be enabled and to get log output." />
|
||||
description="Enables tracing for this transaction and logs results to the client trace logs. The DEBUG_TRANSACTION_IDENTIFIER option must be set before using this option, and client trace logging must be enabled to get log output." />
|
||||
<Option name="transaction_logging_max_field_length" code="405" paramType="Int" paramDescription="Maximum length of escaped key and value fields."
|
||||
description="Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option, after which the field will be truncated. A negative value disables truncation." />
|
||||
<Option name="timeout" code="500"
|
||||
|
@ -243,7 +243,7 @@ description is not currently required but encouraged.
|
|||
<Option name="snapshot_ryw_disable" code="601"
|
||||
description="Snapshot read operations will not see the results of writes done in the same transaction. This was the default behavior prior to API version 300." />
|
||||
<Option name="lock_aware" code="700"
|
||||
description="The transaction can read and write to locked databases, and is resposible for checking that it took the lock."/>
|
||||
description="The transaction can read and write to locked databases, and is responsible for checking that it took the lock."/>
|
||||
<Option name="used_during_commit_protection_disable" code="701"
|
||||
description="By default, operations that are performed on a transaction while it is being committed will not only fail themselves, but they will attempt to fail other in-flight operations (such as the commit) as well. This behavior is intended to help developers discover situations where operations could be unintentionally executed after the transaction has been reset. Setting this option removes that protection, causing only the offending operation to fail."/>
|
||||
<Option name="read_lock_aware" code="702"
|
||||
|
|
|
@ -26,7 +26,6 @@ set(FDBRPC_SRCS
|
|||
sim2.actor.cpp
|
||||
sim_validation.cpp
|
||||
TimedRequest.h
|
||||
TLSConnection.actor.cpp
|
||||
TraceFileIO.cpp)
|
||||
|
||||
set(FDBRPC_THIRD_PARTY_SRCS
|
||||
|
|
|
@ -233,6 +233,7 @@ struct YieldMockNetwork : INetwork, ReferenceCounted<YieldMockNetwork> {
|
|||
virtual TaskPriority getCurrentTask() { return baseNetwork->getCurrentTask(); }
|
||||
virtual void setCurrentTask(TaskPriority taskID) { baseNetwork->setCurrentTask(taskID); }
|
||||
virtual double now() { return baseNetwork->now(); }
|
||||
virtual double timer() { return baseNetwork->timer(); }
|
||||
virtual void stop() { return baseNetwork->stop(); }
|
||||
virtual bool isSimulated() const { return baseNetwork->isSimulated(); }
|
||||
virtual void onMainThread(Promise<Void>&& signal, TaskPriority taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }
|
||||
|
|
|
@ -302,7 +302,7 @@ ACTOR Future<Void> connectionMonitor( Reference<Peer> peer ) {
|
|||
state double lastRefreshed = now();
|
||||
state int64_t lastBytesReceived = peer->bytesReceived;
|
||||
loop {
|
||||
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));
|
||||
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME, TaskPriority::ReadSocket));
|
||||
if (lastBytesReceived < peer->bytesReceived) {
|
||||
lastRefreshed = now();
|
||||
lastBytesReceived = peer->bytesReceived;
|
||||
|
@ -317,7 +317,7 @@ ACTOR Future<Void> connectionMonitor( Reference<Peer> peer ) {
|
|||
|
||||
//We cannot let an error be thrown from connectionMonitor while still on the stack from scanPackets in connectionReader
|
||||
//because then it would not call the destructor of connectionReader when connectionReader is cancelled.
|
||||
wait(delay(0));
|
||||
wait(delay(0, TaskPriority::ReadSocket));
|
||||
|
||||
if (peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) {
|
||||
if (peer->peerReferences == 0 &&
|
||||
|
@ -332,7 +332,7 @@ ACTOR Future<Void> connectionMonitor( Reference<Peer> peer ) {
|
|||
}
|
||||
}
|
||||
|
||||
wait (delayJittered(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));
|
||||
wait (delayJittered(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME, TaskPriority::ReadSocket));
|
||||
|
||||
// TODO: Stop monitoring and close the connection with no onDisconnect requests outstanding
|
||||
state ReplyPromise<Void> reply;
|
||||
|
@ -429,14 +429,15 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
|
|||
|
||||
try {
|
||||
choose {
|
||||
when(Reference<IConnection> _conn =
|
||||
wait(INetworkConnections::net()->connect(self->destination))) {
|
||||
when( Reference<IConnection> _conn = wait( INetworkConnections::net()->connect(self->destination) ) ) {
|
||||
conn = _conn;
|
||||
wait(conn->connectHandshake());
|
||||
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false));
|
||||
if (self->unsent.empty()) {
|
||||
_conn->close();
|
||||
conn->close();
|
||||
conn = Reference<IConnection>();
|
||||
continue;
|
||||
} else {
|
||||
conn = _conn;
|
||||
TraceEvent("ConnectionExchangingConnectPacket", conn->getDebugID())
|
||||
.suppressFor(1.0)
|
||||
.detail("PeerAddr", self->destination);
|
||||
|
@ -965,6 +966,7 @@ ACTOR static Future<Void> connectionReader(
|
|||
|
||||
ACTOR static Future<Void> connectionIncoming( TransportData* self, Reference<IConnection> conn ) {
|
||||
try {
|
||||
wait(conn->acceptHandshake());
|
||||
state Promise<Reference<Peer>> onConnected;
|
||||
state Future<Void> reader = connectionReader( self, conn, Reference<Peer>(), onConnected );
|
||||
choose {
|
||||
|
@ -991,11 +993,13 @@ ACTOR static Future<Void> listen( TransportData* self, NetworkAddress listenAddr
|
|||
try {
|
||||
loop {
|
||||
Reference<IConnection> conn = wait( listener->accept() );
|
||||
TraceEvent("ConnectionFrom", conn->getDebugID()).suppressFor(1.0)
|
||||
.detail("FromAddress", conn->getPeerAddress())
|
||||
.detail("ListenAddress", listenAddr.toString());
|
||||
incoming.add( connectionIncoming(self, conn) );
|
||||
wait(delay(0) || delay(FLOW_KNOBS->CONNECTION_ACCEPT_DELAY, TaskPriority::WriteSocket));
|
||||
if(conn) {
|
||||
TraceEvent("ConnectionFrom", conn->getDebugID()).suppressFor(1.0)
|
||||
.detail("FromAddress", conn->getPeerAddress())
|
||||
.detail("ListenAddress", listenAddr.toString());
|
||||
incoming.add( connectionIncoming(self, conn) );
|
||||
}
|
||||
wait(delay(0, TaskPriority::AcceptSocket));
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "ListenError").error(e);
|
||||
|
@ -1119,7 +1123,7 @@ void FlowTransport::removePeerReference(const Endpoint& endpoint, bool isStream)
|
|||
.detail("Address", endpoint.getPrimaryAddress())
|
||||
.detail("Token", endpoint.token);
|
||||
}
|
||||
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) {
|
||||
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0 && peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_UNREFERENCED_CLOSE_DELAY) {
|
||||
peer->resetPing.trigger();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,136 +0,0 @@
|
|||
/*
|
||||
* ITLSPlugin.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FDB_ITLSPLUGIN_H
|
||||
#define FDB_ITLSPLUGIN_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
struct ITLSSession {
|
||||
enum { SUCCESS = 0, WANT_READ = -1, WANT_WRITE = -2, FAILED = -3 };
|
||||
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
||||
// handshake should return SUCCESS if the handshake is complete,
|
||||
// FAILED on fatal error, or one of WANT_READ or WANT_WRITE if the
|
||||
// handshake should be reattempted after more data can be
|
||||
// read/written on the underlying connection.
|
||||
virtual int handshake() = 0;
|
||||
|
||||
// read should return the (non-zero) number of bytes read,
|
||||
// WANT_READ or WANT_WRITE if the operation is blocked by the
|
||||
// underlying stream, or FAILED if there is an error (including a
|
||||
// closed connection).
|
||||
virtual int read(uint8_t* data, int length) = 0;
|
||||
|
||||
// write should return the (non-zero) number of bytes written, or
|
||||
// WANT_READ or WANT_WRITE if the operation is blocked by the
|
||||
// underlying stream, or FAILED if there is an error.
|
||||
virtual int write(const uint8_t* data, int length) = 0;
|
||||
};
|
||||
|
||||
// Returns the number of bytes sent (possibly 0), or -1 on error
|
||||
// (including connection close)
|
||||
typedef int(*TLSSendCallbackFunc)(void* ctx, const uint8_t* buf, int len);
|
||||
|
||||
// Returns the number of bytes read (possibly 0), or -1 on error
|
||||
// (including connection close)
|
||||
typedef int(*TLSRecvCallbackFunc)(void* ctx, uint8_t* buf, int len);
|
||||
|
||||
struct ITLSPolicy {
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
||||
// set_ca_data should import the provided certificate list and
|
||||
// associate it with this policy. cert_data will point to a PEM
|
||||
// encoded certificate list of trust roots.
|
||||
//
|
||||
// set_ca_data should return true if the operation succeeded,
|
||||
// and false otherwise. After the first call to create_session for
|
||||
// a given policy, set_ca_data should immediately return false
|
||||
// if called.
|
||||
virtual bool set_ca_data(const uint8_t* ca_data, int ca_len) = 0;
|
||||
|
||||
// set_cert_data should import the provided certificate list and
|
||||
// associate it with this policy. cert_data will point to a PEM
|
||||
// encoded certificate list, ordered such that each certificate
|
||||
// certifies the one before it.
|
||||
//
|
||||
// cert_data may additionally contain key information, which must
|
||||
// be ignored.
|
||||
//
|
||||
// set_cert_data should return true if the operation succeeded,
|
||||
// and false otherwise. After the first call to create_session for
|
||||
// a given policy, set_cert_data should immediately return false
|
||||
// if called.
|
||||
virtual bool set_cert_data(const uint8_t* cert_data, int cert_len) = 0;
|
||||
|
||||
// set_key_data should import the provided private key and
|
||||
// associate it with this policy. key_data will point to a PEM
|
||||
// encoded key, which may be encrypted. If encrypted the password
|
||||
// argument should be specified, otherwise it may be NULL.
|
||||
//
|
||||
// key_data may additionally contain certificate information,
|
||||
// which must be ignored.
|
||||
//
|
||||
// set_key_data should return true if the operation succeeded, and
|
||||
// false otherwise. After the first call to create_session for a
|
||||
// given policy, set_key_data should immediately return false if
|
||||
// called.
|
||||
virtual bool set_key_data(const uint8_t* key_data, int key_len, const char* password) = 0;
|
||||
|
||||
// set_verify_peers should modify the validation rules for
|
||||
// verifying a peer during connection handshake. The format of
|
||||
// verify_peers is implementation specific.
|
||||
//
|
||||
// set_verify_peers should return true if the operation succeed,
|
||||
// and false otherwise. After the first call to create_session for
|
||||
// a given policy, set_verify_peers should immediately return
|
||||
// false if called.
|
||||
virtual bool set_verify_peers(int count, const uint8_t* verify_peers[], int verify_peers_len[]) = 0;
|
||||
|
||||
// create_session should return a new object that implements
|
||||
// ITLSSession, associated with this policy. After the first call
|
||||
// to create_session for a given policy, further calls to
|
||||
// ITLSPolicy::set_* will fail and return false.
|
||||
//
|
||||
// The newly created session should use send_func and recv_func to
|
||||
// send and receive data on the underlying transport, and must
|
||||
// provide send_ctx/recv_ctx to the callbacks.
|
||||
//
|
||||
// uid will be used to identify this session within trace events
|
||||
virtual ITLSSession* create_session(bool is_client, const char *servername, TLSSendCallbackFunc send_func, void* send_ctx, TLSRecvCallbackFunc recv_func, void* recv_ctx, void* uid) = 0;
|
||||
};
|
||||
|
||||
struct ITLSPlugin {
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
|
||||
// create_policy should return a new object that implements
|
||||
// ITLSPolicy.
|
||||
virtual ITLSPolicy* create_policy() = 0;
|
||||
|
||||
static inline const char* get_plugin_type_name_and_version() { return "ITLSPlugin"; }
|
||||
};
|
||||
|
||||
#endif /* FDB_ITLSPLUGIN_H */
|
|
@ -18,27 +18,21 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _FLOW_LOADPLUGIN_H_
|
||||
#define _FLOW_LOADPLUGIN_H_
|
||||
|
||||
#pragma once
|
||||
|
||||
// Specialized TLS plugin library
|
||||
extern "C" void *get_tls_plugin(const char *plugin_type_name_and_version);
|
||||
|
||||
// Name of specialized TLS Plugin
|
||||
extern const char* tlsPluginName;
|
||||
#include <string>
|
||||
#include "flow/flow.h"
|
||||
|
||||
template <class T>
|
||||
Reference<T> loadPlugin( std::string const& plugin_name ) {
|
||||
void *(*get_plugin)(const char*) = NULL;
|
||||
#ifndef TLS_DISABLED
|
||||
if (!plugin_name.compare(tlsPluginName)) {
|
||||
get_plugin = (void*(*)(const char*)) get_tls_plugin;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
void* plugin = loadLibrary( plugin_name.c_str() );
|
||||
if (plugin)
|
||||
get_plugin = (void*(*)(const char*))loadFunction( plugin, "get_plugin" );
|
||||
}
|
||||
void* plugin = loadLibrary( plugin_name.c_str() );
|
||||
if (plugin)
|
||||
get_plugin = (void*(*)(const char*))loadFunction( plugin, "get_plugin" );
|
||||
return (get_plugin) ? Reference<T>( (T*)get_plugin( T::get_plugin_type_name_and_version() ) ) : Reference<T>( NULL );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -129,8 +129,7 @@ public:
|
|||
std::vector<LocalityEntry> const& getEntries() const
|
||||
{ return _entryArray; }
|
||||
|
||||
std::vector<LocalityEntry> const& getMutableEntries() const
|
||||
{ return _mutableEntryArray; }
|
||||
std::vector<LocalityEntry>& getMutableEntries() { return _mutableEntryArray; }
|
||||
|
||||
std::vector<LocalityEntry> const& getGroupEntries() const
|
||||
{ return _localitygroup->_entryArray; }
|
||||
|
@ -253,7 +252,7 @@ public:
|
|||
|
||||
while (nRandomItems > 0)
|
||||
{
|
||||
if (nItemsLeft <= 0) {
|
||||
if (nRandomItems > nItemsLeft || nItemsLeft <= 0) {
|
||||
bComplete = false;
|
||||
break;
|
||||
}
|
||||
|
@ -479,6 +478,8 @@ public:
|
|||
|
||||
Reference<StringToIntMap> _keymap;
|
||||
|
||||
virtual std::vector<std::vector<AttribValue>> const& getKeyValueArray() const { return _keyValueArray; }
|
||||
|
||||
protected:
|
||||
virtual Reference<StringToIntMap>& getGroupValueMap()
|
||||
{ return _localitygroup->getGroupValueMap(); }
|
||||
|
|
|
@ -119,6 +119,8 @@ struct PolicyAcross : IReplicationPolicy, public ReferenceCounted<PolicyAcross>
|
|||
explicit PolicyAcross(const PolicyAcross& other) : PolicyAcross(other._count, other._attribKey, other._policy) {}
|
||||
virtual ~PolicyAcross();
|
||||
virtual std::string name() const { return "Across"; }
|
||||
std::string embeddedPolicyName() const { return _policy->name(); }
|
||||
int getCount() const { return _count; }
|
||||
virtual std::string info() const { return format("%s^%d x ", _attribKey.c_str(), _count) + _policy->info(); }
|
||||
virtual int maxResults() const { return _count * _policy->maxResults(); }
|
||||
virtual int depth() const { return 1 + _policy->depth(); }
|
||||
|
|
|
@ -82,14 +82,63 @@ double ratePolicy(
|
|||
return rating;
|
||||
}
|
||||
|
||||
bool findBestPolicySet(
|
||||
std::vector<LocalityEntry>& bestResults,
|
||||
Reference<LocalitySet> & localitySet,
|
||||
Reference<IReplicationPolicy> const& policy,
|
||||
unsigned int nMinItems,
|
||||
unsigned int nSelectTests,
|
||||
unsigned int nPolicyTests)
|
||||
{
|
||||
int mostUsedZoneCount(Reference<LocalitySet>& logServerSet, std::vector<LocalityEntry>& bestSet) {
|
||||
AttribKey indexKey = logServerSet->keyIndex("zoneid");
|
||||
std::map<AttribValue, int> entries;
|
||||
for(int i = 0; i < bestSet.size(); i++) {
|
||||
Optional<AttribValue> value = logServerSet->getRecordViaEntry(bestSet[i])->getValue(indexKey);
|
||||
entries[value.get()]++;
|
||||
}
|
||||
int maxEntries = 0;
|
||||
for(auto it : entries) {
|
||||
maxEntries = std::max(maxEntries, it.second);
|
||||
}
|
||||
return maxEntries;
|
||||
}
|
||||
|
||||
bool findBestPolicySetSimple(int targetUniqueValueCount, Reference<LocalitySet>& logServerSet, std::vector<LocalityEntry>& bestSet,
|
||||
int desired) {
|
||||
auto& mutableEntries = logServerSet->getMutableEntries();
|
||||
deterministicRandom()->randomShuffle(mutableEntries);
|
||||
// First make sure the current localitySet is able to fulfuill the policy
|
||||
AttribKey indexKey = logServerSet->keyIndex("zoneid");
|
||||
int uniqueValueCount = logServerSet->getKeyValueArray()[indexKey._id].size();
|
||||
|
||||
if (uniqueValueCount < targetUniqueValueCount) {
|
||||
// logServerSet won't be able to fulfill the policy
|
||||
return false;
|
||||
}
|
||||
|
||||
std::map<AttribValue, std::vector<int>> entries;
|
||||
for(int i = 0; i < mutableEntries.size(); i++) {
|
||||
Optional<AttribValue> value = logServerSet->getRecord(mutableEntries[i]._id)->getValue(indexKey);
|
||||
if (value.present()) {
|
||||
entries[value.get()].push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_WE_THINK(uniqueValueCount == entries.size());
|
||||
|
||||
desired = std::max(desired, targetUniqueValueCount);
|
||||
auto it = entries.begin();
|
||||
while (bestSet.size() < desired) {
|
||||
if(it->second.size()) {
|
||||
bestSet.push_back(mutableEntries[it->second.back()]);
|
||||
it->second.pop_back();
|
||||
}
|
||||
|
||||
++it;
|
||||
if(it == entries.end()) {
|
||||
it = entries.begin();
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool findBestPolicySetExpensive(std::vector<LocalityEntry>& bestResults, Reference<LocalitySet>& localitySet,
|
||||
Reference<IReplicationPolicy> const& policy, unsigned int nMinItems,
|
||||
unsigned int nSelectTests, unsigned int nPolicyTests) {
|
||||
bool bSucceeded = true;
|
||||
Reference<LocalitySet> bestLocalitySet, testLocalitySet;
|
||||
std::vector<LocalityEntry> results;
|
||||
|
@ -113,9 +162,7 @@ bool findBestPolicySet(
|
|||
}
|
||||
|
||||
// Get some additional random items, if needed
|
||||
if ((nMinItems > results.size()) &&
|
||||
(!localitySet->random(results, results, nMinItems-results.size())))
|
||||
{
|
||||
if ((nMinItems > results.size()) && (!localitySet->random(results, results, nMinItems - results.size()))) {
|
||||
bSucceeded = false;
|
||||
break;
|
||||
}
|
||||
|
@ -158,6 +205,53 @@ bool findBestPolicySet(
|
|||
return bSucceeded;
|
||||
}
|
||||
|
||||
bool findBestPolicySet(std::vector<LocalityEntry>& bestResults, Reference<LocalitySet>& localitySet,
|
||||
Reference<IReplicationPolicy> const& policy, unsigned int nMinItems, unsigned int nSelectTests,
|
||||
unsigned int nPolicyTests) {
|
||||
|
||||
bool bestFound = false;
|
||||
|
||||
// Specialization for policies of shape:
|
||||
// - PolicyOne()
|
||||
// - PolicyAcross(,"zoneId",PolicyOne())
|
||||
// - TODO: More specializations for common policies
|
||||
if (policy->name() == "One") {
|
||||
bestFound = true;
|
||||
int count = 0;
|
||||
auto& mutableEntries = localitySet->getMutableEntries();
|
||||
deterministicRandom()->randomShuffle(mutableEntries);
|
||||
for (auto const& entry : mutableEntries) {
|
||||
bestResults.push_back(entry);
|
||||
if (++count == nMinItems) break;
|
||||
}
|
||||
} else if (policy->name() == "Across") {
|
||||
PolicyAcross* pa = (PolicyAcross*)policy.getPtr();
|
||||
std::set<std::string> attributeKeys;
|
||||
pa->attributeKeys(&attributeKeys);
|
||||
if (pa->embeddedPolicyName() == "One" && attributeKeys.size() == 1 &&
|
||||
*attributeKeys.begin() == "zoneid" // This algorithm can actually apply to any field
|
||||
) {
|
||||
bestFound = findBestPolicySetSimple(pa->getCount(), localitySet, bestResults, nMinItems);
|
||||
if (bestFound && g_network->isSimulated()) {
|
||||
std::vector<LocalityEntry> oldBest;
|
||||
auto oldBestFound =
|
||||
findBestPolicySetExpensive(oldBest, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
|
||||
if (!oldBestFound) {
|
||||
TraceEvent(SevError, "FBPSMissmatch").detail("Policy", policy->info());
|
||||
} else {
|
||||
ASSERT(mostUsedZoneCount(localitySet, bestResults) <= mostUsedZoneCount(localitySet, oldBest));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bestFound =
|
||||
findBestPolicySetExpensive(bestResults, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
|
||||
}
|
||||
} else {
|
||||
bestFound = findBestPolicySetExpensive(bestResults, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
|
||||
}
|
||||
return bestFound;
|
||||
}
|
||||
|
||||
bool findBestUniquePolicySet(
|
||||
std::vector<LocalityEntry>& bestResults,
|
||||
Reference<LocalitySet> & localitySet,
|
||||
|
|
|
@ -1,545 +0,0 @@
|
|||
/*
|
||||
* TLSConnection.actor.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include "flow/flow.h"
|
||||
#include "flow/network.h"
|
||||
#include "flow/Knobs.h"
|
||||
#include "fdbrpc/TLSConnection.h"
|
||||
#include "fdbrpc/ITLSPlugin.h"
|
||||
#include "fdbrpc/LoadPlugin.h"
|
||||
#include "fdbrpc/Platform.h"
|
||||
#include "fdbrpc/IAsyncFile.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
// Name of specialized TLS Plugin
|
||||
const char* tlsPluginName = "fdb-libressl-plugin";
|
||||
|
||||
// Must not throw an exception from this function!
|
||||
static int send_func(void* ctx, const uint8_t* buf, int len) {
|
||||
TLSConnection* conn = (TLSConnection*)ctx;
|
||||
|
||||
try {
|
||||
SendBuffer sb;
|
||||
sb.bytes_sent = 0;
|
||||
sb.bytes_written = len;
|
||||
sb.data = buf;
|
||||
sb.next = 0;
|
||||
|
||||
int w = conn->conn->write( &sb );
|
||||
return w;
|
||||
} catch ( Error& e ) {
|
||||
TraceEvent("TLSConnectionSendError", conn->getDebugID()).suppressFor(1.0).detail("Peer", conn->getPeerAddress().toString()).error(e);
|
||||
return -1;
|
||||
} catch ( ... ) {
|
||||
TraceEvent("TLSConnectionSendError", conn->getDebugID()).suppressFor(1.0).detail("Peer", conn->getPeerAddress()).error( unknown_error() );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Must not throw an exception from this function!
|
||||
static int recv_func(void* ctx, uint8_t* buf, int len) {
|
||||
TLSConnection* conn = (TLSConnection*)ctx;
|
||||
|
||||
try {
|
||||
int r = conn->conn->read( buf, buf + len );
|
||||
return r;
|
||||
} catch ( Error& e ) {
|
||||
TraceEvent("TLSConnectionRecvError", conn->getDebugID()).suppressFor(1.0).detail("Peer", conn->getPeerAddress()).error(e);
|
||||
return -1;
|
||||
} catch ( ... ) {
|
||||
TraceEvent("TLSConnectionRecvError", conn->getDebugID()).suppressFor(1.0).detail("Peer", conn->getPeerAddress()).error( unknown_error() );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> handshake( TLSConnection* self ) {
|
||||
state std::pair<IPAddress,uint16_t> peerIP = std::make_pair(self->conn->getPeerAddress().ip, self->is_client ? self->conn->getPeerAddress().port : static_cast<uint16_t>(0));
|
||||
if(!self->is_client) {
|
||||
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
|
||||
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
|
||||
if (now() < iter->second.second) {
|
||||
if(iter->second.first >= FLOW_KNOBS->TLS_SERVER_CONNECTION_THROTTLE_ATTEMPTS) {
|
||||
TraceEvent("TLSIncomingConnectionThrottlingWarning", self->getDebugID()).suppressFor(1.0).detail("PeerIP", peerIP.first.toString());
|
||||
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
|
||||
throw connection_failed();
|
||||
}
|
||||
} else {
|
||||
g_network->networkInfo.serverTLSConnectionThrottler.erase(peerIP);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
loop {
|
||||
int r = self->session->handshake();
|
||||
if(BUGGIFY_WITH_PROB(0.001)) {
|
||||
r = ITLSSession::FAILED;
|
||||
}
|
||||
if ( r == ITLSSession::SUCCESS ) break;
|
||||
if ( r == ITLSSession::FAILED ) {
|
||||
TraceEvent("TLSConnectionHandshakeError", self->getDebugID()).suppressFor(1.0).detail("Peer", self->getPeerAddress());
|
||||
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
|
||||
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
|
||||
iter->second.first++;
|
||||
} else {
|
||||
g_network->networkInfo.serverTLSConnectionThrottler[peerIP] = std::make_pair(0,now() + (self->is_client ? FLOW_KNOBS->TLS_CLIENT_CONNECTION_THROTTLE_TIMEOUT : FLOW_KNOBS->TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT));
|
||||
}
|
||||
throw connection_failed();
|
||||
}
|
||||
ASSERT( r == ITLSSession::WANT_WRITE || r == ITLSSession::WANT_READ );
|
||||
wait( r == ITLSSession::WANT_WRITE ? self->conn->onWritable() : self->conn->onReadable() );
|
||||
}
|
||||
|
||||
TraceEvent("TLSConnectionHandshakeSuccessful", self->getDebugID()).suppressFor(1.0).detail("Peer", self->getPeerAddress());
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
TLSConnection::TLSConnection( Reference<IConnection> const& conn, Reference<ITLSPolicy> const& policy, bool is_client, std::string host) : conn(conn), write_wants(0), read_wants(0), uid(conn->getDebugID()), is_client(is_client) {
|
||||
const char * serverName = host.empty() ? NULL : host.c_str();
|
||||
session = Reference<ITLSSession>( policy->create_session(is_client, serverName, send_func, this, recv_func, this, (void*)&uid) );
|
||||
if ( !session ) {
|
||||
// If session is NULL, we're trusting policy->create_session
|
||||
// to have used its provided logging function to have logged
|
||||
// the error
|
||||
throw tls_error();
|
||||
}
|
||||
handshook = handshake(this);
|
||||
}
|
||||
|
||||
Future<Void> TLSConnection::onWritable() {
|
||||
if ( !handshook.isReady() )
|
||||
return handshook;
|
||||
return
|
||||
write_wants == ITLSSession::WANT_READ ? conn->onReadable() :
|
||||
write_wants == ITLSSession::WANT_WRITE ? conn->onWritable() :
|
||||
Void();
|
||||
}
|
||||
|
||||
Future<Void> TLSConnection::onReadable() {
|
||||
if ( !handshook.isReady() )
|
||||
return handshook;
|
||||
return
|
||||
read_wants == ITLSSession::WANT_READ ? conn->onReadable() :
|
||||
read_wants == ITLSSession::WANT_WRITE ? conn->onWritable() :
|
||||
Void();
|
||||
}
|
||||
|
||||
int TLSConnection::read( uint8_t* begin, uint8_t* end ) {
|
||||
if ( !handshook.isReady() ) return 0;
|
||||
handshook.get();
|
||||
|
||||
read_wants = 0;
|
||||
int r = session->read( begin, end - begin );
|
||||
if ( r > 0 )
|
||||
return r;
|
||||
|
||||
if ( r == ITLSSession::FAILED ) throw connection_failed();
|
||||
|
||||
ASSERT( r == ITLSSession::WANT_WRITE || r == ITLSSession::WANT_READ );
|
||||
|
||||
read_wants = r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int TLSConnection::write( SendBuffer const* buffer, int limit ) {
|
||||
ASSERT(limit > 0);
|
||||
|
||||
if ( !handshook.isReady() ) return 0;
|
||||
handshook.get();
|
||||
|
||||
write_wants = 0;
|
||||
int toSend = std::min(limit, buffer->bytes_written - buffer->bytes_sent);
|
||||
ASSERT(toSend);
|
||||
int w = session->write( buffer->data + buffer->bytes_sent, toSend );
|
||||
if ( w > 0 )
|
||||
return w;
|
||||
|
||||
if ( w == ITLSSession::FAILED ) throw connection_failed();
|
||||
|
||||
ASSERT( w == ITLSSession::WANT_WRITE || w == ITLSSession::WANT_READ );
|
||||
|
||||
write_wants = w;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ACTOR Future<Reference<IConnection>> wrap( Reference<ITLSPolicy> policy, bool is_client, Future<Reference<IConnection>> c, std::string host) {
|
||||
state Reference<IConnection> conn = wait(c);
|
||||
try {
|
||||
state Reference<TLSConnection> tlsConn(new TLSConnection( conn, policy, is_client, host ));
|
||||
if(is_client) {
|
||||
wait(tlsConn->handshook);
|
||||
}
|
||||
return tlsConn;
|
||||
} catch( Error &e ) {
|
||||
conn->close();
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
Future<Reference<IConnection>> TLSListener::accept() {
|
||||
return wrap( options->get_policy(TLSOptions::POLICY_VERIFY_PEERS), false, listener->accept(), "");
|
||||
}
|
||||
|
||||
TLSNetworkConnections::TLSNetworkConnections( Reference<TLSOptions> options ) : options(options) {
|
||||
network = INetworkConnections::net();
|
||||
g_network->setGlobal(INetwork::enumGlobal::enNetworkConnections, (flowGlobalType) this);
|
||||
}
|
||||
|
||||
ACTOR Future<Reference<IConnection>> waitAndFailConnection() {
|
||||
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
|
||||
throw connection_failed();
|
||||
}
|
||||
|
||||
Future<Reference<IConnection>> TLSNetworkConnections::connect( NetworkAddress toAddr, std::string host) {
|
||||
if ( toAddr.isTLS() ) {
|
||||
NetworkAddress clearAddr( toAddr.ip, toAddr.port, toAddr.isPublic(), false );
|
||||
std::pair<IPAddress,uint16_t> peerIP = std::make_pair(toAddr.ip, toAddr.port);
|
||||
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
|
||||
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
|
||||
if (now() < iter->second.second) {
|
||||
if(iter->second.first >= FLOW_KNOBS->TLS_CLIENT_CONNECTION_THROTTLE_ATTEMPTS) {
|
||||
TraceEvent("TLSOutgoingConnectionThrottlingWarning").suppressFor(1.0).detail("PeerIP", toAddr);
|
||||
return waitAndFailConnection();
|
||||
}
|
||||
} else {
|
||||
g_network->networkInfo.serverTLSConnectionThrottler.erase(peerIP);
|
||||
}
|
||||
}
|
||||
|
||||
TraceEvent("TLSConnectionConnecting").suppressFor(1.0).detail("ToAddr", toAddr);
|
||||
// For FDB<->FDB connections, we don't have hostnames and can't verify IP
|
||||
// addresses against certificates, so we have our own peer verifying logic
|
||||
// to use. For FDB<->external system connections, we can use the standard
|
||||
// hostname-based certificate verification logic.
|
||||
if (host.empty() || host == toAddr.ip.toString())
|
||||
return wrap(options->get_policy(TLSOptions::POLICY_VERIFY_PEERS), true, network->connect(clearAddr), std::string(""));
|
||||
else
|
||||
return wrap( options->get_policy(TLSOptions::POLICY_NO_VERIFY_PEERS), true, network->connect( clearAddr ), host );
|
||||
}
|
||||
return network->connect( toAddr );
|
||||
}
|
||||
|
||||
Future<std::vector<NetworkAddress>> TLSNetworkConnections::resolveTCPEndpoint( std::string host, std::string service) {
|
||||
return network->resolveTCPEndpoint( host, service );
|
||||
}
|
||||
|
||||
Reference<IListener> TLSNetworkConnections::listen( NetworkAddress localAddr ) {
|
||||
if ( localAddr.isTLS() ) {
|
||||
NetworkAddress clearAddr( localAddr.ip, localAddr.port, localAddr.isPublic(), false );
|
||||
TraceEvent("TLSConnectionListening").detail("OnAddr", localAddr);
|
||||
return Reference<IListener>(new TLSListener( options, network->listen( clearAddr ) ));
|
||||
}
|
||||
return network->listen( localAddr );
|
||||
}
|
||||
|
||||
// 5MB for loading files into memory
|
||||
#define CERT_FILE_MAX_SIZE (5 * 1024 * 1024)
|
||||
|
||||
void TLSOptions::set_cert_file( std::string const& cert_file ) {
|
||||
try {
|
||||
TraceEvent("TLSConnectionSettingCertFile").detail("CertFilePath", cert_file);
|
||||
policyInfo.cert_path = cert_file;
|
||||
set_cert_data( readFileBytes( cert_file, CERT_FILE_MAX_SIZE ) );
|
||||
} catch ( Error& e) {
|
||||
TraceEvent(SevError, "TLSOptionsSetCertFileError").detail("Filename", cert_file).error(e).GetLastError();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void TLSOptions::set_ca_file(std::string const& ca_file) {
|
||||
try {
|
||||
TraceEvent("TLSConnectionSettingCAFile").detail("CAPath", ca_file);
|
||||
policyInfo.ca_path = ca_file;
|
||||
set_ca_data(readFileBytes(ca_file, CERT_FILE_MAX_SIZE));
|
||||
}
|
||||
catch (Error& e) {
|
||||
TraceEvent(SevError, "TLSOptionsSetCertAError").detail("Filename", ca_file).error(e).GetLastError();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void TLSOptions::set_ca_data(std::string const& ca_data) {
|
||||
if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
|
||||
init_plugin();
|
||||
|
||||
TraceEvent("TLSConnectionSettingCAData").detail("CADataSize", ca_data.size());
|
||||
policyInfo.ca_contents = Standalone<StringRef>(ca_data);
|
||||
if (!policyVerifyPeersSet.get()->set_ca_data((const uint8_t*)&ca_data[0], ca_data.size()))
|
||||
throw tls_error();
|
||||
if (!policyVerifyPeersNotSet.get()->set_ca_data((const uint8_t*)&ca_data[0], ca_data.size()))
|
||||
throw tls_error();
|
||||
|
||||
ca_set = true;
|
||||
}
|
||||
|
||||
void TLSOptions::set_cert_data( std::string const& cert_data ) {
|
||||
if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
|
||||
init_plugin();
|
||||
|
||||
TraceEvent("TLSConnectionSettingCertData").detail("CertDataSize", cert_data.size());
|
||||
policyInfo.cert_contents = Standalone<StringRef>(cert_data);
|
||||
if ( !policyVerifyPeersSet.get()->set_cert_data( (const uint8_t*)&cert_data[0], cert_data.size() ) )
|
||||
throw tls_error();
|
||||
if (!policyVerifyPeersNotSet.get()->set_cert_data((const uint8_t*)&cert_data[0], cert_data.size()))
|
||||
throw tls_error();
|
||||
|
||||
certs_set = true;
|
||||
}
|
||||
|
||||
void TLSOptions::set_key_password(std::string const& password) {
|
||||
TraceEvent("TLSConnectionSettingPassword");
|
||||
policyInfo.keyPassword = password;
|
||||
}
|
||||
|
||||
void TLSOptions::set_key_file( std::string const& key_file ) {
|
||||
try {
|
||||
TraceEvent("TLSConnectionSettingKeyFile").detail("KeyFilePath", key_file);
|
||||
policyInfo.key_path = key_file;
|
||||
set_key_data( readFileBytes( key_file, CERT_FILE_MAX_SIZE ) );
|
||||
} catch ( Error& e) {
|
||||
TraceEvent(SevError, "TLSOptionsSetKeyFileError").detail("Filename", key_file).error(e).GetLastError();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void TLSOptions::set_key_data( std::string const& key_data ) {
|
||||
if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
|
||||
init_plugin();
|
||||
const char *passphrase = policyInfo.keyPassword.empty() ? NULL : policyInfo.keyPassword.c_str();
|
||||
TraceEvent("TLSConnectionSettingKeyData").detail("KeyDataSize", key_data.size());
|
||||
policyInfo.key_contents = Standalone<StringRef>(key_data);
|
||||
if ( !policyVerifyPeersSet.get()->set_key_data( (const uint8_t*)&key_data[0], key_data.size(), passphrase) )
|
||||
throw tls_error();
|
||||
if (!policyVerifyPeersNotSet.get()->set_key_data((const uint8_t*)&key_data[0], key_data.size(), passphrase))
|
||||
throw tls_error();
|
||||
|
||||
key_set = true;
|
||||
}
|
||||
|
||||
void TLSOptions::set_verify_peers( std::vector<std::string> const& verify_peers ) {
|
||||
if (!policyVerifyPeersSet.get())
|
||||
init_plugin();
|
||||
{
|
||||
TraceEvent e("TLSConnectionSettingVerifyPeers");
|
||||
for (int i = 0; i < verify_peers.size(); i++)
|
||||
e.detail(std::string("Value" + std::to_string(i)).c_str(), verify_peers[i].c_str());
|
||||
}
|
||||
std::unique_ptr<const uint8_t *[]> verify_peers_arr(new const uint8_t*[verify_peers.size()]);
|
||||
std::unique_ptr<int[]> verify_peers_len(new int[verify_peers.size()]);
|
||||
for (int i = 0; i < verify_peers.size(); i++) {
|
||||
verify_peers_arr[i] = (const uint8_t *)&verify_peers[i][0];
|
||||
verify_peers_len[i] = verify_peers[i].size();
|
||||
}
|
||||
|
||||
if (!policyVerifyPeersSet.get()->set_verify_peers(verify_peers.size(), verify_peers_arr.get(), verify_peers_len.get()))
|
||||
throw tls_error();
|
||||
|
||||
policyInfo.verify_peers = verify_peers;
|
||||
verify_peers_set = true;
|
||||
}
|
||||
|
||||
void TLSOptions::register_network() {
|
||||
// Simulation relies upon being able to call this multiple times, and have it override g_network
|
||||
// each time it's called.
|
||||
new TLSNetworkConnections( Reference<TLSOptions>::addRef( this ) );
|
||||
}
|
||||
|
||||
ACTOR static Future<ErrorOr<Standalone<StringRef>>> readEntireFile( std::string filename ) {
|
||||
state Reference<IAsyncFile> file = wait(IAsyncFileSystem::filesystem()->open(filename, IAsyncFile::OPEN_READONLY | IAsyncFile::OPEN_UNCACHED, 0));
|
||||
state int64_t filesize = wait(file->size());
|
||||
state Standalone<StringRef> buf = makeString(filesize);
|
||||
int rc = wait(file->read(mutateString(buf), filesize, 0));
|
||||
if (rc != filesize) {
|
||||
// File modified during read, probably. The mtime should change, and thus we'll be called again.
|
||||
return tls_error();
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> watchFileForChanges( std::string filename, AsyncVar<Standalone<StringRef>> *contents_var ) {
|
||||
state std::time_t lastModTime = wait(IAsyncFileSystem::filesystem()->lastWriteTime(filename));
|
||||
loop {
|
||||
wait(delay(FLOW_KNOBS->TLS_CERT_REFRESH_DELAY_SECONDS));
|
||||
std::time_t modtime = wait(IAsyncFileSystem::filesystem()->lastWriteTime(filename));
|
||||
if (lastModTime != modtime) {
|
||||
lastModTime = modtime;
|
||||
ErrorOr<Standalone<StringRef>> contents = wait(readEntireFile(filename));
|
||||
if (contents.present()) {
|
||||
contents_var->set(contents.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> reloadConfigurationOnChange( TLSOptions::PolicyInfo *pci, Reference<ITLSPlugin> plugin, AsyncVar<Reference<ITLSPolicy>> *realVerifyPeersPolicy, AsyncVar<Reference<ITLSPolicy>> *realNoVerifyPeersPolicy ) {
|
||||
if (FLOW_KNOBS->TLS_CERT_REFRESH_DELAY_SECONDS <= 0) {
|
||||
return Void();
|
||||
}
|
||||
loop {
|
||||
// Early in bootup, the filesystem might not be initialized yet. Wait until it is.
|
||||
if (IAsyncFileSystem::filesystem() != nullptr) {
|
||||
break;
|
||||
}
|
||||
wait(delay(1.0));
|
||||
}
|
||||
state int mismatches = 0;
|
||||
state AsyncVar<Standalone<StringRef>> ca_var;
|
||||
state AsyncVar<Standalone<StringRef>> key_var;
|
||||
state AsyncVar<Standalone<StringRef>> cert_var;
|
||||
state std::vector<Future<Void>> lifetimes;
|
||||
if (!pci->ca_path.empty()) lifetimes.push_back(watchFileForChanges(pci->ca_path, &ca_var));
|
||||
if (!pci->key_path.empty()) lifetimes.push_back(watchFileForChanges(pci->key_path, &key_var));
|
||||
if (!pci->cert_path.empty()) lifetimes.push_back(watchFileForChanges(pci->cert_path, &cert_var));
|
||||
loop {
|
||||
state Future<Void> ca_changed = ca_var.onChange();
|
||||
state Future<Void> key_changed = key_var.onChange();
|
||||
state Future<Void> cert_changed = cert_var.onChange();
|
||||
wait( ca_changed || key_changed || cert_changed );
|
||||
if (ca_changed.isReady()) {
|
||||
TraceEvent(SevInfo, "TLSRefreshCAChanged").detail("path", pci->ca_path).detail("length", ca_var.get().size());
|
||||
pci->ca_contents = ca_var.get();
|
||||
}
|
||||
if (key_changed.isReady()) {
|
||||
TraceEvent(SevInfo, "TLSRefreshKeyChanged").detail("path", pci->key_path).detail("length", key_var.get().size());
|
||||
pci->key_contents = key_var.get();
|
||||
}
|
||||
if (cert_changed.isReady()) {
|
||||
TraceEvent(SevInfo, "TLSRefreshCertChanged").detail("path", pci->cert_path).detail("length", cert_var.get().size());
|
||||
pci->cert_contents = cert_var.get();
|
||||
}
|
||||
bool rc = true;
|
||||
Reference<ITLSPolicy> verifypeers = Reference<ITLSPolicy>(plugin->create_policy());
|
||||
Reference<ITLSPolicy> noverifypeers = Reference<ITLSPolicy>(plugin->create_policy());
|
||||
loop {
|
||||
// Don't actually loop. We're just using loop/break as a `goto err`.
|
||||
// This loop always ends with an unconditional break.
|
||||
rc = verifypeers->set_ca_data(pci->ca_contents.begin(), pci->ca_contents.size());
|
||||
if (!rc) break;
|
||||
rc = verifypeers->set_key_data(pci->key_contents.begin(), pci->key_contents.size(), pci->keyPassword.c_str());
|
||||
if (!rc) break;
|
||||
rc = verifypeers->set_cert_data(pci->cert_contents.begin(), pci->cert_contents.size());
|
||||
if (!rc) break;
|
||||
{
|
||||
std::unique_ptr<const uint8_t *[]> verify_peers_arr(new const uint8_t*[pci->verify_peers.size()]);
|
||||
std::unique_ptr<int[]> verify_peers_len(new int[pci->verify_peers.size()]);
|
||||
for (int i = 0; i < pci->verify_peers.size(); i++) {
|
||||
verify_peers_arr[i] = (const uint8_t *)&pci->verify_peers[i][0];
|
||||
verify_peers_len[i] = pci->verify_peers[i].size();
|
||||
}
|
||||
rc = verifypeers->set_verify_peers(pci->verify_peers.size(), verify_peers_arr.get(), verify_peers_len.get());
|
||||
if (!rc) break;
|
||||
}
|
||||
rc = noverifypeers->set_ca_data(pci->ca_contents.begin(), pci->ca_contents.size());
|
||||
if (!rc) break;
|
||||
rc = noverifypeers->set_key_data(pci->key_contents.begin(), pci->key_contents.size(), pci->keyPassword.c_str());
|
||||
if (!rc) break;
|
||||
rc = noverifypeers->set_cert_data(pci->cert_contents.begin(), pci->cert_contents.size());
|
||||
if (!rc) break;
|
||||
break;
|
||||
}
|
||||
|
||||
if (rc) {
|
||||
TraceEvent(SevInfo, "TLSCertificateRefreshSucceeded");
|
||||
realVerifyPeersPolicy->set(verifypeers);
|
||||
realNoVerifyPeersPolicy->set(noverifypeers);
|
||||
mismatches = 0;
|
||||
} else {
|
||||
// Some files didn't match up, they should in the future, and we'll retry then.
|
||||
mismatches++;
|
||||
TraceEvent(SevWarn, "TLSCertificateRefreshMismatch").detail("mismatches", mismatches);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const char *defaultCertFileName = "fdb.pem";
|
||||
|
||||
Reference<ITLSPolicy> TLSOptions::get_policy(PolicyType type) {
|
||||
if ( !certs_set ) {
|
||||
if ( !platform::getEnvironmentVar( "FDB_TLS_CERTIFICATE_FILE", policyInfo.cert_path ) )
|
||||
policyInfo.cert_path = fileExists(defaultCertFileName) ? defaultCertFileName : joinPath(platform::getDefaultConfigPath(), defaultCertFileName);
|
||||
set_cert_file( policyInfo.cert_path );
|
||||
}
|
||||
if ( !key_set ) {
|
||||
if ( policyInfo.keyPassword.empty() )
|
||||
platform::getEnvironmentVar( "FDB_TLS_PASSWORD", policyInfo.keyPassword );
|
||||
if ( !platform::getEnvironmentVar( "FDB_TLS_KEY_FILE", policyInfo.key_path ) )
|
||||
policyInfo.key_path = fileExists(defaultCertFileName) ? defaultCertFileName : joinPath(platform::getDefaultConfigPath(), defaultCertFileName);
|
||||
set_key_file( policyInfo.key_path );
|
||||
}
|
||||
if( !verify_peers_set ) {
|
||||
std::string verify_peers;
|
||||
if (platform::getEnvironmentVar("FDB_TLS_VERIFY_PEERS", verify_peers))
|
||||
set_verify_peers({ verify_peers });
|
||||
else
|
||||
set_verify_peers({ std::string("Check.Valid=1")});
|
||||
}
|
||||
if (!ca_set) {
|
||||
if (platform::getEnvironmentVar("FDB_TLS_CA_FILE", policyInfo.ca_path))
|
||||
set_ca_file(policyInfo.ca_path);
|
||||
}
|
||||
|
||||
if (!configurationReloader.present()) {
|
||||
configurationReloader = reloadConfigurationOnChange(&policyInfo, plugin, &policyVerifyPeersSet, &policyVerifyPeersNotSet);
|
||||
}
|
||||
|
||||
Reference<ITLSPolicy> policy;
|
||||
switch (type) {
|
||||
case POLICY_VERIFY_PEERS:
|
||||
policy = policyVerifyPeersSet.get();
|
||||
break;
|
||||
case POLICY_NO_VERIFY_PEERS:
|
||||
policy = policyVerifyPeersNotSet.get();
|
||||
break;
|
||||
default:
|
||||
ASSERT_ABORT(0);
|
||||
}
|
||||
return policy;
|
||||
}
|
||||
|
||||
void TLSOptions::init_plugin() {
|
||||
|
||||
TraceEvent("TLSConnectionLoadingPlugin").detail("Plugin", tlsPluginName);
|
||||
|
||||
plugin = loadPlugin<ITLSPlugin>( tlsPluginName );
|
||||
|
||||
if ( !plugin ) {
|
||||
TraceEvent(SevError, "TLSConnectionPluginInitError").detail("Plugin", tlsPluginName).GetLastError();
|
||||
throw tls_error();
|
||||
}
|
||||
|
||||
policyVerifyPeersSet = AsyncVar<Reference<ITLSPolicy>>(Reference<ITLSPolicy>(plugin->create_policy()));
|
||||
if ( !policyVerifyPeersSet.get()) {
|
||||
// Hopefully create_policy logged something with the log func
|
||||
TraceEvent(SevError, "TLSConnectionCreatePolicyVerifyPeersSetError");
|
||||
throw tls_error();
|
||||
}
|
||||
|
||||
policyVerifyPeersNotSet = AsyncVar<Reference<ITLSPolicy>>(Reference<ITLSPolicy>(plugin->create_policy()));
|
||||
if (!policyVerifyPeersNotSet.get()) {
|
||||
// Hopefully create_policy logged something with the log func
|
||||
TraceEvent(SevError, "TLSConnectionCreatePolicyVerifyPeersNotSetError");
|
||||
throw tls_error();
|
||||
}
|
||||
}
|
||||
|
||||
bool TLSOptions::enabled() {
|
||||
return policyVerifyPeersSet.get().isValid() && policyVerifyPeersNotSet.get().isValid();
|
||||
}
|
|
@ -1,174 +0,0 @@
|
|||
/*
|
||||
* TLSConnection.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLOW_TLSCONNECTION_H
|
||||
#define FLOW_TLSCONNECTION_H
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "flow/Platform.h"
|
||||
|
||||
#include "fdbrpc/ITLSPlugin.h"
|
||||
|
||||
struct TLSConnection : IConnection, ReferenceCounted<TLSConnection> {
|
||||
Reference<IConnection> conn;
|
||||
Reference<ITLSSession> session;
|
||||
|
||||
Future<Void> handshook;
|
||||
|
||||
int write_wants, read_wants;
|
||||
|
||||
UID uid;
|
||||
bool is_client;
|
||||
|
||||
virtual void addref() { ReferenceCounted<TLSConnection>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<TLSConnection>::delref(); }
|
||||
|
||||
TLSConnection( Reference<IConnection> const& conn, Reference<ITLSPolicy> const& policy, bool is_client, std::string host);
|
||||
~TLSConnection() {
|
||||
// Here for ordering to make sure we delref the ITLSSession
|
||||
// which has a pointer to this object
|
||||
session.clear();
|
||||
}
|
||||
|
||||
virtual void close() { conn->close(); }
|
||||
|
||||
virtual Future<Void> onWritable();
|
||||
|
||||
virtual Future<Void> onReadable();
|
||||
|
||||
virtual int read( uint8_t* begin, uint8_t* end );
|
||||
|
||||
virtual int write( SendBuffer const* buffer, int limit);
|
||||
|
||||
virtual NetworkAddress getPeerAddress() {
|
||||
NetworkAddress a = conn->getPeerAddress();
|
||||
return NetworkAddress(a.ip, a.port, a.isPublic(), true);
|
||||
}
|
||||
|
||||
virtual UID getDebugID() { return uid; }
|
||||
};
|
||||
|
||||
struct TLSOptions : ReferenceCounted<TLSOptions> {
|
||||
enum { OPT_TLS = 100000, OPT_TLS_PLUGIN, OPT_TLS_CERTIFICATES, OPT_TLS_KEY, OPT_TLS_VERIFY_PEERS, OPT_TLS_CA_FILE, OPT_TLS_PASSWORD };
|
||||
enum PolicyType { POLICY_VERIFY_PEERS = 1, POLICY_NO_VERIFY_PEERS };
|
||||
TLSOptions() : certs_set(false), key_set(false), verify_peers_set(false), ca_set(false) {
|
||||
#ifndef TLS_DISABLED
|
||||
init_plugin( );
|
||||
#endif
|
||||
}
|
||||
|
||||
void set_cert_file( std::string const& cert_file );
|
||||
void set_cert_data( std::string const& cert_data );
|
||||
void set_ca_file(std::string const& ca_file);
|
||||
void set_ca_data(std::string const& ca_data);
|
||||
// If there is a passphrase, this api should be called prior to setting key for the passphrase to be used
|
||||
void set_key_password( std::string const& password );
|
||||
void set_key_file( std::string const& key_file );
|
||||
void set_key_data( std::string const& key_data );
|
||||
void set_verify_peers( std::vector<std::string> const& verify_peers );
|
||||
|
||||
void register_network();
|
||||
|
||||
Reference<ITLSPolicy> get_policy(PolicyType type);
|
||||
bool enabled();
|
||||
|
||||
struct PolicyInfo {
|
||||
std::string ca_path;
|
||||
Standalone<StringRef> ca_contents;
|
||||
std::string key_path;
|
||||
std::string keyPassword;
|
||||
Standalone<StringRef> key_contents;
|
||||
std::string cert_path;
|
||||
Standalone<StringRef> cert_contents;
|
||||
std::vector<std::string> verify_peers;
|
||||
};
|
||||
|
||||
private:
|
||||
void init_plugin();
|
||||
|
||||
Reference<ITLSPlugin> plugin;
|
||||
PolicyInfo policyInfo;
|
||||
AsyncVar<Reference<ITLSPolicy>> policyVerifyPeersSet;
|
||||
AsyncVar<Reference<ITLSPolicy>> policyVerifyPeersNotSet;
|
||||
Optional<Future<Void>> configurationReloader;
|
||||
|
||||
bool certs_set, key_set, verify_peers_set, ca_set;
|
||||
};
|
||||
|
||||
struct TLSListener : IListener, ReferenceCounted<TLSListener> {
|
||||
Reference<IListener> listener;
|
||||
Reference<TLSOptions> options;
|
||||
|
||||
TLSListener( Reference<TLSOptions> options, Reference<IListener> listener ) : options(options), listener(listener) {}
|
||||
|
||||
virtual void addref() { ReferenceCounted<TLSListener>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<TLSListener>::delref(); }
|
||||
|
||||
virtual Future<Reference<IConnection>> accept();
|
||||
|
||||
virtual NetworkAddress getListenAddress() { return listener->getListenAddress(); }
|
||||
};
|
||||
|
||||
struct TLSNetworkConnections : INetworkConnections {
|
||||
INetworkConnections *network;
|
||||
|
||||
explicit TLSNetworkConnections( Reference<TLSOptions> options );
|
||||
|
||||
virtual Future<Reference<IConnection>> connect( NetworkAddress toAddr, std::string host );
|
||||
virtual Future<std::vector<NetworkAddress>> resolveTCPEndpoint( std::string host, std::string service);
|
||||
|
||||
virtual Reference<IListener> listen( NetworkAddress localAddr );
|
||||
|
||||
private:
|
||||
Reference<TLSOptions> options;
|
||||
};
|
||||
|
||||
#define TLS_PLUGIN_FLAG "--tls_plugin"
|
||||
#define TLS_CERTIFICATE_FILE_FLAG "--tls_certificate_file"
|
||||
#define TLS_KEY_FILE_FLAG "--tls_key_file"
|
||||
#define TLS_VERIFY_PEERS_FLAG "--tls_verify_peers"
|
||||
#define TLS_CA_FILE_FLAG "--tls_ca_file"
|
||||
#define TLS_PASSWORD_FLAG "--tls_password"
|
||||
|
||||
#define TLS_OPTION_FLAGS \
|
||||
{ TLSOptions::OPT_TLS_PLUGIN, TLS_PLUGIN_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSOptions::OPT_TLS_CERTIFICATES, TLS_CERTIFICATE_FILE_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSOptions::OPT_TLS_KEY, TLS_KEY_FILE_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSOptions::OPT_TLS_VERIFY_PEERS, TLS_VERIFY_PEERS_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSOptions::OPT_TLS_PASSWORD, TLS_PASSWORD_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSOptions::OPT_TLS_CA_FILE, TLS_CA_FILE_FLAG, SO_REQ_SEP },
|
||||
|
||||
#define TLS_HELP \
|
||||
" " TLS_CERTIFICATE_FILE_FLAG " CERTFILE\n" \
|
||||
" The path of a file containing the TLS certificate and CA\n" \
|
||||
" chain.\n" \
|
||||
" " TLS_CA_FILE_FLAG " CERTAUTHFILE\n" \
|
||||
" The path of a file containing the CA certificates chain.\n" \
|
||||
" " TLS_KEY_FILE_FLAG " KEYFILE\n" \
|
||||
" The path of a file containing the private key corresponding\n" \
|
||||
" to the TLS certificate.\n" \
|
||||
" " TLS_PASSWORD_FLAG " PASSCODE\n" \
|
||||
" The passphrase of encrypted private key\n" \
|
||||
" " TLS_VERIFY_PEERS_FLAG " CONSTRAINTS\n" \
|
||||
" The constraints by which to validate TLS peers. The contents\n" \
|
||||
" and format of CONSTRAINTS are plugin-specific.\n"
|
||||
|
||||
#endif /* FLOW_TLSCONNECTION_H */
|
|
@ -33,7 +33,6 @@
|
|||
<ClCompile Include="ReplicationTypes.cpp" />
|
||||
<ClCompile Include="ReplicationPolicy.cpp" />
|
||||
<ClCompile Include="sim_validation.cpp" />
|
||||
<ActorCompiler Include="TLSConnection.actor.cpp" />
|
||||
<ClCompile Include="TraceFileIO.cpp" />
|
||||
<ClCompile Include="zlib\gzwrite.c" />
|
||||
<ClCompile Include="zlib\gzclose.c" />
|
||||
|
@ -88,7 +87,6 @@
|
|||
<ClInclude Include="Platform.h" />
|
||||
<ClInclude Include="fdbrpc.h" />
|
||||
<ClInclude Include="FlowTransport.h" />
|
||||
<ClInclude Include="ITLSPlugin.h" />
|
||||
<ClInclude Include="libcoroutine\Base.h" />
|
||||
<ClInclude Include="libcoroutine\Common.h" />
|
||||
<ClInclude Include="libcoroutine\Coro.h" />
|
||||
|
@ -107,7 +105,6 @@
|
|||
<ClInclude Include="sim_validation.h" />
|
||||
<ClInclude Include="Smoother.h" />
|
||||
<ClInclude Include="TimedRequest.h" />
|
||||
<ClInclude Include="TLSConnection.h" />
|
||||
<ClInclude Include="TraceFileIO.h" />
|
||||
<ClInclude Include="zlib\zlib.h" />
|
||||
<ClInclude Include="zlib\deflate.h" />
|
||||
|
|
|
@@ -10,7 +10,6 @@
	<ActorCompiler Include="AsyncFileCached.actor.cpp" />
	<ActorCompiler Include="AsyncFileNonDurable.actor.h" />
	<ActorCompiler Include="AsyncFileNonDurable.actor.cpp" />
	<ActorCompiler Include="TLSConnection.actor.cpp" />
	<ActorCompiler Include="dsltest.actor.cpp" />
	<ActorCompiler Include="FlowTests.actor.cpp" />
	<ActorCompiler Include="genericactors.actor.cpp" />

@@ -128,7 +127,6 @@
	<ClInclude Include="zlib\inftrees.h">
		<Filter>zlib</Filter>
	</ClInclude>
	<ClInclude Include="ITLSPlugin.h" />
	<ClInclude Include="FailureMonitor.h" />
	<ClInclude Include="FlowTransport.h" />
	<ClInclude Include="IAsyncFile.h" />

@@ -143,7 +141,6 @@
	<ClInclude Include="RangeMap.h" />
	<ClInclude Include="Smoother.h" />
	<ClInclude Include="TraceFileIO.h" />
	<ClInclude Include="TLSConnection.h" />
	<ClInclude Include="IRateControl.h" />
	<ClInclude Include="Replication.h" />
	<ClInclude Include="ReplicationTypes.h" />

@@ -200,6 +200,9 @@ struct Sim2Conn : IConnection, ReferenceCounted<Sim2Conn> {
	virtual void delref() { ReferenceCounted<Sim2Conn>::delref(); }
	virtual void close() { closedByCaller = true; closeInternal(); }

	virtual Future<Void> acceptHandshake() { return delay(0.01*deterministicRandom()->random01()); }
	virtual Future<Void> connectHandshake() { return delay(0.01*deterministicRandom()->random01()); }

	virtual Future<Void> onWritable() { return whenWritable(this); }
	virtual Future<Void> onReadable() { return whenReadable(this); }

@@ -756,6 +759,12 @@ public:
	// Everything actually network related is delegated to the Sim2Net class; Sim2 is only concerned with simulating machines and time
	virtual double now() { return time; }

	// timer() can be up to one second ahead of now()
	virtual double timer() {
		timerTime += deterministicRandom()->random01()*(time+1.0-timerTime)/2.0;
		return timerTime;
	}

	virtual Future<class Void> delay( double seconds, TaskPriority taskID ) {
		ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
		return delay( seconds, taskID, currentProcess );
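The timer() rule above nudges timerTime toward, but never past, time + 1.0, so the simulated timer() stays monotone and at most one second ahead of now(). A standalone sketch of that invariant, using a plain PRNG in place of deterministicRandom() (illustrative only, not simulator code):

#include <algorithm>
#include <cassert>
#include <random>

int main() {
	std::mt19937_64 rng(42);
	std::uniform_real_distribution<double> random01(0.0, 1.0);

	double time = 0.0, timerTime = 0.0;
	for (int step = 0; step < 100000; ++step) {
		// Occasionally advance simulated time, as Sim2 does when it adopts a new task's time.
		if (random01(rng) < 0.3) {
			time += random01(rng);
			timerTime = std::max(timerTime, time);
		}
		double before = timerTime;
		timerTime += random01(rng) * (time + 1.0 - timerTime) / 2.0; // the timer() update rule
		assert(timerTime >= before);              // monotone
		assert(timerTime <= time + 1.0 + 1e-9);   // never more than ~one second ahead
	}
	return 0;
}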
@@ -806,7 +815,7 @@ public:
	}
	// Sets the taskID/priority of the current task, without yielding
	virtual Future<Reference<IConnection>> connect( NetworkAddress toAddr, std::string host ) {
		ASSERT( !toAddr.isTLS() && host.empty());
		ASSERT( host.empty());
		if (!addressMap.count( toAddr )) {
			return waitForProcessAndConnect( toAddr, this );
		}

@@ -824,7 +833,7 @@ public:
		} else {
			localIp = IPAddress(getCurrentProcess()->address.ip.toV4() + deterministicRandom()->randomInt(0, 256));
		}
		peerc->connect(myc, NetworkAddress(localIp, deterministicRandom()->randomInt(40000, 60000)));
		peerc->connect(myc, NetworkAddress(localIp, deterministicRandom()->randomInt(40000, 60000), false, toAddr.isTLS()));

		((Sim2Listener*)peerp->getListener(toAddr).getPtr())->incomingConnection( 0.5*deterministicRandom()->random01(), Reference<IConnection>(peerc) );
		return onConnect( ::delay(0.5*deterministicRandom()->random01()), myc );
@@ -845,7 +854,6 @@ public:
		return conn;
	}
	virtual Reference<IListener> listen( NetworkAddress localAddr ) {
		ASSERT( !localAddr.isTLS() );
		Reference<IListener> listener( getCurrentProcess()->getListener(localAddr) );
		ASSERT(listener);
		return listener;
@@ -994,7 +1002,7 @@ public:
		Future<Void> loopFuture = runLoop(this);
		net2->run();
	}
	virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, uint16_t listenPerProcess,
	virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, bool sslEnabled, uint16_t listenPerProcess,
	                                LocalityData locality, ProcessClass startingClass, const char* dataFolder,
	                                const char* coordinationFolder) {
		ASSERT( locality.machineId().present() );

@@ -1023,14 +1031,14 @@ public:
		}

		NetworkAddressList addresses;
		addresses.address = NetworkAddress(ip, port, true, false);
		addresses.address = NetworkAddress(ip, port, true, sslEnabled);
		if(listenPerProcess == 2) {
			addresses.secondaryAddress = NetworkAddress(ip, port+1, true, false);
		}

		ProcessInfo* m = new ProcessInfo(name, locality, startingClass, addresses, this, dataFolder, coordinationFolder);
		for (int processPort = port; processPort < port + listenPerProcess; ++processPort) {
			NetworkAddress address(ip, processPort, true, false); // SOMEDAY see above about becoming SSL!
			NetworkAddress address(ip, processPort, true, sslEnabled && processPort == port);
			m->listenerMap[address] = Reference<IListener>( new Sim2Listener(m, address) );
			addressMap[address] = m;
		}
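Under this change only the first listening port of a simulated process is marked TLS when sslEnabled is set; a secondary listener (listenPerProcess == 2) stays plain. A toy sketch of that address layout — PortInfo and listenerPorts exist only for illustration:

#include <cstdint>
#include <vector>

struct PortInfo { uint16_t port; bool tls; };

// Mirrors the loop above: the TLS flag applies to the primary port only.
std::vector<PortInfo> listenerPorts(uint16_t port, bool sslEnabled, uint16_t listenPerProcess) {
	std::vector<PortInfo> out;
	for (uint16_t p = port; p < port + listenPerProcess; ++p) {
		out.push_back({ p, sslEnabled && p == port });
	}
	return out;
}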
@@ -1563,7 +1571,7 @@ public:
		return processes;
	}
	virtual ProcessInfo* getProcessByAddress( NetworkAddress const& address ) {
		NetworkAddress normalizedAddress(address.ip, address.port, true, false);
		NetworkAddress normalizedAddress(address.ip, address.port, true, address.isTLS());
		ASSERT( addressMap.count( normalizedAddress ) );
		return addressMap[ normalizedAddress ];
	}

@@ -1587,7 +1595,7 @@ public:
		machines.erase(machineId);
	}

	Sim2() : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
	Sim2() : time(0.0), timerTime(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
		// Not letting currentProcess be NULL eliminates some annoying special cases
		currentProcess = new ProcessInfo("NoMachine", LocalityData(Optional<Standalone<StringRef>>(), StringRef(), StringRef(), StringRef()), ProcessClass(), {NetworkAddress()}, this, "", "");
		g_network = net2 = newNet2(false, true);

@@ -1623,6 +1631,7 @@ public:
		else {
			mutex.enter();
			this->time = t.time;
			this->timerTime = std::max(this->timerTime, this->time);
			mutex.leave();

			this->currentProcess = t.machine;

@@ -1675,6 +1684,7 @@ public:
	//time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because
	//time should only be modified from the main thread.
	double time;
	double timerTime;
	TaskPriority currentTaskID;

	//taskCount is guarded by ISimulator::mutex

@@ -1717,7 +1727,7 @@ ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
	TEST( kt == ISimulator::RebootAndDelete ); // Simulated machine rebooted with data and coordination state deletion
	TEST( kt == ISimulator::RebootProcessAndDelete ); // Simulated process rebooted with data and coordination state deletion

	if( p->rebooting )
	if( p->rebooting || !p->isReliable() )
		return;
	TraceEvent("RebootingProcess").detail("KillType", kt).detail("Address", p->address).detail("ZoneId", p->locality.zoneId()).detail("DataHall", p->locality.dataHallId()).detail("Locality", p->locality.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).backtrace();
	p->rebooting = true;

@@ -142,7 +142,7 @@ public:
	virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;
	virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;

	virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, uint16_t listenPerProcess,
	virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, bool sslEnabled, uint16_t listenPerProcess,
	                                LocalityData locality, ProcessClass startingClass, const char* dataFolder,
	                                const char* coordinationFolder) = 0;
	virtual void killProcess( ProcessInfo* machine, KillType ) = 0;

@ -57,7 +57,6 @@ struct WorkerInfo : NonCopyable {
|
|||
ReplyPromise<RegisterWorkerReply> reply;
|
||||
Generation gen;
|
||||
int reboots;
|
||||
double lastAvailableTime;
|
||||
ProcessClass initialClass;
|
||||
ClusterControllerPriorityInfo priorityInfo;
|
||||
WorkerDetails details;
|
||||
|
@ -65,19 +64,18 @@ struct WorkerInfo : NonCopyable {
|
|||
Future<Void> haltDistributor;
|
||||
Optional<uint16_t> storageCacheInfo;
|
||||
|
||||
WorkerInfo() : gen(-1), reboots(0), lastAvailableTime(now()), priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {}
|
||||
WorkerInfo() : gen(-1), reboots(0), priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {}
|
||||
WorkerInfo( Future<Void> watcher, ReplyPromise<RegisterWorkerReply> reply, Generation gen, WorkerInterface interf, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, bool degraded ) :
|
||||
watcher(watcher), reply(reply), gen(gen), reboots(0), lastAvailableTime(now()), initialClass(initialClass), priorityInfo(priorityInfo), details(interf, processClass, degraded) {}
|
||||
watcher(watcher), reply(reply), gen(gen), reboots(0), initialClass(initialClass), priorityInfo(priorityInfo), details(interf, processClass, degraded) {}
|
||||
|
||||
WorkerInfo( WorkerInfo&& r ) BOOST_NOEXCEPT : watcher(std::move(r.watcher)), reply(std::move(r.reply)), gen(r.gen),
|
||||
reboots(r.reboots), lastAvailableTime(r.lastAvailableTime), initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)),
|
||||
reboots(r.reboots), initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)),
|
||||
haltRatekeeper(r.haltRatekeeper), haltDistributor(r.haltDistributor), storageCacheInfo(r.storageCacheInfo) {}
|
||||
void operator=( WorkerInfo&& r ) BOOST_NOEXCEPT {
|
||||
watcher = std::move(r.watcher);
|
||||
reply = std::move(r.reply);
|
||||
gen = r.gen;
|
||||
reboots = r.reboots;
|
||||
lastAvailableTime = r.lastAvailableTime;
|
||||
initialClass = r.initialClass;
|
||||
priorityInfo = r.priorityInfo;
|
||||
details = std::move(r.details);
|
||||
|
@ -392,7 +390,8 @@ public:
|
|||
std::vector<LocalityData> tLocalities;
|
||||
|
||||
// Try to find the best team of servers to fulfill the policy
|
||||
if (findBestPolicySet(bestSet, logServerSet, policy, desired, SERVER_KNOBS->POLICY_RATING_TESTS, SERVER_KNOBS->POLICY_GENERATIONS)) {
|
||||
if (findBestPolicySet(bestSet, logServerSet, policy, desired, SERVER_KNOBS->POLICY_RATING_TESTS,
|
||||
SERVER_KNOBS->POLICY_GENERATIONS)) {
|
||||
results.reserve(results.size() + bestSet.size());
|
||||
for (auto& entry : bestSet) {
|
||||
auto object = logServerMap->getObject(entry);
|
||||
|
@ -434,8 +433,6 @@ public:
|
|||
TraceEvent("GetTLogTeamDone").detail("Completed", bCompleted).detail("Policy", policy->info()).detail("Results", results.size()).detail("Processes", logServerSet->size()).detail("Workers", id_worker.size())
|
||||
.detail("Required", required).detail("Desired", desired).detail("RatingTests",SERVER_KNOBS->POLICY_RATING_TESTS).detail("PolicyGenerations",SERVER_KNOBS->POLICY_GENERATIONS);
|
||||
|
||||
logServerSet->clear();
|
||||
logServerSet.clear();
|
||||
|
||||
return results;
|
||||
}
|
||||
|
@ -448,7 +445,7 @@ public:
|
|||
if(satelliteFallback || region.satelliteTLogUsableDcsFallback == 0) {
|
||||
throw no_more_servers();
|
||||
} else {
|
||||
if(now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) {
|
||||
if(!goodRecruitmentTime.isReady()) {
|
||||
throw operation_failed();
|
||||
}
|
||||
satelliteFallback = true;
|
||||
|
@ -692,18 +689,8 @@ public:
|
|||
result.logRouters.push_back(logRouters[i].interf);
|
||||
}
|
||||
|
||||
if(!remoteStartTime.present()) {
|
||||
double maxAvailableTime = 0;
|
||||
for(auto& it : result.remoteTLogs) {
|
||||
maxAvailableTime = std::max(maxAvailableTime, id_worker[it.locality.processId()].lastAvailableTime);
|
||||
}
|
||||
for(auto& it : result.logRouters) {
|
||||
maxAvailableTime = std::max(maxAvailableTime, id_worker[it.locality.processId()].lastAvailableTime);
|
||||
}
|
||||
remoteStartTime = maxAvailableTime;
|
||||
}
|
||||
|
||||
if( now() - remoteStartTime.get() < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY &&
|
||||
if( !goodRemoteRecruitmentTime.isReady() &&
|
||||
( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs(), ProcessClass::TLog).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) ||
|
||||
( RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount, ProcessClass::LogRouter).betterCount(RoleFitness(logRouters, ProcessClass::LogRouter)) ) ) ) {
|
||||
throw operation_failed();
|
||||
|
@ -790,7 +777,7 @@ public:
|
|||
[](const WorkerDetails& w) { return w.interf; });
|
||||
}
|
||||
|
||||
if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY &&
|
||||
if( !goodRecruitmentTime.isReady() &&
|
||||
( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
|
||||
( region.satelliteTLogReplicationFactor > 0 && RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId), ProcessClass::TLog).betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog)) ) ||
|
||||
RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(RoleFitness(proxies, ProcessClass::Proxy)) ||
|
||||
|
@ -827,7 +814,7 @@ public:
|
|||
}
|
||||
throw no_more_servers();
|
||||
} catch( Error& e ) {
|
||||
if (now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && regions[1].dcId != clusterControllerDcId.get()) {
|
||||
if (!goodRemoteRecruitmentTime.isReady() && regions[1].dcId != clusterControllerDcId.get()) {
|
||||
throw operation_failed();
|
||||
}
|
||||
|
||||
|
@ -955,7 +942,7 @@ public:
|
|||
.detail("DesiredProxies", req.configuration.getDesiredProxies()).detail("ActualProxies", result.proxies.size())
|
||||
.detail("DesiredResolvers", req.configuration.getDesiredResolvers()).detail("ActualResolvers", result.resolvers.size());
|
||||
|
||||
if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY &&
|
||||
if( !goodRecruitmentTime.isReady() &&
|
||||
( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
|
||||
RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(bestFitness.proxy) ||
|
||||
RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers(), ProcessClass::Resolver).betterCount(bestFitness.resolver) ) ) {
|
||||
|
@ -1347,11 +1334,13 @@ public:
|
|||
ActorCollection ac;
|
||||
UpdateWorkerList updateWorkerList;
|
||||
Future<Void> outstandingRequestChecker;
|
||||
Future<Void> outstandingRemoteRequestChecker;
|
||||
|
||||
DBInfo db;
|
||||
Database cx;
|
||||
double startTime;
|
||||
Optional<double> remoteStartTime;
|
||||
Future<Void> goodRecruitmentTime;
|
||||
Future<Void> goodRemoteRecruitmentTime;
|
||||
Version datacenterVersionDifference;
|
||||
PromiseStream<Future<Void>> addActor;
|
||||
bool versionDifferenceUpdated;
|
||||
|
@ -1375,8 +1364,9 @@ public:
|
|||
|
||||
ClusterControllerData( ClusterControllerFullInterface const& ccInterface, LocalityData const& locality )
|
||||
: clusterControllerProcessId(locality.processId()), clusterControllerDcId(locality.dcId()),
|
||||
id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false),
|
||||
gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0),
|
||||
id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), outstandingRemoteRequestChecker(Void()), gotProcessClasses(false),
|
||||
gotFullyRecoveredConfig(false), startTime(now()), goodRecruitmentTime(Never()),
|
||||
goodRemoteRecruitmentTime(Never()), datacenterVersionDifference(0),
|
||||
versionDifferenceUpdated(false), recruitingDistributor(false), recruitRatekeeper(false),
|
||||
clusterControllerMetrics("ClusterController", id.toString()),
|
||||
openDatabaseRequests("OpenDatabaseRequests", clusterControllerMetrics),
|
||||
|
@ -1424,7 +1414,7 @@ ACTOR Future<Void> clusterWatchDatabase( ClusterControllerData* cluster, Cluster
|
|||
id_used[cluster->clusterControllerProcessId]++;
|
||||
state WorkerFitnessInfo masterWorker = cluster->getWorkerForRoleInDatacenter(cluster->clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db->config, id_used);
|
||||
if( ( masterWorker.worker.processClass.machineClassFitness( ProcessClass::Master ) > SERVER_KNOBS->EXPECTED_MASTER_FITNESS || masterWorker.worker.interf.locality.processId() == cluster->clusterControllerProcessId )
|
||||
&& now() - cluster->startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY ) {
|
||||
&& !cluster->goodRecruitmentTime.isReady() ) {
|
||||
TraceEvent("CCWDB", cluster->id).detail("Fitness", masterWorker.worker.processClass.machineClassFitness( ProcessClass::Master ));
|
||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
continue;
|
||||
|
@ -1703,9 +1693,11 @@ void checkBetterDDOrRK(ClusterControllerData* self) {
|
|||
ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
|
||||
try {
|
||||
wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) );
|
||||
while( !self->goodRecruitmentTime.isReady() ) {
|
||||
wait(self->goodRecruitmentTime);
|
||||
}
|
||||
|
||||
checkOutstandingRecruitmentRequests( self );
|
||||
checkOutstandingRemoteRecruitmentRequests( self );
|
||||
checkOutstandingStorageRequests( self );
|
||||
checkBetterDDOrRK(self);
|
||||
|
||||
|
@ -1715,7 +1707,23 @@ ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
|
|||
TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().read().master.id());
|
||||
}
|
||||
} catch( Error &e ) {
|
||||
if(e.code() != error_code_operation_failed && e.code() != error_code_no_more_servers) {
|
||||
if(e.code() != error_code_no_more_servers) {
|
||||
TraceEvent(SevError, "CheckOutstandingError").error(e);
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> doCheckOutstandingRemoteRequests( ClusterControllerData* self ) {
|
||||
try {
|
||||
wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) );
|
||||
while( !self->goodRemoteRecruitmentTime.isReady() ) {
|
||||
wait(self->goodRemoteRecruitmentTime);
|
||||
}
|
||||
|
||||
checkOutstandingRemoteRecruitmentRequests( self );
|
||||
} catch( Error &e ) {
|
||||
if(e.code() != error_code_no_more_servers) {
|
||||
TraceEvent(SevError, "CheckOutstandingError").error(e);
|
||||
}
|
||||
}
|
||||
|
@ -1723,10 +1731,13 @@ ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
|
|||
}
|
||||
|
||||
void checkOutstandingRequests( ClusterControllerData* self ) {
|
||||
if( !self->outstandingRequestChecker.isReady() )
|
||||
return;
|
||||
if( self->outstandingRemoteRequestChecker.isReady() ) {
|
||||
self->outstandingRemoteRequestChecker = doCheckOutstandingRemoteRequests(self);
|
||||
}
|
||||
|
||||
self->outstandingRequestChecker = doCheckOutstandingRequests(self);
|
||||
if( self->outstandingRequestChecker.isReady() ) {
|
||||
self->outstandingRequestChecker = doCheckOutstandingRequests(self);
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> rebootAndCheck( ClusterControllerData* cluster, Optional<Standalone<StringRef>> processID ) {
|
||||
|
@ -1734,7 +1745,6 @@ ACTOR Future<Void> rebootAndCheck( ClusterControllerData* cluster, Optional<Stan
|
|||
auto watcher = cluster->id_worker.find(processID);
|
||||
ASSERT(watcher != cluster->id_worker.end());
|
||||
|
||||
watcher->second.lastAvailableTime = now();
|
||||
watcher->second.reboots++;
|
||||
wait( delay( g_network->isSimulated() ? SERVER_KNOBS->SIM_SHUTDOWN_TIMEOUT : SERVER_KNOBS->SHUTDOWN_TIMEOUT ) );
|
||||
}
|
||||
|
@ -1998,7 +2008,7 @@ ACTOR Future<Void> clusterRecruitFromConfiguration( ClusterControllerData* self,
|
|||
req.reply.send( rep );
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_no_more_servers && now() - self->startTime >= SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) {
|
||||
if (e.code() == error_code_no_more_servers && self->goodRecruitmentTime.isReady()) {
|
||||
self->outstandingRecruitmentRequests.push_back( req );
|
||||
TraceEvent(SevWarn, "RecruitFromConfigurationNotAvailable", self->id).error(e);
|
||||
return Void();
|
||||
|
@ -2010,7 +2020,7 @@ ACTOR Future<Void> clusterRecruitFromConfiguration( ClusterControllerData* self,
|
|||
throw; // goodbye, cluster controller
|
||||
}
|
||||
}
|
||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2026,7 +2036,7 @@ ACTOR Future<Void> clusterRecruitRemoteFromConfiguration( ClusterControllerData*
|
|||
req.reply.send( rep );
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
if (e.code() == error_code_no_more_servers && self->remoteStartTime.present() && now() - self->remoteStartTime.get() >= SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) {
|
||||
if (e.code() == error_code_no_more_servers && self->goodRemoteRecruitmentTime.isReady()) {
|
||||
self->outstandingRemoteRecruitmentRequests.push_back( req );
|
||||
TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e);
|
||||
return Void();
|
||||
|
@ -2038,7 +2048,7 @@ ACTOR Future<Void> clusterRecruitRemoteFromConfiguration( ClusterControllerData*
|
|||
throw; // goodbye, cluster controller
|
||||
}
|
||||
}
|
||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2141,6 +2151,8 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
|
|||
|
||||
if(info == self->id_worker.end()) {
|
||||
TraceEvent("ClusterControllerActualWorkers", self->id).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size());
|
||||
self->goodRecruitmentTime = lowPriorityDelay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY);
|
||||
self->goodRemoteRecruitmentTime = lowPriorityDelay(SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY);
|
||||
} else {
|
||||
TraceEvent("ClusterControllerWorkerAlreadyRegistered", self->id).suppressFor(1.0).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size());
|
||||
}
|
||||
|
@ -2928,7 +2940,7 @@ ACTOR Future<DataDistributorInterface> startDataDistributor( ClusterControllerDa
|
|||
throw;
|
||||
}
|
||||
}
|
||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3002,7 +3014,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
|
|||
throw;
|
||||
}
|
||||
}
|
||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -401,7 +401,7 @@ struct LeaderRegisterCollection {
|
|||
if( !self->pStore->exists() )
|
||||
return Void();
|
||||
OnDemandStore &store = *self->pStore;
|
||||
Standalone<VectorRef<KeyValueRef>> forwardingInfo = wait( store->readRange( fwdKeys ) );
|
||||
Standalone<RangeResultRef> forwardingInfo = wait( store->readRange( fwdKeys ) );
|
||||
for( int i = 0; i < forwardingInfo.size(); i++ ) {
|
||||
LeaderInfo forwardInfo;
|
||||
forwardInfo.forward = true;
|
||||
|
|
|
@@ -251,63 +251,68 @@ public:

	virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) {
		int64_t physicalBytes = getLoadAverage();
		double minFreeSpaceRatio = getMinFreeSpaceRatio(includeInFlight);
		double minAvailableSpaceRatio = getMinAvailableSpaceRatio(includeInFlight);
		int64_t inFlightBytes = includeInFlight ? getDataInFlightToTeam() / servers.size() : 0;
		double freeSpaceMultiplier = SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF / ( std::max( std::min( SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF, minFreeSpaceRatio ), 0.000001 ) );
		double availableSpaceMultiplier = SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF / ( std::max( std::min( SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF, minAvailableSpaceRatio ), 0.000001 ) );
		if(servers.size()>2) {
			//make sure in triple replication the penalty is high enough that you will always avoid a team with a member at 20% free space
			availableSpaceMultiplier = availableSpaceMultiplier * availableSpaceMultiplier;
		}

		if(freeSpaceMultiplier > 1 && deterministicRandom()->random01() < 0.001)
			TraceEvent(SevWarn, "DiskNearCapacity").detail("FreeSpaceRatio", minFreeSpaceRatio);
		if(minAvailableSpaceRatio < SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO) {
			TraceEvent(SevWarn, "DiskNearCapacity").suppressFor(1.0).detail("AvailableSpaceRatio", minAvailableSpaceRatio);
		}

		return (physicalBytes + (inflightPenalty*inFlightBytes)) * freeSpaceMultiplier;
		return (physicalBytes + (inflightPenalty*inFlightBytes)) * availableSpaceMultiplier;
	}

	virtual int64_t getMinFreeSpace( bool includeInFlight = true ) {
		int64_t minFreeSpace = std::numeric_limits<int64_t>::max();
	virtual int64_t getMinAvailableSpace( bool includeInFlight = true ) {
		int64_t minAvailableSpace = std::numeric_limits<int64_t>::max();
		for(int i=0; i<servers.size(); i++) {
			if( servers[i]->serverMetrics.present() ) {
				auto& replyValue = servers[i]->serverMetrics.get();

				ASSERT(replyValue.free.bytes >= 0);
				ASSERT(replyValue.available.bytes >= 0);
				ASSERT(replyValue.capacity.bytes >= 0);

				int64_t bytesFree = replyValue.free.bytes;
				int64_t bytesAvailable = replyValue.available.bytes;
				if(includeInFlight) {
					bytesFree -= servers[i]->dataInFlightToServer;
					bytesAvailable -= servers[i]->dataInFlightToServer;
				}

				minFreeSpace = std::min(bytesFree, minFreeSpace);
				minAvailableSpace = std::min(bytesAvailable, minAvailableSpace);
			}
		}

		return minFreeSpace; // Could be negative
		return minAvailableSpace; // Could be negative
	}

	virtual double getMinFreeSpaceRatio( bool includeInFlight = true ) {
	virtual double getMinAvailableSpaceRatio( bool includeInFlight = true ) {
		double minRatio = 1.0;
		for(int i=0; i<servers.size(); i++) {
			if( servers[i]->serverMetrics.present() ) {
				auto& replyValue = servers[i]->serverMetrics.get();

				ASSERT(replyValue.free.bytes >= 0);
				ASSERT(replyValue.available.bytes >= 0);
				ASSERT(replyValue.capacity.bytes >= 0);

				int64_t bytesFree = replyValue.free.bytes;
				int64_t bytesAvailable = replyValue.available.bytes;
				if(includeInFlight) {
					bytesFree = std::max((int64_t)0, bytesFree - servers[i]->dataInFlightToServer);
					bytesAvailable = std::max((int64_t)0, bytesAvailable - servers[i]->dataInFlightToServer);
				}

				if(replyValue.capacity.bytes == 0)
					minRatio = 0;
				else
					minRatio = std::min( minRatio, ((double)bytesFree) / replyValue.capacity.bytes );
					minRatio = std::min( minRatio, ((double)bytesAvailable) / replyValue.capacity.bytes );
			}
		}

		return minRatio;
	}

	virtual bool hasHealthyFreeSpace() {
		return getMinFreeSpaceRatio() > SERVER_KNOBS->MIN_FREE_SPACE_RATIO && getMinFreeSpace() > SERVER_KNOBS->MIN_FREE_SPACE;
	virtual bool hasHealthyAvailableSpace(double minRatio) {
		return getMinAvailableSpaceRatio() >= minRatio && getMinAvailableSpace() > SERVER_KNOBS->MIN_AVAILABLE_SPACE;
	}

	virtual Future<Void> updateStorageMetrics() {
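The load estimate above is scaled by an available-space multiplier: FREE_SPACE_RATIO_CUTOFF divided by the team's clamped minimum available-space ratio, then squared for teams larger than two so a nearly full member dominates the score. A self-contained sketch of that penalty curve; the 0.35 cutoff below is an assumed stand-in for the knob value, used for illustration only:

#include <algorithm>
#include <cstdio>

// cutoff stands in for SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF (value assumed here).
double availableSpaceMultiplier(double minAvailableSpaceRatio, int teamSize, double cutoff = 0.35) {
	double m = cutoff / std::max(std::min(cutoff, minAvailableSpaceRatio), 0.000001);
	if (teamSize > 2) {
		m = m * m; // triple (or wider) replication: penalize low-space teams much harder
	}
	return m;
}

int main() {
	for (double ratio : { 0.50, 0.35, 0.20, 0.10, 0.05 }) {
		std::printf("ratio %.2f -> x%.2f (team of 3)\n", ratio, availableSpaceMultiplier(ratio, 3));
	}
	// Ratios at or above the cutoff leave the load unchanged (multiplier 1);
	// below it, the multiplier grows quadratically for teams of three or more.
	return 0;
}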
@@ -638,6 +643,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
	std::vector<DDTeamCollection*> teamCollections;
	AsyncVar<Optional<Key>> healthyZone;
	Future<bool> clearHealthyZoneFuture;
	double medianAvailableSpace;
	double lastMedianAvailableSpaceUpdate;
	// clang-format on

	void resetLocalitySet() {

@@ -682,8 +689,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
		initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
		optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),
		unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs),
		zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary),
		processingUnhealthy(processingUnhealthy) {
		zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO),
		lastMedianAvailableSpaceUpdate(0), processingUnhealthy(processingUnhealthy) {
		if(!primary || configuration.usableRegions == 1) {
			TraceEvent("DDTrackerStarting", distributorId)
				.detail( "State", "Inactive" )

@@ -757,6 +764,24 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
	ACTOR static Future<Void> getTeam( DDTeamCollection* self, GetTeamRequest req ) {
		try {
			wait( self->checkBuildTeams( self ) );
			if(now() - self->lastMedianAvailableSpaceUpdate > SERVER_KNOBS->AVAILABLE_SPACE_UPDATE_DELAY) {
				self->lastMedianAvailableSpaceUpdate = now();
				std::vector<double> teamAvailableSpace;
				teamAvailableSpace.reserve(self->teams.size());
				for( int i = 0; i < self->teams.size(); i++ ) {
					if (self->teams[i]->isHealthy()) {
						teamAvailableSpace.push_back(self->teams[i]->getMinAvailableSpaceRatio());
					}
				}

				size_t pivot = teamAvailableSpace.size()/2;
				if (teamAvailableSpace.size() > 1) {
					std::nth_element(teamAvailableSpace.begin(), teamAvailableSpace.begin()+pivot, teamAvailableSpace.end());
					self->medianAvailableSpace = std::max(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO, std::min(SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO, teamAvailableSpace[pivot]));
				} else {
					self->medianAvailableSpace = SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO;
				}
			}

			// Select the best team
			// Currently the metric is minimum used disk space (adjusted for data in flight)
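The pivot later used by hasHealthyAvailableSpace() is the median available-space ratio across currently healthy teams, recomputed at most once per AVAILABLE_SPACE_UPDATE_DELAY and clamped between MIN_AVAILABLE_SPACE_RATIO and TARGET_AVAILABLE_SPACE_RATIO. A standalone sketch of that computation; the 0.05 and 0.30 bounds are stand-ins for the knobs, not values taken from the commit:

#include <algorithm>
#include <vector>

// ratios: minimum available-space ratio of each healthy team.
// minRatio/targetRatio stand in for SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO and
// SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO (values assumed for illustration).
double medianAvailableSpacePivot(std::vector<double> ratios,
                                 double minRatio = 0.05, double targetRatio = 0.30) {
	if (ratios.size() <= 1) {
		return minRatio; // too few samples: fall back to the most permissive pivot
	}
	size_t pivot = ratios.size() / 2;
	std::nth_element(ratios.begin(), ratios.begin() + pivot, ratios.end()); // O(n) median selection
	return std::max(minRatio, std::min(targetRatio, ratios[pivot]));
}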
@ -777,6 +802,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
std::vector<Reference<IDataDistributionTeam>> randomTeams;
|
||||
const std::set<UID> completeSources(req.completeSources.begin(), req.completeSources.end());
|
||||
|
||||
// Note: this block does not apply any filters from the request
|
||||
if( !req.wantsNewServers ) {
|
||||
for( int i = 0; i < req.completeSources.size(); i++ ) {
|
||||
if( !self->server_info.count( req.completeSources[i] ) ) {
|
||||
|
@ -803,7 +829,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
if( req.wantsTrueBest ) {
|
||||
ASSERT( !bestOption.present() );
|
||||
for( int i = 0; i < self->teams.size(); i++ ) {
|
||||
if( self->teams[i]->isHealthy() && (!req.preferLowerUtilization || self->teams[i]->hasHealthyFreeSpace()) ) {
|
||||
if (self->teams[i]->isHealthy() &&
|
||||
(!req.preferLowerUtilization || self->teams[i]->hasHealthyAvailableSpace(self->medianAvailableSpace)) &&
|
||||
(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(self->teams[i]->getServerIDs(), self->primary)).size() > 0))
|
||||
{
|
||||
int64_t loadBytes = self->teams[i]->getLoadBytes(true, req.inflightPenalty);
|
||||
if( !bestOption.present() || ( req.preferLowerUtilization && loadBytes < bestLoadBytes ) || ( !req.preferLowerUtilization && loadBytes > bestLoadBytes ) ) {
|
||||
bestLoadBytes = loadBytes;
|
||||
|
@ -818,7 +847,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
// If unhealthy team is majority, we may not find an ok dest in this while loop
|
||||
Reference<IDataDistributionTeam> dest = deterministicRandom()->randomChoice(self->teams);
|
||||
|
||||
bool ok = dest->isHealthy() && (!req.preferLowerUtilization || dest->hasHealthyFreeSpace());
|
||||
bool ok = dest->isHealthy() &&
|
||||
(!req.preferLowerUtilization || dest->hasHealthyAvailableSpace(self->medianAvailableSpace)) &&
|
||||
(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(dest->getServerIDs(), self->primary)).size() > 0);
|
||||
|
||||
for(int i=0; ok && i<randomTeams.size(); i++) {
|
||||
if (randomTeams[i]->getServerIDs() == dest->getServerIDs()) {
|
||||
ok = false;
|
||||
|
@ -848,6 +880,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
|
||||
// Note: req.completeSources can be empty and all servers (and server teams) can be unhealthy.
|
||||
// We will get stuck at this! This only happens when a DC fails. No need to consider it right now.
|
||||
// Note: this block does not apply any filters from the request
|
||||
if(!bestOption.present() && self->zeroHealthyTeams->get()) {
|
||||
//Attempt to find the unhealthy source server team and return it
|
||||
for( int i = 0; i < req.completeSources.size(); i++ ) {
|
||||
|
@ -1317,7 +1350,6 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
|
|||
TraceEvent("ServerTeamInfo", distributorId)
|
||||
.detail("TeamIndex", i++)
|
||||
.detail("Healthy", team->isHealthy())
|
||||
.detail("HasHealthyFreeSpace", team->hasHealthyFreeSpace())
|
||||
.detail("TeamSize", team->size())
|
||||
.detail("MemberIDs", team->getServerIDsStr());
|
||||
}
|
||||
|
|
|
@@ -45,9 +45,9 @@ struct IDataDistributionTeam {
	virtual void addDataInFlightToTeam( int64_t delta ) = 0;
	virtual int64_t getDataInFlightToTeam() = 0;
	virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) = 0;
	virtual int64_t getMinFreeSpace( bool includeInFlight = true ) = 0;
	virtual double getMinFreeSpaceRatio( bool includeInFlight = true ) = 0;
	virtual bool hasHealthyFreeSpace() = 0;
	virtual int64_t getMinAvailableSpace( bool includeInFlight = true ) = 0;
	virtual double getMinAvailableSpaceRatio( bool includeInFlight = true ) = 0;
	virtual bool hasHealthyAvailableSpace( double minRatio ) = 0;
	virtual Future<Void> updateStorageMetrics() = 0;
	virtual void addref() = 0;
	virtual void delref() = 0;

@@ -75,18 +75,22 @@ struct GetTeamRequest {
	bool wantsNewServers;
	bool wantsTrueBest;
	bool preferLowerUtilization;
	bool teamMustHaveShards;
	double inflightPenalty;
	std::vector<UID> completeSources;
	Promise< Optional< Reference<IDataDistributionTeam> > > reply;

	GetTeamRequest() {}
	GetTeamRequest( bool wantsNewServers, bool wantsTrueBest, bool preferLowerUtilization, double inflightPenalty = 1.0 ) : wantsNewServers( wantsNewServers ), wantsTrueBest( wantsTrueBest ), preferLowerUtilization( preferLowerUtilization ), inflightPenalty( inflightPenalty ) {}

	GetTeamRequest( bool wantsNewServers, bool wantsTrueBest, bool preferLowerUtilization, bool teamMustHaveShards, double inflightPenalty = 1.0 )
		: wantsNewServers( wantsNewServers ), wantsTrueBest( wantsTrueBest ), preferLowerUtilization( preferLowerUtilization ), teamMustHaveShards( teamMustHaveShards ), inflightPenalty( inflightPenalty ) {}

	std::string getDesc() {
		std::stringstream ss;

		ss << "WantsNewServers:" << wantsNewServers << " WantsTrueBest:" << wantsTrueBest
		   << " PreferLowerUtilization:" << preferLowerUtilization << " inflightPenalty:" << inflightPenalty << ";";
		   << " PreferLowerUtilization:" << preferLowerUtilization
		   << " teamMustHaveShards:" << teamMustHaveShards
		   << " inflightPenalty:" << inflightPenalty << ";";
		ss << "CompleteSources:";
		for (auto& cs : completeSources) {
			ss << cs.toString() << ",";

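With the extra teamMustHaveShards flag, callers now select teams with four booleans. A hedged usage sketch of the combinations the background rebalancers use elsewhere in this commit; construction only, assuming the GetTeamRequest declared above is in scope, with the surrounding actor plumbing omitted:

// (wantsNewServers, wantsTrueBest, preferLowerUtilization, teamMustHaveShards)
// MountainChopper: destination = some healthy, lightly loaded team.
GetTeamRequest mountainChopperDest(true, false, true, false);
// MountainChopper: source = the most loaded team, and it must still own shards to give away.
GetTeamRequest mountainChopperSource(true, true, false, true);
// ValleyFiller: source = some team that owns shards; destination = the least loaded team.
GetTeamRequest valleyFillerSource(true, false, false, true);
GetTeamRequest valleyFillerDest(true, true, true, false);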
@@ -170,25 +170,25 @@ public:
		});
	}

	virtual int64_t getMinFreeSpace(bool includeInFlight = true) {
	virtual int64_t getMinAvailableSpace(bool includeInFlight = true) {
		int64_t result = std::numeric_limits<int64_t>::max();
		for (auto it = teams.begin(); it != teams.end(); it++) {
			result = std::min(result, (*it)->getMinFreeSpace(includeInFlight));
			result = std::min(result, (*it)->getMinAvailableSpace(includeInFlight));
		}
		return result;
	}

	virtual double getMinFreeSpaceRatio(bool includeInFlight = true) {
	virtual double getMinAvailableSpaceRatio(bool includeInFlight = true) {
		double result = std::numeric_limits<double>::max();
		for (auto it = teams.begin(); it != teams.end(); it++) {
			result = std::min(result, (*it)->getMinFreeSpaceRatio(includeInFlight));
			result = std::min(result, (*it)->getMinAvailableSpaceRatio(includeInFlight));
		}
		return result;
	}

	virtual bool hasHealthyFreeSpace() {
		return all([](Reference<IDataDistributionTeam> team) {
			return team->hasHealthyFreeSpace();
	virtual bool hasHealthyAvailableSpace(double minRatio) {
		return all([minRatio](Reference<IDataDistributionTeam> team) {
			return team->hasHealthyAvailableSpace(minRatio);
		});
	}

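On the multi-region wrapper, hasHealthyAvailableSpace() simply requires every underlying team to pass the same check. An all()-style helper like the one sketched below captures the pattern; the Team/TeamRef types and allTeams function are illustrative stand-ins, not the class's actual members:

#include <functional>
#include <memory>
#include <vector>

// Illustrative stand-ins for Reference<IDataDistributionTeam>.
struct Team { double minAvailableSpaceRatio; };
using TeamRef = std::shared_ptr<Team>;

// Generic "every team satisfies the predicate" helper, mirroring the all() call above.
bool allTeams(const std::vector<TeamRef>& teams, const std::function<bool(const TeamRef&)>& pred) {
	for (const auto& t : teams) {
		if (!pred(t)) return false;
	}
	return true;
}

bool hasHealthyAvailableSpace(const std::vector<TeamRef>& teams, double minRatio) {
	return allTeams(teams, [minRatio](const TeamRef& t) {
		return t->minAvailableSpaceRatio >= minRatio;
	});
}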
@ -938,7 +938,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
|
||||
if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
|
||||
|
||||
auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, inflightPenalty);
|
||||
auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, false, inflightPenalty);
|
||||
req.completeSources = rd.completeSources;
|
||||
Optional<Reference<IDataDistributionTeam>> bestTeam = wait(brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req)));
|
||||
// If a DC has no healthy team, we stop checking the other DCs until
|
||||
|
@ -1136,8 +1136,10 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
}
|
||||
|
||||
// Move a random shard of sourceTeam's to destTeam if sourceTeam has much more data than destTeam
|
||||
ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<IDataDistributionTeam> sourceTeam, Reference<IDataDistributionTeam> destTeam, bool primary ) {
|
||||
ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<IDataDistributionTeam> sourceTeam,
|
||||
Reference<IDataDistributionTeam> destTeam, bool primary, TraceEvent *traceEvent ) {
|
||||
if(g_network->isSimulated() && g_simulator.speedUpSimulation) {
|
||||
traceEvent->detail("CancelingDueToSimulationSpeedup", true);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1147,6 +1149,9 @@ ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<ID
|
|||
state int64_t averageShardBytes = wait(req.getFuture());
|
||||
state std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) );
|
||||
|
||||
traceEvent->detail("AverageShardBytes", averageShardBytes)
|
||||
.detail("ShardsInSource", shards.size());
|
||||
|
||||
if( !shards.size() )
|
||||
return false;
|
||||
|
||||
|
@ -1168,28 +1173,28 @@ ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<ID
|
|||
|
||||
int64_t sourceBytes = sourceTeam->getLoadBytes(false);
|
||||
int64_t destBytes = destTeam->getLoadBytes();
|
||||
if( sourceBytes - destBytes <= 3 * std::max<int64_t>( SERVER_KNOBS->MIN_SHARD_BYTES, metrics.bytes ) || metrics.bytes == 0 )
|
||||
|
||||
bool sourceAndDestTooSimilar = sourceBytes - destBytes <= 3 * std::max<int64_t>(SERVER_KNOBS->MIN_SHARD_BYTES, metrics.bytes);
|
||||
traceEvent->detail("SourceBytes", sourceBytes)
|
||||
.detail("DestBytes", destBytes)
|
||||
.detail("ShardBytes", metrics.bytes)
|
||||
.detail("SourceAndDestTooSimilar", sourceAndDestTooSimilar);
|
||||
|
||||
if( sourceAndDestTooSimilar || metrics.bytes == 0 ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
//verify the shard is still in sabtf
|
||||
std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) );
|
||||
for( int i = 0; i < shards.size(); i++ ) {
|
||||
if( moveShard == shards[i] ) {
|
||||
TraceEvent(priority == SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM ? "BgDDMountainChopper" : "BgDDValleyFiller", self->distributorId)
|
||||
.detail("SourceBytes", sourceBytes)
|
||||
.detail("DestBytes", destBytes)
|
||||
.detail("ShardBytes", metrics.bytes)
|
||||
.detail("AverageShardBytes", averageShardBytes)
|
||||
.detail("SourceTeam", sourceTeam->getDesc())
|
||||
.detail("DestTeam", destTeam->getDesc());
|
||||
|
||||
self->output.send( RelocateShard( moveShard, priority ) );
|
||||
return true;
|
||||
}
|
||||
//verify the shard is still in sabtf
|
||||
shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) );
|
||||
for( int i = 0; i < shards.size(); i++ ) {
|
||||
if( moveShard == shards[i] ) {
|
||||
traceEvent->detail("ShardStillPresent", true);
|
||||
self->output.send( RelocateShard( moveShard, priority ) );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
traceEvent->detail("ShardStillPresent", false);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1200,6 +1205,15 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
|
|||
state double lastRead = 0;
|
||||
state bool skipCurrentLoop = false;
|
||||
loop {
|
||||
state bool moved = false;
|
||||
state TraceEvent traceEvent("BgDDMountainChopper", self->distributorId);
|
||||
traceEvent.suppressFor(5.0)
|
||||
.detail("PollingInterval", rebalancePollingInterval);
|
||||
|
||||
if(*self->lastLimited > 0) {
|
||||
traceEvent.detail("SecondsSinceLastLimited", now() - *self->lastLimited);
|
||||
}
|
||||
|
||||
try {
|
||||
state Future<Void> delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch);
|
||||
if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) {
|
||||
|
@ -1212,6 +1226,9 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
|
|||
}
|
||||
skipCurrentLoop = val.present();
|
||||
}
|
||||
|
||||
traceEvent.detail("Enabled", !skipCurrentLoop);
|
||||
|
||||
wait(delayF);
|
||||
if (skipCurrentLoop) {
|
||||
// set loop interval to avoid busy wait here.
|
||||
|
@ -1219,26 +1236,35 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
|
|||
std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL);
|
||||
continue;
|
||||
}
|
||||
|
||||
traceEvent.detail("QueuedRelocations", self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM]);
|
||||
if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM] <
|
||||
SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
|
||||
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever(
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, true))));
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, true, false))));
|
||||
|
||||
traceEvent.detail("DestTeam", printable(randomTeam.map<std::string>([](const Reference<IDataDistributionTeam>& team){
|
||||
return team->getDesc();
|
||||
})));
|
||||
|
||||
if (randomTeam.present()) {
|
||||
// Destination team must be healthy and have healthyFreeSpace, otherwise, BestTeamStuck may occur
|
||||
if (randomTeam.get()->getMinFreeSpaceRatio() > SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF &&
|
||||
randomTeam.get()->hasHealthyFreeSpace()) {
|
||||
state Optional<Reference<IDataDistributionTeam>> loadedTeam =
|
||||
wait(brokenPromiseToNever(self->teamCollections[teamCollectionIndex].getTeam.getReply(
|
||||
GetTeamRequest(true, true, false))));
|
||||
if (loadedTeam.present()) {
|
||||
bool moved =
|
||||
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM, loadedTeam.get(),
|
||||
randomTeam.get(), teamCollectionIndex == 0));
|
||||
if (moved) {
|
||||
resetCount = 0;
|
||||
} else {
|
||||
resetCount++;
|
||||
}
|
||||
state Optional<Reference<IDataDistributionTeam>> loadedTeam =
|
||||
wait(brokenPromiseToNever(self->teamCollections[teamCollectionIndex].getTeam.getReply(
|
||||
GetTeamRequest(true, true, false, true))));
|
||||
|
||||
traceEvent.detail("SourceTeam", printable(loadedTeam.map<std::string>([](const Reference<IDataDistributionTeam>& team){
|
||||
return team->getDesc();
|
||||
})));
|
||||
|
||||
if (loadedTeam.present()) {
|
||||
bool _moved =
|
||||
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM, loadedTeam.get(),
|
||||
randomTeam.get(), teamCollectionIndex == 0, &traceEvent));
|
||||
moved = _moved;
|
||||
if (moved) {
|
||||
resetCount = 0;
|
||||
} else {
|
||||
resetCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1257,10 +1283,16 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
|
|||
rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
|
||||
resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
|
||||
}
|
||||
|
||||
traceEvent.detail("ResetCount", resetCount);
|
||||
tr.reset();
|
||||
} catch (Error& e) {
|
||||
traceEvent.error(e, true); // Log actor_cancelled because it's not legal to suppress an event that's initialized
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
|
||||
traceEvent.detail("Moved", moved);
|
||||
traceEvent.log();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1271,6 +1303,15 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
|
|||
state double lastRead = 0;
|
||||
state bool skipCurrentLoop = false;
|
||||
loop {
|
||||
state bool moved = false;
|
||||
state TraceEvent traceEvent("BgDDValleyFiller", self->distributorId);
|
||||
traceEvent.suppressFor(5.0)
|
||||
.detail("PollingInterval", rebalancePollingInterval);
|
||||
|
||||
if(*self->lastLimited > 0) {
|
||||
traceEvent.detail("SecondsSinceLastLimited", now() - *self->lastLimited);
|
||||
}
|
||||
|
||||
try {
|
||||
state Future<Void> delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch);
|
||||
if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) {
|
||||
|
@ -1283,6 +1324,9 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
|
|||
}
|
||||
skipCurrentLoop = val.present();
|
||||
}
|
||||
|
||||
traceEvent.detail("Enabled", !skipCurrentLoop);
|
||||
|
||||
wait(delayF);
|
||||
if (skipCurrentLoop) {
|
||||
// set loop interval to avoid busy wait here.
|
||||
|
@ -1290,25 +1334,34 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
|
|||
std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL);
|
||||
continue;
|
||||
}
|
||||
|
||||
traceEvent.detail("QueuedRelocations", self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM]);
|
||||
if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] <
|
||||
SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
|
||||
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever(
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, false))));
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, false, true))));
|
||||
|
||||
traceEvent.detail("SourceTeam", printable(randomTeam.map<std::string>([](const Reference<IDataDistributionTeam>& team){
|
||||
return team->getDesc();
|
||||
})));
|
||||
|
||||
if (randomTeam.present()) {
|
||||
state Optional<Reference<IDataDistributionTeam>> unloadedTeam = wait(brokenPromiseToNever(
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, true, true))));
|
||||
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, true, true, false))));
|
||||
|
||||
traceEvent.detail("DestTeam", printable(unloadedTeam.map<std::string>([](const Reference<IDataDistributionTeam>& team){
|
||||
return team->getDesc();
|
||||
})));
|
||||
|
||||
if (unloadedTeam.present()) {
|
||||
// Destination team must be healthy and healthyFreeSpace, otherwise, BestTeamStuck may occur
|
||||
if (unloadedTeam.get()->getMinFreeSpaceRatio() > SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF &&
|
||||
unloadedTeam.get()->hasHealthyFreeSpace()) {
|
||||
bool moved =
|
||||
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, randomTeam.get(),
|
||||
unloadedTeam.get(), teamCollectionIndex == 0));
|
||||
if (moved) {
|
||||
resetCount = 0;
|
||||
} else {
|
||||
resetCount++;
|
||||
}
|
||||
bool _moved =
|
||||
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, randomTeam.get(),
|
||||
unloadedTeam.get(), teamCollectionIndex == 0, &traceEvent));
|
||||
moved = _moved;
|
||||
if (moved) {
|
||||
resetCount = 0;
|
||||
} else {
|
||||
resetCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1327,10 +1380,16 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
|
|||
rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
|
||||
resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
|
||||
}
|
||||
|
||||
traceEvent.detail("ResetCount", resetCount);
|
||||
tr.reset();
|
||||
} catch (Error& e) {
|
||||
traceEvent.error(e, true); // Log actor_cancelled because it's not legal to suppress an event that's initialized
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
|
||||
traceEvent.detail("Moved", moved);
|
||||
traceEvent.log();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -49,7 +49,7 @@ public:

	// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
	// The total size of the returned value (less the last entry) will be less than byteLimit
	virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) = 0;
	virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) = 0;

	// To debug MEMORY_RADIXTREE type ONLY
	// Returns (1) how many key & value pairs have been inserted (2) how many nodes have been created (3) how many

@@ -77,12 +77,12 @@ struct KeyValueStoreCompressTestData : IKeyValueStore {

	// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
	// The total size of the returned value (less the last entry) will be less than byteLimit
	virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
	virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
		return doReadRange(store, keys, rowLimit, byteLimit);
	}
	ACTOR Future<Standalone<VectorRef<KeyValueRef>>> doReadRange( IKeyValueStore* store, KeyRangeRef keys, int rowLimit, int byteLimit ) {
		Standalone<VectorRef<KeyValueRef>> _vs = wait( store->readRange(keys, rowLimit, byteLimit) );
		Standalone<VectorRef<KeyValueRef>> vs = _vs; // Get rid of implicit const& from wait statement
	ACTOR Future<Standalone<RangeResultRef>> doReadRange( IKeyValueStore* store, KeyRangeRef keys, int rowLimit, int byteLimit ) {
		Standalone<RangeResultRef> _vs = wait( store->readRange(keys, rowLimit, byteLimit) );
		Standalone<RangeResultRef> vs = _vs; // Get rid of implicit const& from wait statement
		Arena& a = vs.arena();
		for(int i=0; i<vs.size(); i++)
			vs[i].value = ValueRef( a, (ValueRef const&)unpack(vs[i].value) );

@@ -209,15 +209,18 @@ public:

	// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
	// The total size of the returned value (less the last entry) will be less than byteLimit
	virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange(KeyRangeRef keys, int rowLimit = 1 << 30,
	                                                             int byteLimit = 1 << 30) {
		if (recovering.isError()) throw recovering.getError();
	virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
		if(recovering.isError()) throw recovering.getError();
		if (!recovering.isReady()) return waitAndReadRange(this, keys, rowLimit, byteLimit);

		Standalone<VectorRef<KeyValueRef>> result;
		if (rowLimit >= 0) {
		Standalone<RangeResultRef> result;
		if (rowLimit == 0) {
			return result;
		}

		if (rowLimit > 0) {
			auto it = data.lower_bound(keys.begin);
			while (it != data.end() && rowLimit && byteLimit >= 0) {
			while (it != data.end() && rowLimit && byteLimit > 0) {
				StringRef tempKey = it.getKey(reserved_buffer);
				if (tempKey >= keys.end) break;

@@ -229,7 +232,7 @@ public:
		} else {
			rowLimit = -rowLimit;
			auto it = data.previous(data.lower_bound(keys.end));
			while (it != data.end() && rowLimit && byteLimit >= 0) {
			while (it != data.end() && rowLimit && byteLimit > 0) {
				StringRef tempKey = it.getKey(reserved_buffer);
				if (tempKey < keys.begin) break;

@@ -239,6 +242,12 @@ public:
				--rowLimit;
			}
		}

		result.more = rowLimit == 0 || byteLimit <= 0;
		if(result.more) {
			ASSERT(result.size() > 0);
			result.readThrough = result[result.size()-1].key;
		}
		return result;
	}

@@ -689,7 +698,7 @@ private:
		wait( self->recovering );
		return self->readValuePrefix(key, maxLength).get();
	}
	ACTOR static Future<Standalone<VectorRef<KeyValueRef>>> waitAndReadRange( KeyValueStoreMemory* self, KeyRange keys, int rowLimit, int byteLimit ) {
	ACTOR static Future<Standalone<RangeResultRef>> waitAndReadRange( KeyValueStoreMemory* self, KeyRange keys, int rowLimit, int byteLimit ) {
		wait( self->recovering );
		return self->readRange(keys, rowLimit, byteLimit).get();
	}

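readRange() now reports whether it stopped early: `more` is set when the row or byte limit was hit, and `readThrough` records the last key returned so a caller can continue from just past it. A minimal, self-contained sketch of the paging pattern a caller might use; MiniResult, readRange and scanAll below are illustrative, not FoundationDB types:

#include <map>
#include <string>
#include <vector>

// Illustrative stand-in for Standalone<RangeResultRef>.
struct MiniResult {
	std::vector<std::pair<std::string, std::string>> rows;
	bool more = false;
	std::string readThrough;
};

// Read at most rowLimit rows of [begin, end) from an in-memory map,
// reporting more/readThrough the same way the store above does.
MiniResult readRange(const std::map<std::string, std::string>& data,
                     const std::string& begin, const std::string& end, int rowLimit) {
	MiniResult r;
	for (auto it = data.lower_bound(begin); it != data.end() && it->first < end; ++it) {
		if (rowLimit-- == 0) { r.more = true; break; }
		r.rows.push_back(*it);
	}
	if (r.more && !r.rows.empty()) r.readThrough = r.rows.back().first;
	return r;
}

// Caller side: resume each page just past readThrough until more is clear.
std::vector<std::pair<std::string, std::string>> scanAll(
    const std::map<std::string, std::string>& data, std::string begin, const std::string& end) {
	std::vector<std::pair<std::string, std::string>> out;
	while (true) {
		MiniResult page = readRange(data, begin, end, 2);
		out.insert(out.end(), page.rows.begin(), page.rows.end());
		if (!page.more) return out;
		begin = page.readThrough + '\x00'; // first key strictly after the last one returned
	}
}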
@@ -1076,21 +1076,26 @@ struct RawCursor {
}
return Optional<Value>();
}
Standalone<VectorRef<KeyValueRef>> getRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
Standalone<VectorRef<KeyValueRef>> result;
Standalone<RangeResultRef> getRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
Standalone<RangeResultRef> result;
int accumulatedBytes = 0;
ASSERT( byteLimit > 0 );
if(rowLimit == 0) {
return result;
}

if(db.fragment_values) {
if(rowLimit >= 0) {
if(rowLimit > 0) {
int r = moveTo(keys.begin);
if (r < 0)
moveNext();

DefragmentingReader i(*this, result.arena(), true);
Optional<KeyRef> nextKey = i.peek();
while(nextKey.present() && nextKey.get() < keys.end && rowLimit-- && accumulatedBytes < byteLimit) {
while(nextKey.present() && nextKey.get() < keys.end && rowLimit != 0 && accumulatedBytes < byteLimit) {
Optional<KeyValueRef> kv = i.getNext();
result.push_back(result.arena(), kv.get());
--rowLimit;
accumulatedBytes += sizeof(KeyValueRef) + kv.get().expectedSize();
nextKey = i.peek();
}

@@ -1101,37 +1106,45 @@ struct RawCursor {
movePrevious();
DefragmentingReader i(*this, result.arena(), false);
Optional<KeyRef> nextKey = i.peek();
while(nextKey.present() && nextKey.get() >= keys.begin && rowLimit++ && accumulatedBytes < byteLimit) {
while(nextKey.present() && nextKey.get() >= keys.begin && rowLimit != 0 && accumulatedBytes < byteLimit) {
Optional<KeyValueRef> kv = i.getNext();
result.push_back(result.arena(), kv.get());
++rowLimit;
accumulatedBytes += sizeof(KeyValueRef) + kv.get().expectedSize();
nextKey = i.peek();
}
}
}
else {
if (rowLimit >= 0) {
if (rowLimit > 0) {
int r = moveTo( keys.begin );
if (r < 0) moveNext();
while (this->valid && rowLimit-- && accumulatedBytes < byteLimit) {
while (this->valid && rowLimit != 0 && accumulatedBytes < byteLimit) {
KeyValueRef kv = decodeKV( getEncodedRow( result.arena() ) );
accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
if (kv.key >= keys.end) break;
--rowLimit;
accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
result.push_back( result.arena(), kv );
moveNext();
}
} else {
int r = moveTo( keys.end );
if (r >= 0) movePrevious();
while (this->valid && rowLimit++ && accumulatedBytes < byteLimit) {
while (this->valid && rowLimit != 0 && accumulatedBytes < byteLimit) {
KeyValueRef kv = decodeKV( getEncodedRow( result.arena() ) );
accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
if (kv.key < keys.begin) break;
++rowLimit;
accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
result.push_back( result.arena(), kv );
movePrevious();
}
}
}
result.more = rowLimit == 0 || accumulatedBytes >= byteLimit;
if(result.more) {
ASSERT(result.size() > 0);
result.readThrough = result[result.size()-1].key;
}
return result;
}
@@ -1451,7 +1464,7 @@ public:

virtual Future<Optional<Value>> readValue( KeyRef key, Optional<UID> debugID );
virtual Future<Optional<Value>> readValuePrefix( KeyRef key, int maxLength, Optional<UID> debugID );
virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 );
virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 );

KeyValueStoreSQLite(std::string const& filename, UID logID, KeyValueStoreType type, bool checkChecksums, bool checkIntegrity);
~KeyValueStoreSQLite();

@@ -1550,7 +1563,7 @@ private:
struct ReadRangeAction : TypedAction<Reader, ReadRangeAction>, FastAllocated<ReadRangeAction> {
KeyRange keys;
int rowLimit, byteLimit;
ThreadReturnPromise<Standalone<VectorRef<KeyValueRef>>> result;
ThreadReturnPromise<Standalone<RangeResultRef>> result;
ReadRangeAction(KeyRange keys, int rowLimit, int byteLimit) : keys(keys), rowLimit(rowLimit), byteLimit(byteLimit) {}
virtual double getTimeEstimate() { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; }
};

@@ -2000,7 +2013,7 @@ Future<Optional<Value>> KeyValueStoreSQLite::readValuePrefix( KeyRef key, int ma
readThreads->post(p);
return f;
}
Future<Standalone<VectorRef<KeyValueRef>>> KeyValueStoreSQLite::readRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
Future<Standalone<RangeResultRef>> KeyValueStoreSQLite::readRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
++readsRequested;
auto p = new Reader::ReadRangeAction(keys, rowLimit, byteLimit);
auto f = p->result.getFuture();
@@ -79,7 +79,6 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( DISK_QUEUE_FILE_EXTENSION_BYTES, 10<<20 ); // BUGGIFYd per file within the DiskQueue
init( DISK_QUEUE_FILE_SHRINK_BYTES, 100<<20 ); // BUGGIFYd per file within the DiskQueue
init( DISK_QUEUE_MAX_TRUNCATE_BYTES, 2<<30 ); if ( randomize && BUGGIFY ) DISK_QUEUE_MAX_TRUNCATE_BYTES = 0;
init( TLOG_DEGRADED_DELAY_COUNT, 5 );
init( TLOG_DEGRADED_DURATION, 5.0 );
init( MAX_CACHE_VERSIONS, 10e6 );
init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 );

@@ -91,8 +90,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
// Data distribution queue
init( HEALTH_POLL_TIME, 1.0 );
init( BEST_TEAM_STUCK_DELAY, 1.0 );
init(BG_REBALANCE_POLLING_INTERVAL, 10.0);
init(BG_REBALANCE_SWITCH_CHECK_INTERVAL, 5.0); if (randomize && BUGGIFY) BG_REBALANCE_SWITCH_CHECK_INTERVAL = 1.0;
init( BG_REBALANCE_POLLING_INTERVAL, 10.0 );
init( BG_REBALANCE_SWITCH_CHECK_INTERVAL, 5.0 ); if (randomize && BUGGIFY) BG_REBALANCE_SWITCH_CHECK_INTERVAL = 1.0;
init( DD_QUEUE_LOGGING_INTERVAL, 5.0 );
init( RELOCATION_PARALLELISM_PER_SOURCE_SERVER, 2 ); if( randomize && BUGGIFY ) RELOCATION_PARALLELISM_PER_SOURCE_SERVER = 1;
init( DD_QUEUE_MAX_KEY_SERVERS, 100 ); if( randomize && BUGGIFY ) DD_QUEUE_MAX_KEY_SERVERS = 1;

@@ -104,7 +103,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( BG_DD_DECREASE_RATE, 1.02 );
init( BG_DD_SATURATION_DELAY, 1.0 );
init( INFLIGHT_PENALTY_HEALTHY, 1.0 );
init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 );
init( INFLIGHT_PENALTY_UNHEALTHY, 500.0 );
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );

init( PRIORITY_RECOVER_MOVE, 110 );

@@ -175,7 +174,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
If this value is too small relative to SHARD_MIN_BYTES_PER_KSEC immediate merging work will be generated.
*/

init( STORAGE_METRIC_TIMEOUT, 600.0 ); if( randomize && BUGGIFY ) STORAGE_METRIC_TIMEOUT = deterministicRandom()->coinflip() ? 10.0 : 60.0;
init( STORAGE_METRIC_TIMEOUT, isSimulated ? 60.0 : 600.0 ); if( randomize && BUGGIFY ) STORAGE_METRIC_TIMEOUT = deterministicRandom()->coinflip() ? 10.0 : 30.0;
init( METRIC_DELAY, 0.1 ); if( randomize && BUGGIFY ) METRIC_DELAY = 1.0;
init( ALL_DATA_REMOVED_DELAY, 1.0 );
init( INITIAL_FAILURE_REACTION_DELAY, 30.0 ); if( randomize && BUGGIFY ) INITIAL_FAILURE_REACTION_DELAY = 0.0;

@@ -190,12 +189,11 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( DATA_DISTRIBUTION_LOGGING_INTERVAL, 5.0 );
init( DD_ENABLED_CHECK_DELAY, 1.0 );
init( DD_STALL_CHECK_DELAY, 0.4 ); //Must be larger than 2*MAX_BUGGIFIED_DELAY
init( DD_LOW_BANDWIDTH_DELAY, isSimulated ? 90.0 : 240.0 ); if( randomize && BUGGIFY ) DD_LOW_BANDWIDTH_DELAY = 0; //Because of delayJitter, this should be less than 0.9 * DD_MERGE_COALESCE_DELAY
init( DD_MERGE_COALESCE_DELAY, isSimulated ? 120.0 : 300.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001;
init( DD_LOW_BANDWIDTH_DELAY, isSimulated ? 15.0 : 240.0 ); if( randomize && BUGGIFY ) DD_LOW_BANDWIDTH_DELAY = 0; //Because of delayJitter, this should be less than 0.9 * DD_MERGE_COALESCE_DELAY
init( DD_MERGE_COALESCE_DELAY, isSimulated ? 30.0 : 300.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001;
init( STORAGE_METRICS_POLLING_DELAY, 2.0 ); if( randomize && BUGGIFY ) STORAGE_METRICS_POLLING_DELAY = 15.0;
init( STORAGE_METRICS_RANDOM_DELAY, 0.2 );
init( FREE_SPACE_RATIO_CUTOFF, 0.1 );
init( FREE_SPACE_RATIO_DD_CUTOFF, 0.2 );
init( FREE_SPACE_RATIO_CUTOFF, 0.35 );
init( DESIRED_TEAMS_PER_SERVER, 5 ); if( randomize && BUGGIFY ) DESIRED_TEAMS_PER_SERVER = 1;
init( MAX_TEAMS_PER_SERVER, 5*DESIRED_TEAMS_PER_SERVER );
init( DD_SHARD_SIZE_GRANULARITY, 5000000 );

@@ -215,10 +213,10 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( DD_CHECK_INVALID_LOCALITY_DELAY, 60 ); if( randomize && BUGGIFY ) DD_CHECK_INVALID_LOCALITY_DELAY = 1 + deterministicRandom()->random01() * 600;

// TeamRemover
TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = false; if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
init( TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
init( TR_REMOVE_MACHINE_TEAM_DELAY, 60.0 ); if( randomize && BUGGIFY ) TR_REMOVE_MACHINE_TEAM_DELAY = deterministicRandom()->random01() * 60.0;
TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS = true; if( randomize && BUGGIFY ) TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS = deterministicRandom()->random01() < 0.1 ? true : false;
TR_FLAG_DISABLE_SERVER_TEAM_REMOVER = false; if( randomize && BUGGIFY ) TR_FLAG_DISABLE_SERVER_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
init( TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS, true ); if( randomize && BUGGIFY ) TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS = deterministicRandom()->random01() < 0.1 ? true : false;
init( TR_FLAG_DISABLE_SERVER_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_SERVER_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
init( TR_REMOVE_SERVER_TEAM_DELAY, 60.0 ); if( randomize && BUGGIFY ) TR_REMOVE_SERVER_TEAM_DELAY = deterministicRandom()->random01() * 60.0;
init( TR_REMOVE_SERVER_TEAM_EXTRA_DELAY, 5.0 ); if( randomize && BUGGIFY ) TR_REMOVE_SERVER_TEAM_EXTRA_DELAY = deterministicRandom()->random01() * 10.0;

@@ -226,7 +224,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula

// Redwood Storage Engine
init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT, 30 );
init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_MIN, 0 );
init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_MIN, 0 );

// KeyValueStore SQLITE
init( CLEAR_BUFFER_SIZE, 20000 );

@@ -331,6 +329,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( REQUIRED_MIN_RECOVERY_DURATION, 0.080 ); if( shortRecoveryDuration ) REQUIRED_MIN_RECOVERY_DURATION = 0.01;
init( ALWAYS_CAUSAL_READ_RISKY, false );
init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
init( MIN_PROXY_COMPUTE, 0.001 );
init( PROXY_COMPUTE_BUCKETS, 5000 );
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );

// Master Server
// masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistibution)

@@ -387,11 +388,11 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( CLIENT_REGISTER_INTERVAL, 600.0 );

init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_TLOG_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_PROXY_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_RESOLVER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_TLOG_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_PROXY_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_RESOLVER_FITNESS, ProcessClass::UnsetFit );
init( RECRUITMENT_TIMEOUT, 600 ); if( randomize && BUGGIFY ) RECRUITMENT_TIMEOUT = deterministicRandom()->coinflip() ? 60.0 : 1.0;

init( POLICY_RATING_TESTS, 200 ); if( randomize && BUGGIFY ) POLICY_RATING_TESTS = 20;

@@ -412,7 +413,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( MIN_REBOOT_TIME, 4.0 ); if( longReboots ) MIN_REBOOT_TIME = 10.0;
init( MAX_REBOOT_TIME, 5.0 ); if( longReboots ) MAX_REBOOT_TIME = 20.0;
init( LOG_DIRECTORY, "."); // Will be set to the command line flag.
init(SERVER_MEM_LIMIT, 8LL << 30);
init( SERVER_MEM_LIMIT, 8LL << 30 );

//Ratekeeper
bool slowRatekeeper = randomize && BUGGIFY;

@@ -443,8 +444,10 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula

init( MAX_TRANSACTIONS_PER_BYTE, 1000 );

init( MIN_FREE_SPACE, 1e8 );
init( MIN_FREE_SPACE_RATIO, 0.05 );
init( MIN_AVAILABLE_SPACE, 1e8 );
init( MIN_AVAILABLE_SPACE_RATIO, 0.05 );
init( TARGET_AVAILABLE_SPACE_RATIO, 0.30 );
init( AVAILABLE_SPACE_UPDATE_DELAY, 5.0 );

init( MAX_TL_SS_VERSION_DIFFERENCE, 1e99 ); // if( randomize && BUGGIFY ) MAX_TL_SS_VERSION_DIFFERENCE = std::max(1.0, 0.25 * VERSIONS_PER_SECOND); // spring starts at half this value //FIXME: this knob causes ratekeeper to clamp on idle cluster in simulation that have a large number of logs
init( MAX_TL_SS_VERSION_DIFFERENCE_BATCH, 1e99 );

@@ -497,7 +500,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( BEHIND_CHECK_DELAY, 2.0 );
init( BEHIND_CHECK_COUNT, 2 );
init( BEHIND_CHECK_VERSIONS, 5 * VERSIONS_PER_SECOND );
init( WAIT_METRICS_WRONG_SHARD_CHANCE, 0.1 );
init( WAIT_METRICS_WRONG_SHARD_CHANCE, isSimulated ? 1.0 : 0.1 );

//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;

@@ -519,7 +522,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula

// Buggification
init( BUGGIFIED_EVENTUAL_CONSISTENCY, 1.0 );
BUGGIFY_ALL_COORDINATION = false; if( randomize && BUGGIFY ) BUGGIFY_ALL_COORDINATION = true;
init( BUGGIFY_ALL_COORDINATION, false ); if( randomize && BUGGIFY ) BUGGIFY_ALL_COORDINATION = true;

// Status
init( STATUS_MIN_TIME_BETWEEN_REQUESTS, 0.0 );

@@ -537,7 +540,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula

// Timekeeper
init( TIME_KEEPER_DELAY, 10 );
init( TIME_KEEPER_MAX_ENTRIES, 3600 * 24 * 30 * 6); if( randomize && BUGGIFY ) { TIME_KEEPER_MAX_ENTRIES = 2; }
init( TIME_KEEPER_MAX_ENTRIES, 3600 * 24 * 30 * 6 ); if( randomize && BUGGIFY ) { TIME_KEEPER_MAX_ENTRIES = 2; }

// Fast Restore
init( FASTRESTORE_FAILURE_TIMEOUT, 3600 );
@@ -82,7 +82,6 @@ public:
int64_t DISK_QUEUE_FILE_EXTENSION_BYTES; // When we grow the disk queue, by how many bytes should it grow?
int64_t DISK_QUEUE_FILE_SHRINK_BYTES; // When we shrink the disk queue, by how many bytes should it shrink?
int DISK_QUEUE_MAX_TRUNCATE_BYTES; // A truncate larger than this will cause the file to be replaced instead.
int TLOG_DEGRADED_DELAY_COUNT;
double TLOG_DEGRADED_DURATION;
int64_t MAX_CACHE_VERSIONS;
double TXS_POPPED_MAX_DELAY;

@@ -155,7 +154,7 @@ public:
double STORAGE_METRICS_POLLING_DELAY;
double STORAGE_METRICS_RANDOM_DELAY;
double FREE_SPACE_RATIO_CUTOFF;
double FREE_SPACE_RATIO_DD_CUTOFF;
double FREE_SPACE_CUTOFF_PENALTY;
int DESIRED_TEAMS_PER_SERVER;
int MAX_TEAMS_PER_SERVER;
int64_t DD_SHARD_SIZE_GRANULARITY;

@@ -272,6 +271,9 @@ public:
double REQUIRED_MIN_RECOVERY_DURATION;
bool ALWAYS_CAUSAL_READ_RISKY;
int MAX_COMMIT_UPDATES;
double MIN_PROXY_COMPUTE;
int PROXY_COMPUTE_BUCKETS;
double PROXY_COMPUTE_GROWTH_RATE;

// Master Server
double COMMIT_SLEEP_TIME;

@@ -378,8 +380,10 @@ public:

double MAX_TRANSACTIONS_PER_BYTE;

int64_t MIN_FREE_SPACE;
double MIN_FREE_SPACE_RATIO;
int64_t MIN_AVAILABLE_SPACE;
double MIN_AVAILABLE_SPACE_RATIO;
double TARGET_AVAILABLE_SPACE_RATIO;
double AVAILABLE_SPACE_UPDATE_DELAY;

double MAX_TL_SS_VERSION_DIFFERENCE; // spring starts at half this value
double MAX_TL_SS_VERSION_DIFFERENCE_BATCH;
@@ -1055,7 +1055,12 @@ ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, TaskPriori
loop {
wait( allLoaders || delay(SERVER_KNOBS->DESIRED_GET_MORE_DELAY, taskID) );
minVersion = self->end;
for(auto cursor : self->cursors) {
for(int i = 0; i < self->cursors.size(); i++) {
auto cursor = self->cursors[i];
while(cursor->hasMessage()) {
self->cursorMessages[i].push_back(ILogSystem::BufferedCursor::BufferedMessage(cursor->arena(), (!self->withTags || self->collectTags) ? cursor->getMessage() : cursor->getMessageWithTags(), !self->withTags ? VectorRef<Tag>() : cursor->getTags(), cursor->version()));
cursor->nextMessage();
}
minVersion = std::min(minVersion, cursor->version().version);
}
if(minVersion > self->messageVersion.version) {
@@ -279,6 +279,8 @@ struct ProxyCommitData {
int updateCommitRequests = 0;
NotifiedDouble lastCommitTime;

vector<double> commitComputePerOperation;

//The tag related to a storage server rarely change, so we keep a vector of tags for each key range to be slightly more CPU efficient.
//When a tag related to a storage server does change, we empty out all of these vectors to signify they must be repopulated.
//We do not repopulate them immediately to avoid a slow task.

@@ -345,7 +347,9 @@ struct ProxyCommitData {
localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN),
firstProxy(firstProxy), cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true)), db(db),
singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0), lastStartCommit(0), lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), lastCommitTime(0)
{}
{
commitComputePerOperation.resize(SERVER_KNOBS->PROXY_COMPUTE_BUCKETS,0.0);
}
};

struct ResolutionRequestBuilder {
@ -528,7 +532,7 @@ bool isWhitelisted(const vector<Standalone<StringRef>>& binPathVec, StringRef bi
|
|||
}
|
||||
|
||||
ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, MutationListRef>* logRangeMutations,
|
||||
LogPushData* toCommit, Version commitVersion) {
|
||||
LogPushData* toCommit, Version commitVersion, double* computeDuration, double* computeStart) {
|
||||
state std::map<Key, MutationListRef>::iterator logRangeMutation = logRangeMutations->begin();
|
||||
state int32_t version = commitVersion / CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE;
|
||||
state int yieldBytes = 0;
|
||||
|
@ -545,7 +549,11 @@ ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, Mutat
|
|||
while(blobIter) {
|
||||
if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
|
||||
yieldBytes = 0;
|
||||
wait(yield(TaskPriority::ProxyCommitYield2));
|
||||
if(g_network->check_yield(TaskPriority::ProxyCommitYield1)) {
|
||||
*computeDuration += g_network->timer() - *computeStart;
|
||||
wait(delay(0, TaskPriority::ProxyCommitYield1));
|
||||
*computeStart = g_network->timer();
|
||||
}
|
||||
}
|
||||
valueWriter.serializeBytes(blobIter->data);
|
||||
yieldBytes += blobIter->data.size();
|
||||
|
@ -603,6 +611,13 @@ ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, Mutat
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> releaseResolvingAfter(ProxyCommitData* self, Future<Void> releaseDelay, int64_t localBatchNumber) {
|
||||
wait(releaseDelay);
|
||||
ASSERT(self->latestLocalCommitBatchResolving.get() == localBatchNumber-1);
|
||||
self->latestLocalCommitBatchResolving.set(localBatchNumber);
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> commitBatch(
|
||||
ProxyCommitData* self,
|
||||
vector<CommitTransactionRequest> trs,
|
||||
|
@ -613,6 +628,13 @@ ACTOR Future<Void> commitBatch(
|
|||
state double t1 = now();
|
||||
state Optional<UID> debugID;
|
||||
state bool forceRecovery = false;
|
||||
state int batchOperations = 0;
|
||||
int64_t batchBytes = 0;
|
||||
for (int t = 0; t<trs.size(); t++) {
|
||||
batchOperations += trs[t].transaction.mutations.size();
|
||||
batchBytes += trs[t].transaction.mutations.expectedSize();
|
||||
}
|
||||
state int latencyBucket = batchOperations == 0 ? 0 : std::min<int>(SERVER_KNOBS->PROXY_COMPUTE_BUCKETS-1,SERVER_KNOBS->PROXY_COMPUTE_BUCKETS*batchBytes/(batchOperations*(CLIENT_KNOBS->VALUE_SIZE_LIMIT+CLIENT_KNOBS->KEY_SIZE_LIMIT)));
|
||||
|
||||
ASSERT(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS <= SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT); // since we are using just the former to limit the number of versions actually in flight!
|
||||
|
||||
|
@ -644,7 +666,7 @@ ACTOR Future<Void> commitBatch(
|
|||
// Queuing pre-resolution commit processing
|
||||
TEST(self->latestLocalCommitBatchResolving.get() < localBatchNumber - 1);
|
||||
wait(self->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber-1));
|
||||
wait(yield(TaskPriority::ProxyCommitYield1));
|
||||
state Future<Void> releaseDelay = delay(batchOperations*self->commitComputePerOperation[latencyBucket], TaskPriority::ProxyMasterVersionReply);
|
||||
|
||||
if (debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.GettingCommitVersion");
|
||||
|
@ -695,9 +717,7 @@ ACTOR Future<Void> commitBatch(
|
|||
}
|
||||
|
||||
state vector<vector<int>> transactionResolverMap = std::move( requests.transactionResolverMap );
|
||||
|
||||
ASSERT(self->latestLocalCommitBatchResolving.get() == localBatchNumber-1);
|
||||
self->latestLocalCommitBatchResolving.set(localBatchNumber);
|
||||
state Future<Void> releaseFuture = releaseResolvingAfter(self, releaseDelay, localBatchNumber);
|
||||
|
||||
/////// Phase 2: Resolution (waiting on the network; pipelined)
|
||||
state vector<ResolveTransactionBatchReply> resolution = wait( getAll(replies) );
|
||||
|
@ -708,8 +728,10 @@ ACTOR Future<Void> commitBatch(
|
|||
////// Phase 3: Post-resolution processing (CPU bound except for very rare situations; ordered; currently atomic but doesn't need to be)
|
||||
TEST(self->latestLocalCommitBatchLogging.get() < localBatchNumber - 1); // Queuing post-resolution commit processing
|
||||
wait(self->latestLocalCommitBatchLogging.whenAtLeast(localBatchNumber-1));
|
||||
wait(yield(TaskPriority::ProxyCommitYield2));
|
||||
wait(yield(TaskPriority::ProxyCommitYield1));
|
||||
|
||||
state double computeStart = g_network->timer();
|
||||
state double computeDuration = 0;
|
||||
self->stats.txnCommitResolved += trs.size();
|
||||
|
||||
if (debugID.present())
|
||||
|
@ -866,7 +888,11 @@ ACTOR Future<Void> commitBatch(
|
|||
for (; mutationNum < pMutations->size(); mutationNum++) {
|
||||
if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
|
||||
yieldBytes = 0;
|
||||
wait(yield(TaskPriority::ProxyCommitYield2));
|
||||
if(g_network->check_yield(TaskPriority::ProxyCommitYield1)) {
|
||||
computeDuration += g_network->timer() - computeStart;
|
||||
wait(delay(0, TaskPriority::ProxyCommitYield1));
|
||||
computeStart = g_network->timer();
|
||||
}
|
||||
}
|
||||
|
||||
auto& m = (*pMutations)[mutationNum];
|
||||
|
@ -968,7 +994,7 @@ ACTOR Future<Void> commitBatch(
|
|||
|
||||
// Serialize and backup the mutations as a single mutation
|
||||
if ((self->vecBackupKeys.size() > 1) && logRangeMutations.size()) {
|
||||
wait( addBackupMutations(self, &logRangeMutations, &toCommit, commitVersion) );
|
||||
wait( addBackupMutations(self, &logRangeMutations, &toCommit, commitVersion, &computeDuration, &computeStart) );
|
||||
}
|
||||
|
||||
self->stats.mutations += mutationCount;
|
||||
|
@ -976,29 +1002,33 @@ ACTOR Future<Void> commitBatch(
|
|||
|
||||
// Storage servers mustn't make durable versions which are not fully committed (because then they are impossible to roll back)
|
||||
// We prevent this by limiting the number of versions which are semi-committed but not fully committed to be less than the MVCC window
|
||||
while (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
|
||||
// This should be *extremely* rare in the real world, but knob buggification should make it happen in simulation
|
||||
TEST(true); // Semi-committed pipeline limited by MVCC window
|
||||
//TraceEvent("ProxyWaitingForCommitted", self->dbgid).detail("CommittedVersion", self->committedVersion.get()).detail("NeedToCommit", commitVersion);
|
||||
choose{
|
||||
when(wait(self->committedVersion.whenAtLeast(commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS))) {
|
||||
wait(yield());
|
||||
break;
|
||||
}
|
||||
when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE | GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
|
||||
if(v.version > self->committedVersion.get()) {
|
||||
self->locked = v.locked;
|
||||
self->metadataVersion = v.metadataVersion;
|
||||
self->committedVersion.set(v.version);
|
||||
if(self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
|
||||
computeDuration += g_network->timer() - computeStart;
|
||||
while (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
|
||||
// This should be *extremely* rare in the real world, but knob buggification should make it happen in simulation
|
||||
TEST(true); // Semi-committed pipeline limited by MVCC window
|
||||
//TraceEvent("ProxyWaitingForCommitted", self->dbgid).detail("CommittedVersion", self->committedVersion.get()).detail("NeedToCommit", commitVersion);
|
||||
choose{
|
||||
when(wait(self->committedVersion.whenAtLeast(commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS))) {
|
||||
wait(yield());
|
||||
break;
|
||||
}
|
||||
when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE | GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
|
||||
if(v.version > self->committedVersion.get()) {
|
||||
self->locked = v.locked;
|
||||
self->metadataVersion = v.metadataVersion;
|
||||
self->committedVersion.set(v.version);
|
||||
}
|
||||
|
||||
if (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)
|
||||
wait(delay(SERVER_KNOBS->PROXY_SPIN_DELAY));
|
||||
}
|
||||
|
||||
if (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)
|
||||
wait(delay(SERVER_KNOBS->PROXY_SPIN_DELAY));
|
||||
}
|
||||
}
|
||||
computeStart = g_network->timer();
|
||||
}
|
||||
|
||||
state LogSystemDiskQueueAdapter::CommitMessage msg = wait(storeCommits.back().first); // Should just be doing yields
|
||||
state LogSystemDiskQueueAdapter::CommitMessage msg = storeCommits.back().first.get();
|
||||
|
||||
if (debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.AfterStoreCommits");
|
||||
|
@ -1031,6 +1061,16 @@ ACTOR Future<Void> commitBatch(
|
|||
self->latestLocalCommitBatchLogging.set(localBatchNumber);
|
||||
}
|
||||
|
||||
computeDuration += g_network->timer() - computeStart;
|
||||
if(computeDuration > SERVER_KNOBS->MIN_PROXY_COMPUTE && batchOperations > 0) {
|
||||
double computePerOperation = computeDuration/batchOperations;
|
||||
if(computePerOperation <= self->commitComputePerOperation[latencyBucket] || self->commitComputePerOperation[latencyBucket] == 0.0) {
|
||||
self->commitComputePerOperation[latencyBucket] = computePerOperation;
|
||||
} else {
|
||||
self->commitComputePerOperation[latencyBucket] = SERVER_KNOBS->PROXY_COMPUTE_GROWTH_RATE*computePerOperation + ((1.0-SERVER_KNOBS->PROXY_COMPUTE_GROWTH_RATE)*self->commitComputePerOperation[latencyBucket]);
|
||||
}
|
||||
}
|
||||
|
||||
/////// Phase 4: Logging (network bound; pipelined up to MAX_READ_TRANSACTION_LIFE_VERSIONS (limited by loop above))
|
||||
|
||||
try {
|
||||
|
@ -1048,7 +1088,7 @@ ACTOR Future<Void> commitBatch(
|
|||
}
|
||||
self->lastCommitLatency = now()-commitStartTime;
|
||||
self->lastCommitTime = std::max(self->lastCommitTime.get(), commitStartTime);
|
||||
wait(yield(TaskPriority::ProxyCommitYield3));
|
||||
wait(yield(TaskPriority::ProxyCommitYield2));
|
||||
|
||||
if( self->popRemoteTxs && msg.popTo > ( self->txsPopVersions.size() ? self->txsPopVersions.back().second : self->lastTxsPop ) ) {
|
||||
if(self->txsPopVersions.size() >= SERVER_KNOBS->MAX_TXS_POP_VERSION_HISTORY) {
|
||||
|
@ -1087,7 +1127,7 @@ ACTOR Future<Void> commitBatch(
|
|||
}
|
||||
|
||||
// Send replies to clients
|
||||
double endTime = timer();
|
||||
double endTime = g_network->timer();
|
||||
for (int t = 0; t < trs.size(); t++) {
|
||||
if (committed[t] == ConflictBatch::TransactionCommitted && (!locked || trs[t].isLockAware())) {
|
||||
ASSERT_WE_THINK(commitVersion != invalidVersion);
|
||||
|
@ -1138,6 +1178,7 @@ ACTOR Future<Void> commitBatch(
|
|||
|
||||
self->commitBatchesMemBytesCount -= currentBatchMemBytesCount;
|
||||
ASSERT_ABORT(self->commitBatchesMemBytesCount >= 0);
|
||||
wait(releaseFuture);
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -1201,8 +1242,7 @@ ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(ProxyCommitData* commi
|
|||
ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture, std::vector<GetReadVersionRequest> requests,
|
||||
ProxyStats* stats, Version minKnownCommittedVersion) {
|
||||
GetReadVersionReply reply = wait(replyFuture);
|
||||
|
||||
double end = timer();
|
||||
double end = g_network->timer();
|
||||
for(GetReadVersionRequest const& request : requests) {
|
||||
if(request.priority() >= GetReadVersionRequest::PRIORITY_DEFAULT) {
|
||||
stats->grvLatencyBands.addMeasurement(end - request.requestTime());
|
||||
|
@ -1410,7 +1450,7 @@ ACTOR static Future<Void> rejoinServer( MasterProxyInterface proxy, ProxyCommitD
|
|||
GetStorageServerRejoinInfoReply rep;
|
||||
rep.version = commitData->version;
|
||||
rep.tag = decodeServerTagValue( commitData->txnStateStore->readValue(serverTagKeyFor(req.id)).get().get() );
|
||||
Standalone<VectorRef<KeyValueRef>> history = commitData->txnStateStore->readRange(serverTagHistoryRangeFor(req.id)).get();
|
||||
Standalone<RangeResultRef> history = commitData->txnStateStore->readRange(serverTagHistoryRangeFor(req.id)).get();
|
||||
for(int i = history.size()-1; i >= 0; i-- ) {
|
||||
rep.history.push_back(std::make_pair(decodeServerTagHistoryKey(history[i].key), decodeServerTagValue(history[i].value)));
|
||||
}
|
||||
|
@ -1794,7 +1834,7 @@ ACTOR Future<Void> masterProxyServerCore(
|
|||
state KeyRange txnKeys = allKeys;
|
||||
loop {
|
||||
wait(yield());
|
||||
Standalone<VectorRef<KeyValueRef>> data = commitData.txnStateStore->readRange(txnKeys, SERVER_KNOBS->BUGGIFIED_ROW_LIMIT, SERVER_KNOBS->APPLY_MUTATION_BYTES).get();
|
||||
Standalone<RangeResultRef> data = commitData.txnStateStore->readRange(txnKeys, SERVER_KNOBS->BUGGIFIED_ROW_LIMIT, SERVER_KNOBS->APPLY_MUTATION_BYTES).get();
|
||||
if(!data.size()) break;
|
||||
((KeyRangeRef&)txnKeys) = KeyRangeRef( keyAfter(data.back().key, txnKeys.arena()), txnKeys.end );
|
||||
|
||||
|
|
|
@ -270,6 +270,7 @@ namespace oldTLog_4_6 {
|
|||
std::map<UID, Reference<struct LogData>> id_data;
|
||||
|
||||
UID dbgid;
|
||||
UID workerID;
|
||||
|
||||
IKeyValueStore* persistentData;
|
||||
IDiskQueue* rawPersistentQueue;
|
||||
|
@ -303,8 +304,8 @@ namespace oldTLog_4_6 {
|
|||
PromiseStream<Future<Void>> sharedActors;
|
||||
bool terminated;
|
||||
|
||||
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> const& dbInfo)
|
||||
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> const& dbInfo)
|
||||
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
|
||||
dbInfo(dbInfo), queueCommitBegin(0), queueCommitEnd(0), prevVersion(0),
|
||||
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false),
|
||||
|
@ -412,7 +413,7 @@ namespace oldTLog_4_6 {
|
|||
// These are initialized differently on init() or recovery
|
||||
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), recovery(Void())
|
||||
{
|
||||
startRole(Role::TRANSACTION_LOG,interf.id(), UID());
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, "Restored");
|
||||
|
||||
persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id);
|
||||
persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id);
|
||||
|
@ -954,7 +955,7 @@ namespace oldTLog_4_6 {
|
|||
|
||||
peekMessagesFromMemory( logData, req, messages2, endVersion );
|
||||
|
||||
Standalone<VectorRef<KeyValueRef>> kvs = wait(
|
||||
Standalone<RangeResultRef> kvs = wait(
|
||||
self->persistentData->readRange(KeyRangeRef(
|
||||
persistTagMessagesKey(logData->logId, oldTag, req.begin),
|
||||
persistTagMessagesKey(logData->logId, oldTag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));
|
||||
|
@ -1101,7 +1102,7 @@ namespace oldTLog_4_6 {
|
|||
// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
|
||||
TLogRejoinRequest req;
|
||||
req.myInterface = tli;
|
||||
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
|
||||
TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
|
||||
choose {
|
||||
when(TLogRejoinReply rep =
|
||||
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
|
||||
|
@ -1249,8 +1250,8 @@ namespace oldTLog_4_6 {
|
|||
|
||||
IKeyValueStore *storage = self->persistentData;
|
||||
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
|
||||
state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
|
||||
state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
|
||||
|
||||
// FIXME: metadata in queue?
|
||||
|
||||
|
@ -1263,7 +1264,7 @@ namespace oldTLog_4_6 {
|
|||
}
|
||||
|
||||
if (!fFormat.get().present()) {
|
||||
Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
|
||||
Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
|
||||
if (!v.size()) {
|
||||
TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
|
||||
throw worker_removed();
|
||||
|
@ -1316,7 +1317,7 @@ namespace oldTLog_4_6 {
|
|||
tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) );
|
||||
loop {
|
||||
if(logData->removed.isReady()) break;
|
||||
Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
|
||||
Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
|
||||
if (!data.size()) break;
|
||||
((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end );
|
||||
|
||||
|
@ -1402,9 +1403,9 @@ namespace oldTLog_4_6 {
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId )
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId, UID workerID )
|
||||
{
|
||||
state TLogData self( tlogId, persistentData, persistentQueue, db );
|
||||
state TLogData self( tlogId, workerID, persistentData, persistentQueue, db );
|
||||
state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
|
||||
|
||||
TraceEvent("SharedTlog", tlogId);
|
||||
|
|
|
@ -245,6 +245,7 @@ struct TLogData : NonCopyable {
|
|||
std::map<UID, Reference<struct LogData>> id_data;
|
||||
|
||||
UID dbgid;
|
||||
UID workerID;
|
||||
|
||||
IKeyValueStore* persistentData;
|
||||
IDiskQueue* rawPersistentQueue;
|
||||
|
@ -286,8 +287,8 @@ struct TLogData : NonCopyable {
|
|||
Reference<AsyncVar<bool>> degraded;
|
||||
std::vector<TagsAndMessage> tempTagMessages;
|
||||
|
||||
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
|
||||
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
|
||||
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
|
||||
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
|
||||
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
|
||||
|
@ -439,14 +440,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
bool execOpCommitInProgress;
|
||||
int txsTags;
|
||||
|
||||
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
|
||||
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID),
|
||||
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
|
||||
// These are initialized differently on init() or recovery
|
||||
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
|
||||
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
|
||||
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector<Tag> tags, std::string context)
|
||||
: tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
|
||||
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID),
|
||||
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
|
||||
// These are initialized differently on init() or recovery
|
||||
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
|
||||
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
|
||||
{
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), UID());
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context);
|
||||
|
||||
persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id);
|
||||
persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id);
|
||||
|
@ -1172,7 +1174,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
peekMessagesFromMemory( logData, req, messages2, endVersion );
|
||||
}
|
||||
|
||||
Standalone<VectorRef<KeyValueRef>> kvs = wait(
|
||||
Standalone<RangeResultRef> kvs = wait(
|
||||
self->persistentData->readRange(KeyRangeRef(
|
||||
persistTagMessagesKey(logData->logId, req.tag, req.begin),
|
||||
persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));
|
||||
|
@ -1236,12 +1238,8 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
|
|||
return Void();
|
||||
}
|
||||
|
||||
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
|
||||
state int loopCount = 0;
|
||||
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
|
||||
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
|
||||
loopCount++;
|
||||
}
|
||||
wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
|
||||
|
||||
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
|
||||
TEST(true); //6.0 TLog degraded
|
||||
self->degraded->set(true);
|
||||
|
@ -1482,7 +1480,7 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
|
|||
if ( self->dbInfo->get().master.id() != lastMasterID) {
|
||||
// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
|
||||
TLogRejoinRequest req(tli);
|
||||
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
|
||||
TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
|
||||
choose {
|
||||
when(TLogRejoinReply rep =
|
||||
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
|
||||
|
@ -1930,12 +1928,12 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
state IKeyValueStore *storage = self->persistentData;
|
||||
wait(storage->init());
|
||||
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
|
||||
state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
|
||||
state Future<Standalone<RangeResultRef>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
|
||||
state Future<Standalone<RangeResultRef>> fLocality = storage->readRange(persistLocalityKeys);
|
||||
state Future<Standalone<RangeResultRef>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
|
||||
state Future<Standalone<RangeResultRef>> fTxsTags = storage->readRange(persistTxsTagsKeys);
|
||||
state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
|
||||
|
||||
// FIXME: metadata in queue?
|
||||
|
||||
|
@ -1954,7 +1952,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
}
|
||||
|
||||
if (!fFormat.get().present()) {
|
||||
Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
|
||||
Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
|
||||
if (!v.size()) {
|
||||
TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
|
||||
throw worker_removed();
|
||||
|
@ -1976,7 +1974,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
tlogRequests.getFuture().pop().reply.sendError(recruitment_failed());
|
||||
}
|
||||
|
||||
wait( oldTLog_4_6::tLog(self->persistentData, self->rawPersistentQueue, self->dbInfo, locality, self->dbgid) );
|
||||
wait( oldTLog_4_6::tLog(self->persistentData, self->rawPersistentQueue, self->dbInfo, locality, self->dbgid, self->workerID) );
|
||||
throw internal_error();
|
||||
}
|
||||
|
||||
|
@ -2022,7 +2020,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
DUMPTOKEN( recruited.confirmRunning );
|
||||
|
||||
//We do not need the remoteTag, because we will not be loading any additional data
|
||||
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector<Tag>()) );
|
||||
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector<Tag>(), "Restored") );
|
||||
logData->locality = id_locality[id1];
|
||||
logData->stopped = true;
|
||||
self->id_data[id1] = logData;
|
||||
|
@ -2044,7 +2042,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) );
|
||||
loop {
|
||||
if(logData->removed.isReady()) break;
|
||||
Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
|
||||
Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
|
||||
if (!data.size()) break;
|
||||
((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end );
|
||||
|
||||
|
@ -2205,7 +2203,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
|
|||
it.second->stopCommit.trigger();
|
||||
}
|
||||
|
||||
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags) );
|
||||
bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
|
||||
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags, recovering ? "Recovered" : "Recruited") );
|
||||
self->id_data[recruited.id()] = logData;
|
||||
logData->locality = req.locality;
|
||||
logData->recoveryCount = req.epoch;
|
||||
|
@ -2220,7 +2219,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
|
|||
throw logData->removed.getError();
|
||||
}
|
||||
|
||||
if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) {
|
||||
if (recovering) {
|
||||
logData->unrecoveredBefore = req.startVersion;
|
||||
logData->recoveredAt = req.recoverAt;
|
||||
logData->knownCommittedVersion = req.startVersion - 1;
|
||||
|
@ -2326,13 +2325,11 @@ ACTOR Future<Void> startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen
|
|||
}
|
||||
|
||||
// New tLog (if !recoverFrom.size()) or restore from network
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog) {
|
||||
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder );
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog) {
|
||||
state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder );
|
||||
state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
|
||||
|
||||
TraceEvent("SharedTlog", tlogId);
|
||||
// FIXME: Pass the worker id instead of stubbing it
|
||||
startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID());
|
||||
try {
|
||||
if(restoreFromDisk) {
|
||||
wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) );
|
||||
|
@ -2373,7 +2370,6 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
|
|||
} catch (Error& e) {
|
||||
self.terminated.send(Void());
|
||||
TraceEvent("TLogError", tlogId).error(e, true);
|
||||
endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true);
|
||||
if(recovered.canBeSet()) recovered.send(Void());
|
||||
|
||||
while(!tlogRequests.isEmpty()) {
|
||||
|
|
|
@ -306,6 +306,7 @@ struct TLogData : NonCopyable {
|
|||
std::map<UID, Reference<struct LogData>> id_data;
|
||||
|
||||
UID dbgid;
|
||||
UID workerID;
|
||||
|
||||
IKeyValueStore* persistentData; // Durable data on disk that were spilled.
|
||||
IDiskQueue* rawPersistentQueue; // The physical queue the persistentQueue below stores its data. Ideally, log interface should work without directly accessing rawPersistentQueue
|
||||
|
@ -347,8 +348,8 @@ struct TLogData : NonCopyable {
|
|||
// that came when ignorePopRequest was set
|
||||
Reference<AsyncVar<bool>> degraded;
|
||||
|
||||
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
|
||||
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
|
||||
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
|
||||
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
|
||||
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
|
||||
|
@ -511,7 +512,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
bool execOpCommitInProgress;
|
||||
int txsTags;
|
||||
|
||||
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
|
||||
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector<Tag> tags, std::string context) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
|
||||
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion),
|
||||
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
|
||||
minPoppedTagVersion(0), minPoppedTag(invalidTag),
|
||||
|
@ -519,7 +520,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
|
||||
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
|
||||
{
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), UID());
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context);
|
||||
|
||||
persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id);
|
||||
persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id);
|
||||
|
@ -697,7 +698,7 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
|
|||
// us to remove data that still is pointed to by SpilledData in the btree.
|
||||
if (data->persistentPopped <= logData->persistentDataVersion) {
|
||||
// Recover the next needed location in the Disk Queue from the index.
|
||||
Standalone<VectorRef<KeyValueRef>> kvrefs = wait(
|
||||
Standalone<RangeResultRef> kvrefs = wait(
|
||||
self->persistentData->readRange(KeyRangeRef(
|
||||
persistTagMessageRefsKey(logData->logId, data->tag, data->persistentPopped),
|
||||
persistTagMessageRefsKey(logData->logId, data->tag, logData->persistentDataVersion + 1)), 1));
|
||||
|
@ -1479,7 +1480,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
}
|
||||
|
||||
if (req.tag.locality == tagLocalityTxs || req.tag == txsTag) {
|
||||
Standalone<VectorRef<KeyValueRef>> kvs = wait(
|
||||
Standalone<RangeResultRef> kvs = wait(
|
||||
self->persistentData->readRange(KeyRangeRef(
|
||||
persistTagMessagesKey(logData->logId, req.tag, req.begin),
|
||||
persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));
|
||||
|
@ -1498,7 +1499,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
}
|
||||
} else {
|
||||
// FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
|
||||
Standalone<VectorRef<KeyValueRef>> kvrefs = wait(
|
||||
Standalone<RangeResultRef> kvrefs = wait(
|
||||
self->persistentData->readRange(KeyRangeRef(
|
||||
persistTagMessageRefsKey(logData->logId, req.tag, req.begin),
|
||||
persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
|
||||
|
@ -1630,12 +1631,8 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
return Void();
}

//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
state int loopCount = 0;
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
loopCount++;
}
wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));

TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
TEST(true); //TLog degraded
self->degraded->set(true);
@ -1876,7 +1873,7 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
if ( self->dbInfo->get().master.id() != lastMasterID) {
// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
TLogRejoinRequest req(tli);
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
choose {
when(TLogRejoinReply rep =
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
@ -2340,13 +2337,13 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
wait(storage->init());
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
state Future<Optional<Value>> fRecoveryLocation = storage->readValue(persistRecoveryLocationKey);
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);
state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
state Future<Standalone<RangeResultRef>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<RangeResultRef>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<RangeResultRef>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<RangeResultRef>> fTxsTags = storage->readRange(persistTxsTagsKeys);
state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
state Future<Standalone<RangeResultRef>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);

// FIXME: metadata in queue?

@ -2365,7 +2362,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
}

if (!fFormat.get().present()) {
Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
if (!v.size()) {
TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
throw worker_removed();
@ -2431,7 +2428,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
ProtocolVersion protocolVersion = BinaryReader::fromStringRef<ProtocolVersion>( fProtocolVersions.get()[idx].value, Unversioned() );

//We do not need the remoteTag, because we will not be loading any additional data
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector<Tag>()) );
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector<Tag>(), "Restored") );
logData->locality = id_locality[id1];
logData->stopped = true;
self->id_data[id1] = logData;
@ -2453,7 +2450,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) );
loop {
if(logData->removed.isReady()) break;
Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
if (!data.size()) break;
((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end );
@ -2635,7 +2632,9 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
it.second->stopCommit.trigger();
}

state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags) );
bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);

state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags, recovering ? "Recovered" : "Recruited") );
self->id_data[recruited.id()] = logData;
logData->locality = req.locality;
logData->recoveryCount = req.epoch;
@ -2652,7 +2651,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
throw logData->removed.getError();
}

if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) {
if (recovering) {
logData->unrecoveredBefore = req.startVersion;
logData->recoveredAt = req.recoverAt;
logData->knownCommittedVersion = req.startVersion - 1;
@ -2760,13 +2759,11 @@ ACTOR Future<Void> startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen
}

// New tLog (if !recoverFrom.size()) or restore from network
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) {
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder );
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) {
state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder );
state Future<Void> error = actorCollection( self.sharedActors.getFuture() );

TraceEvent("SharedTlog", tlogId);
// FIXME: Pass the worker id instead of stubbing it
startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID());
try {
if(restoreFromDisk) {
wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) );
@ -2805,7 +2802,6 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
} catch (Error& e) {
self.terminated.send(Void());
TraceEvent("TLogError", tlogId).error(e, true);
endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true);
if(recovered.canBeSet()) recovered.send(Void());

while(!tlogRequests.isEmpty()) {
@ -390,14 +390,14 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {

limitReason_t ssLimitReason = limitReason_t::unlimited;

int64_t minFreeSpace = std::max(SERVER_KNOBS->MIN_FREE_SPACE, (int64_t)(SERVER_KNOBS->MIN_FREE_SPACE_RATIO * ss.smoothTotalSpace.smoothTotal()));
int64_t minFreeSpace = std::max(SERVER_KNOBS->MIN_AVAILABLE_SPACE, (int64_t)(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO * ss.smoothTotalSpace.smoothTotal()));

worstFreeSpaceStorageServer = std::min(worstFreeSpaceStorageServer, (int64_t)ss.smoothFreeSpace.smoothTotal() - minFreeSpace);

int64_t springBytes = std::max<int64_t>(1, std::min<int64_t>(limits->storageSpringBytes, (ss.smoothFreeSpace.smoothTotal() - minFreeSpace) * 0.2));
int64_t targetBytes = std::max<int64_t>(1, std::min(limits->storageTargetBytes, (int64_t)ss.smoothFreeSpace.smoothTotal() - minFreeSpace));
if (targetBytes != limits->storageTargetBytes) {
if (minFreeSpace == SERVER_KNOBS->MIN_FREE_SPACE) {
if (minFreeSpace == SERVER_KNOBS->MIN_AVAILABLE_SPACE) {
ssLimitReason = limitReason_t::storage_server_min_free_space;
} else {
ssLimitReason = limitReason_t::storage_server_min_free_space_ratio;
@ -574,14 +574,14 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {

limitReason_t tlogLimitReason = limitReason_t::log_server_write_queue;

int64_t minFreeSpace = std::max( SERVER_KNOBS->MIN_FREE_SPACE, (int64_t)(SERVER_KNOBS->MIN_FREE_SPACE_RATIO * tl.smoothTotalSpace.smoothTotal()));
int64_t minFreeSpace = std::max( SERVER_KNOBS->MIN_AVAILABLE_SPACE, (int64_t)(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO * tl.smoothTotalSpace.smoothTotal()));

worstFreeSpaceTLog = std::min(worstFreeSpaceTLog, (int64_t)tl.smoothFreeSpace.smoothTotal() - minFreeSpace);

int64_t springBytes = std::max<int64_t>(1, std::min<int64_t>(limits->logSpringBytes, (tl.smoothFreeSpace.smoothTotal() - minFreeSpace) * 0.2));
int64_t targetBytes = std::max<int64_t>(1, std::min(limits->logTargetBytes, (int64_t)tl.smoothFreeSpace.smoothTotal() - minFreeSpace));
if (targetBytes != limits->logTargetBytes) {
if (minFreeSpace == SERVER_KNOBS->MIN_FREE_SPACE) {
if (minFreeSpace == SERVER_KNOBS->MIN_AVAILABLE_SPACE) {
tlogLimitReason = limitReason_t::log_server_min_free_space;
} else {
tlogLimitReason = limitReason_t::log_server_min_free_space_ratio;
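The two Ratekeeper hunks above only rename the free-space knobs (MIN_FREE_SPACE becomes MIN_AVAILABLE_SPACE), but the clamping arithmetic around them is easy to miss in diff form: the spring and target byte limits are capped by whatever smoothed free space remains above the minimum, and bottom out at 1 when space is scarce. A minimal standalone sketch of that calculation follows; the knob values and variable names are assumptions for illustration, not the real defaults.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
        // Illustrative sketch only: these constants are invented, not SERVER_KNOBS defaults.
        const int64_t MIN_AVAILABLE_SPACE = 100'000'000;        // assumed 100 MB floor
        const double  MIN_AVAILABLE_SPACE_RATIO = 0.05;         // assumed 5% floor

        int64_t totalSpace  = 500'000'000'000;                  // smoothed total space
        int64_t freeSpace   =  20'000'000'000;                  // smoothed free space
        int64_t targetLimit =   1'000'000'000;                  // configured target bytes
        int64_t springLimit =     100'000'000;                  // configured spring bytes

        int64_t minFreeSpace = std::max(MIN_AVAILABLE_SPACE,
                                        (int64_t)(MIN_AVAILABLE_SPACE_RATIO * totalSpace));
        int64_t springBytes = std::max<int64_t>(1, std::min<int64_t>(springLimit, (freeSpace - minFreeSpace) * 0.2));
        int64_t targetBytes = std::max<int64_t>(1, std::min(targetLimit, freeSpace - minFreeSpace));

        // When free space dips below the floor, both values clamp to 1, which is what
        // makes targetBytes differ from the configured target and selects the
        // *_min_free_space limit reasons in the hunk above.
        printf("minFreeSpace=%lld springBytes=%lld targetBytes=%lld\n",
               (long long)minFreeSpace, (long long)springBytes, (long long)targetBytes);
        return 0;
    }
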
@ -29,7 +29,6 @@
|
|||
#include "fdbserver/CoordinationInterface.h"
|
||||
#include "fdbmonitor/SimpleIni.h"
|
||||
#include "fdbrpc/AsyncFileNonDurable.actor.h"
|
||||
#include "fdbrpc/TLSConnection.h"
|
||||
#include "fdbclient/ManagementAPI.actor.h"
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbclient/BackupAgent.actor.h"
|
||||
|
@ -48,60 +47,6 @@ const int MACHINE_REBOOT_TIME = 10;
|
|||
|
||||
bool destructed = false;
|
||||
|
||||
static const char* certBytes =
|
||||
"-----BEGIN CERTIFICATE-----\n"
|
||||
"MIIEGzCCAwOgAwIBAgIJANUQj1rRA2XMMA0GCSqGSIb3DQEBBQUAMIGjMQswCQYD\n"
|
||||
"VQQGEwJVUzELMAkGA1UECAwCVkExDzANBgNVBAcMBlZpZW5uYTEaMBgGA1UECgwR\n"
|
||||
"Rm91bmRhdGlvbkRCLCBMTEMxGTAXBgNVBAsMEFRlc3QgZW5naW5lZXJpbmcxFTAT\n"
|
||||
"BgNVBAMMDE1yLiBCaWcgVHVuYTEoMCYGCSqGSIb3DQEJARYZYmlnLnR1bmFAZm91\n"
|
||||
"bmRhdGlvbmRiLmNvbTAeFw0xNDEyMDUxNTEyMjFaFw0yNDEyMDIxNTEyMjFaMIGj\n"
|
||||
"MQswCQYDVQQGEwJVUzELMAkGA1UECAwCVkExDzANBgNVBAcMBlZpZW5uYTEaMBgG\n"
|
||||
"A1UECgwRRm91bmRhdGlvbkRCLCBMTEMxGTAXBgNVBAsMEFRlc3QgZW5naW5lZXJp\n"
|
||||
"bmcxFTATBgNVBAMMDE1yLiBCaWcgVHVuYTEoMCYGCSqGSIb3DQEJARYZYmlnLnR1\n"
|
||||
"bmFAZm91bmRhdGlvbmRiLmNvbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\n"
|
||||
"ggEBAKZTL2edDkiet4HBTZnjysn6gOVZH2MP02KVBIv/H7e+3w7ZOIRvcPzhZe9M\n"
|
||||
"3cGH1t/pkr9DSXvzIb42EffMVlpLD2VQn2H8VC2QSdJCIQcf802u+Taf+XtW6K1h\n"
|
||||
"p/YPL1uhdopUs3c1oon8ykKwnOfrQYgv5pUa7jQdMkltI2MQJU3uFq3Z/LHTvIKe\n"
|
||||
"FN+bqK0iYhZthwMG7Rld4+RgKZoT4u1B6w/duEWk9KLjgs7fTf3Oe6JHCYNqwBJi\n"
|
||||
"78sJalwXz9Wf8wmMaYSG0XNA7vBOdpTFhVPSsh6e3rkydf5HydMade/II98MWpMe\n"
|
||||
"hFg7FFMaJP6ig8p5iL+9QP2VMCkCAwEAAaNQME4wHQYDVR0OBBYEFIXGmIcKptBP\n"
|
||||
"v3i9WS/mK78o5E/MMB8GA1UdIwQYMBaAFIXGmIcKptBPv3i9WS/mK78o5E/MMAwG\n"
|
||||
"A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADggEBAJkVgNGOXT+ZHCNEYLjr/6OM\n"
|
||||
"UCHvwlMeaEyqxaOmK26J2kAADPhjBZ7lZOHWb2Wzb+BiQUIFGwNIMoRvsg8skpJa\n"
|
||||
"OCqpVciHVXY/U8BiYY70DKozRza93Ab9om3pySGDJ/akdCjqbMT1Cb7Kloyw+hNh\n"
|
||||
"XD4MML0lYiUE9KK35xyK6FgTx4A7IXl4b3lWBgglqTh4+P5J1+xy8AYJ0VfPoP7y\n"
|
||||
"OoZgwAmkpkMnalReNkN7LALHGqMzv/qH04ODlkU/HUGgExtnINMxK9VEDIe/yLGm\n"
|
||||
"DHy7gcQMj5Hyymack/d4ZF8CSrYpGZQeZGXoxOmTDwWcXgnYA+2o7lOYPb5Uu08=\n"
|
||||
"-----END CERTIFICATE-----\n"
|
||||
"-----BEGIN PRIVATE KEY-----\n"
|
||||
"MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCmUy9nnQ5InreB\n"
|
||||
"wU2Z48rJ+oDlWR9jD9NilQSL/x+3vt8O2TiEb3D84WXvTN3Bh9bf6ZK/Q0l78yG+\n"
|
||||
"NhH3zFZaSw9lUJ9h/FQtkEnSQiEHH/NNrvk2n/l7VuitYaf2Dy9boXaKVLN3NaKJ\n"
|
||||
"/MpCsJzn60GIL+aVGu40HTJJbSNjECVN7hat2fyx07yCnhTfm6itImIWbYcDBu0Z\n"
|
||||
"XePkYCmaE+LtQesP3bhFpPSi44LO3039znuiRwmDasASYu/LCWpcF8/Vn/MJjGmE\n"
|
||||
"htFzQO7wTnaUxYVT0rIent65MnX+R8nTGnXvyCPfDFqTHoRYOxRTGiT+ooPKeYi/\n"
|
||||
"vUD9lTApAgMBAAECggEBAIYCmDtfq9aPK0P8v82yX/4FPD2OZV+nrKXNc3BpCuE9\n"
|
||||
"hPOtyX/LWrol0b/Rqwr3rAWVaIt6Z4bbCuD7J9cEaL8voyP6pbCJYjmj/BbQ+VOI\n"
|
||||
"Rrzcsid1Fcpu5+JqwK3c5kdp/NzQChmOuXt8lmrNal7iilZ0YdDZdfu/WnkW2mBB\n"
|
||||
"oQHkujlnWr4PNYdwMOnBU6TwdOuz+inPVMLohOO0Vr585OxPsGzG2Ud3yQ/t34Cq\n"
|
||||
"F9nmOXQoszftGKsL1yuh/3fGj/O86g/CRsUy05qZhDDBEYQD6qZCvD5+yp8oOWIR\n"
|
||||
"SljM3GXDBnJqRPhP+Nyf6e6/GoQtfVZ9MPRzDDPzIBECgYEA2kX/zAs6taOiNqCb\n"
|
||||
"6nVGe7/3uQJz/CkmOSKIFKUu7lCEUjmMYpK3Xzp26RTUR9cT+g9y+cnJO1Vbaxtf\n"
|
||||
"Qidje6K+Oi1pQyUGQ6W+U8cPJHz43PVa7IB5Az5i/sS2tu0BGhvGo9G6iYQjxXeD\n"
|
||||
"1197DRACgnm5AORQMum616XvSPMCgYEAwxKbkAzJzfZF6A3Ys+/0kycNfDP8xZoC\n"
|
||||
"1zV3d1b2JncsdAPCHYSKtpniRrQN9ASa3RMdkh+wrMN/KlbtU9Ddoc4NHxSTFV7F\n"
|
||||
"wypFMzLZslqkQ6uHnVVewHV7prfoKsMci2c9iHO7W8TEv4aqW8XDd8OozP3/q2j4\n"
|
||||
"hvL7VIAVqXMCgYEAwAFnfOQ75uBkp00tGlfDgsRhc5vWz3CbMRNRRWfxGq41V+dL\n"
|
||||
"uMJ7EAfr5ijue6uU5RmF+HkqzUjOvC894oGnn3CPibm8qNX+5q7799JZXa2ZdTVX\n"
|
||||
"oEd7LAFLL/V3DP77Qy4/1Id/Ycydcu0pSuGw6tK0gnX06fXtHnxAYcaT8UUCgYAE\n"
|
||||
"MytcP5o8r/ezVlD7Fsh6PpYAvZHMo1M6VPFchWfJTjmLyeTtA8SEx+1iPlAql8rJ\n"
|
||||
"xbaWRc5k+dSMEdEMQ+vxpuELcUL1a9PwLsHMp2SefWsZ9eB2l7bxh9YAsebyvL6p\n"
|
||||
"lbBydqNrB2KBCSIz1Z8uveytdS6C/0CSjzqwCA3vVwKBgQDAXqjo3xrzMlHeXm5o\n"
|
||||
"qH/OjajjqbnPXHolHDitbLubyQ4E6KhMBMxfChBe/8VptB/Gs0efVbMVGuabxY7Q\n"
|
||||
"iastGId8HyONy3UPGPxCn4b95cIxKvdpt+hvWtYHIBCfHXluQK7zsDMgvtXjYNiz\n"
|
||||
"peZRikYlwmu1K2YRTf7oLE2Ogw==\n"
|
||||
"-----END PRIVATE KEY-----\n";
|
||||
|
||||
template <class T>
|
||||
T simulate( const T& in ) {
|
||||
BinaryWriter writer(AssumeVersion(currentProtocolVersion));
|
||||
|
@ -112,13 +57,6 @@ T simulate( const T& in ) {
|
|||
return out;
|
||||
}
|
||||
|
||||
static void simInitTLS(Reference<TLSOptions> tlsOptions) {
|
||||
tlsOptions->set_cert_data( certBytes );
|
||||
tlsOptions->set_key_data( certBytes );
|
||||
tlsOptions->set_verify_peers(std::vector<std::string>(1, "Check.Valid=0"));
|
||||
tlsOptions->register_network();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> runBackup( Reference<ClusterConnectionFile> connFile ) {
|
||||
state std::vector<Future<Void>> agentFutures;
|
||||
|
||||
|
@ -195,7 +133,7 @@ enum AgentMode {
|
|||
// a loop{} will be needed around the waiting on simulatedFDBD(). For now this simply
|
||||
// takes care of house-keeping such as context switching and file closing.
|
||||
ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnectionFile> connFile, IPAddress ip,
|
||||
bool sslEnabled, Reference<TLSOptions> tlsOptions,
|
||||
bool sslEnabled,
|
||||
uint16_t port, uint16_t listenPerProcess,
|
||||
LocalityData localities, ProcessClass processClass,
|
||||
std::string* dataFolder, std::string* coordFolder,
|
||||
|
@ -217,7 +155,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
|
|||
wait( delay( waitTime ) );
|
||||
|
||||
state ISimulator::ProcessInfo* process =
|
||||
g_simulator.newProcess("Server", ip, port, listenPerProcess, localities, processClass, dataFolder->c_str(),
|
||||
g_simulator.newProcess("Server", ip, port, sslEnabled, listenPerProcess, localities, processClass, dataFolder->c_str(),
|
||||
coordFolder->c_str());
|
||||
wait(g_simulator.onProcess(process,
|
||||
TaskPriority::DefaultYield)); // Now switch execution to the process on which we will run
|
||||
|
@ -246,9 +184,6 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
|
|||
//SOMEDAY: test lower memory limits, without making them too small and causing the database to stop making progress
|
||||
FlowTransport::createInstance(processClass == ProcessClass::TesterClass || runBackupAgents == AgentOnly, 1);
|
||||
Sim2FileSystem::newFileSystem();
|
||||
if (sslEnabled) {
|
||||
tlsOptions->register_network();
|
||||
}
|
||||
|
||||
vector<Future<Void>> futures;
|
||||
for (int listenPort = port; listenPort < port + listenPerProcess; ++listenPort) {
|
||||
|
@ -362,8 +297,7 @@ std::string describe(int const& val) {
|
|||
// Since a datacenter kill is considered to be the same as killing a machine, files cannot be swapped across datacenters
|
||||
std::map< Optional<Standalone<StringRef>>, std::vector< std::vector< std::string > > > availableFolders;
|
||||
// process count is no longer needed because it is now the length of the vector of ip's, because it was one ip per process
|
||||
ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector<IPAddress> ips, bool sslEnabled,
|
||||
Reference<TLSOptions> tlsOptions, LocalityData localities,
|
||||
ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector<IPAddress> ips, bool sslEnabled, LocalityData localities,
|
||||
ProcessClass processClass, std::string baseFolder, bool restarting,
|
||||
bool useSeedFile, AgentMode runBackupAgents, bool sslOnly, std::string whitelistBinPaths) {
|
||||
state int bootCount = 0;
|
||||
|
@ -408,7 +342,7 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector
|
|||
Reference<ClusterConnectionFile> clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path));
|
||||
const int listenPort = i*listenPerProcess + 1;
|
||||
AgentMode agentMode = runBackupAgents == AgentOnly ? ( i == ips.size()-1 ? AgentOnly : AgentNone ) : runBackupAgents;
|
||||
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, tlsOptions, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths));
|
||||
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths));
|
||||
TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], listenPort, true, false)).detail("ZoneId", localities.zoneId()).detail("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]);
|
||||
}
|
||||
|
||||
|
@ -613,7 +547,7 @@ IPAddress makeIPAddressForSim(bool isIPv6, std::array<int, 4> parts) {
|
|||
ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
|
||||
Optional<ClusterConnectionString>* pConnString,
|
||||
Standalone<StringRef>* pStartingConfiguration,
|
||||
Reference<TLSOptions> tlsOptions, int extraDB, std::string whitelistBinPaths) {
|
||||
int extraDB, std::string whitelistBinPaths) {
|
||||
CSimpleIni ini;
|
||||
ini.SetUnicode();
|
||||
ini.LoadFile(joinPath(baseFolder, "restartInfo.ini").c_str());
|
||||
|
@ -709,7 +643,7 @@ ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, st
|
|||
|
||||
// SOMEDAY: parse backup agent from test file
|
||||
systemActors->push_back(reportErrors(
|
||||
simulatedMachine(conn, ipAddrs, usingSSL, tlsOptions, localities, processClass, baseFolder, true,
|
||||
simulatedMachine(conn, ipAddrs, usingSSL, localities, processClass, baseFolder, true,
|
||||
i == useSeedForMachine, enableExtraDB ? AgentAddition : AgentNone,
|
||||
usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), whitelistBinPaths),
|
||||
processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine"));
|
||||
|
@ -1108,12 +1042,14 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
|
|||
|
||||
void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
|
||||
Optional<ClusterConnectionString>* pConnString, Standalone<StringRef>* pStartingConfiguration,
|
||||
int extraDB, int minimumReplication, int minimumRegions, Reference<TLSOptions> tlsOptions,
|
||||
std::string whitelistBinPaths) {
|
||||
int extraDB, int minimumReplication, int minimumRegions, std::string whitelistBinPaths, bool configureLocked) {
|
||||
// SOMEDAY: this does not test multi-interface configurations
|
||||
SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions);
|
||||
StatusObject startingConfigJSON = simconfig.db.toJSON(true);
|
||||
std::string startingConfigString = "new";
|
||||
if (configureLocked) {
|
||||
startingConfigString += " locked";
|
||||
}
|
||||
for( auto kv : startingConfigJSON) {
|
||||
startingConfigString += " ";
|
||||
if( kv.second.type() == json_spirit::int_type ) {
|
||||
|
@ -1180,7 +1116,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
bool assignClasses = machineCount - dataCenters > 4 && deterministicRandom()->random01() < 0.5;
|
||||
|
||||
// Use SSL 5% of the time
|
||||
bool sslEnabled = deterministicRandom()->random01() < 0.10 && tlsOptions->enabled();
|
||||
bool sslEnabled = deterministicRandom()->random01() < 0.10;
|
||||
bool sslOnly = sslEnabled && deterministicRandom()->coinflip();
|
||||
g_simulator.listenersPerProcess = sslEnabled && !sslOnly ? 2 : 1;
|
||||
TEST( sslEnabled ); // SSL enabled
|
||||
|
@ -1239,9 +1175,9 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
.detail("Address", coordinatorAddresses[i])
|
||||
.detail("Coordinators", describe(coordinatorAddresses));
|
||||
g_simulator.protectedAddresses.insert(
|
||||
NetworkAddress(coordinatorAddresses[i].ip, coordinatorAddresses[i].port, true, false));
|
||||
NetworkAddress(coordinatorAddresses[i].ip, coordinatorAddresses[i].port, true, coordinatorAddresses[i].isTLS()));
|
||||
if(coordinatorAddresses[i].port==2) {
|
||||
g_simulator.protectedAddresses.insert(NetworkAddress(coordinatorAddresses[i].ip, 1, true, false));
|
||||
g_simulator.protectedAddresses.insert(NetworkAddress(coordinatorAddresses[i].ip, 1, true, true));
|
||||
}
|
||||
}
|
||||
deterministicRandom()->randomShuffle(coordinatorAddresses);
|
||||
|
@ -1324,7 +1260,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
// check the sslEnablementMap using only one ip(
|
||||
LocalityData localities(Optional<Standalone<StringRef>>(), zoneId, machineId, dcUID);
|
||||
localities.set(LiteralStringRef("data_hall"), dcUID);
|
||||
systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled, tlsOptions,
|
||||
systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled,
|
||||
localities, processClass, baseFolder, false, machine == useSeedForMachine, requiresExtraDBMachines ? AgentOnly : AgentAddition, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
|
||||
|
||||
if (requiresExtraDBMachines) {
|
||||
|
@ -1337,7 +1273,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
|
||||
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newMachineId, dcUID);
|
||||
localities.set(LiteralStringRef("data_hall"), dcUID);
|
||||
systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled, tlsOptions,
|
||||
systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled,
|
||||
localities,
|
||||
processClass, baseFolder, false, machine == useSeedForMachine, AgentNone, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
|
||||
}
|
||||
|
@ -1365,9 +1301,9 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
Standalone<StringRef> newZoneId = Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString());
|
||||
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newZoneId, Optional<Standalone<StringRef>>());
|
||||
systemActors->push_back( reportErrors( simulatedMachine(
|
||||
conn, ips, sslEnabled, tlsOptions,
|
||||
conn, ips, sslEnabled && sslOnly,
|
||||
localities, ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource),
|
||||
baseFolder, false, i == useSeedForMachine, AgentNone, sslEnabled, whitelistBinPaths ),
|
||||
baseFolder, false, i == useSeedForMachine, AgentNone, sslEnabled && sslOnly, whitelistBinPaths ),
|
||||
"SimulatedTesterMachine") );
|
||||
}
|
||||
*pStartingConfiguration = startingConfigString;
|
||||
|
@ -1386,7 +1322,8 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
|
|||
.detail("StartingConfiguration", pStartingConfiguration->toString());
|
||||
}
|
||||
|
||||
void checkExtraDB(const char *testFile, int &extraDB, int &minimumReplication, int &minimumRegions) {
|
||||
void checkTestConf(const char* testFile, int& extraDB, int& minimumReplication, int& minimumRegions,
|
||||
int& configureLocked) {
|
||||
std::ifstream ifs;
|
||||
ifs.open(testFile, std::ifstream::in);
|
||||
if (!ifs.good())
|
||||
|
@ -1418,12 +1355,16 @@ void checkExtraDB(const char *testFile, int &extraDB, int &minimumReplication, i
|
|||
if (attrib == "minimumRegions") {
|
||||
sscanf( value.c_str(), "%d", &minimumRegions );
|
||||
}
|
||||
|
||||
if (attrib == "configureLocked") {
|
||||
sscanf(value.c_str(), "%d", &configureLocked);
|
||||
}
|
||||
}
|
||||
|
||||
ifs.close();
|
||||
}
|
||||
|
||||
ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, std::string whitelistBinPaths, Reference<TLSOptions> tlsOptions) {
|
||||
ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, std::string whitelistBinPaths) {
|
||||
state vector<Future<Void>> systemActors;
|
||||
state Optional<ClusterConnectionString> connFile;
|
||||
state Standalone<StringRef> startingConfiguration;
|
||||
|
@ -1431,11 +1372,12 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
|
|||
state int extraDB = 0;
|
||||
state int minimumReplication = 0;
|
||||
state int minimumRegions = 0;
|
||||
checkExtraDB(testFile, extraDB, minimumReplication, minimumRegions);
|
||||
state int configureLocked = 0;
|
||||
checkTestConf(testFile, extraDB, minimumReplication, minimumRegions, configureLocked);
|
||||
|
||||
// TODO (IPv6) Use IPv6?
|
||||
wait(g_simulator.onProcess(
|
||||
g_simulator.newProcess("TestSystem", IPAddress(0x01010101), 1, 1,
|
||||
g_simulator.newProcess("TestSystem", IPAddress(0x01010101), 1, false, 1,
|
||||
LocalityData(Optional<Standalone<StringRef>>(),
|
||||
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
|
||||
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
|
||||
|
@ -1444,16 +1386,12 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
|
|||
TaskPriority::DefaultYield));
|
||||
Sim2FileSystem::newFileSystem();
|
||||
FlowTransport::createInstance(true, 1);
|
||||
if (tlsOptions->enabled()) {
|
||||
simInitTLS(tlsOptions);
|
||||
}
|
||||
|
||||
TEST(true); // Simulation start
|
||||
|
||||
try {
|
||||
//systemActors.push_back( startSystemMonitor(dataFolder) );
|
||||
if (rebooting) {
|
||||
wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, tlsOptions, extraDB, whitelistBinPaths), 100.0 ) );
|
||||
wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, whitelistBinPaths), 100.0 ) );
|
||||
// FIXME: snapshot restore does not support multi-region restore, hence restore it as single region always
|
||||
if (restoring) {
|
||||
startingConfiguration = LiteralStringRef("usable_regions=1");
|
||||
|
@ -1462,7 +1400,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
|
|||
else {
|
||||
g_expect_full_pointermap = 1;
|
||||
setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB,
|
||||
minimumReplication, minimumRegions, tlsOptions, whitelistBinPaths);
|
||||
minimumReplication, minimumRegions, whitelistBinPaths, configureLocked);
|
||||
wait( delay(1.0) ); // FIXME: WHY!!! //wait for machines to boot
|
||||
}
|
||||
std::string clusterFileDir = joinPath( dataFolder, deterministicRandom()->randomUniqueID().toString() );
|
||||
|
|
|
@ -18,12 +18,10 @@
* limitations under the License.
*/

#include "fdbrpc/TLSConnection.h"

#ifndef FDBSERVER_SIMULATEDCLUSTER_H
#define FDBSERVER_SIMULATEDCLUSTER_H
#pragma once

void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, std::string const& whitelistBinPath, Reference<TLSOptions> const& useSSL);
void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, std::string const& whitelistBinPath);

#endif
@ -287,14 +287,10 @@ private:
int nPointers, valueLength;
};

static force_inline bool less(const uint8_t* a, int aLen, const uint8_t* b, int bLen) {
int len = min(aLen, bLen);
for (int i = 0; i < len; i++)
if (a[i] < b[i])
return true;
else if (a[i] > b[i])
return false;

static force_inline bool less( const uint8_t* a, int aLen, const uint8_t* b, int bLen ) {
int c = memcmp(a,b,min(aLen,bLen));
if (c<0) return true;
if (c>0) return false;
return aLen < bLen;
}
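The hunk above replaces a byte-by-byte loop with a memcmp-based comparison; both compute the same lexicographic "less than", with the shorter key winning when one is a prefix of the other. A small self-contained sketch of that ordering (the function and test names here are invented for the example, not taken from the diff):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Same ordering as the hunk above: compare the common prefix with memcmp,
    // then fall back to length so that a prefix sorts before its extensions.
    static bool lessThan(const uint8_t* a, int aLen, const uint8_t* b, int bLen) {
        int c = std::memcmp(a, b, std::min(aLen, bLen));
        if (c < 0) return true;
        if (c > 0) return false;
        return aLen < bLen;
    }

    int main() {
        const uint8_t ab[] = { 'a', 'b' };
        const uint8_t abc[] = { 'a', 'b', 'c' };
        const uint8_t ac[] = { 'a', 'c' };
        assert(lessThan(ab, 2, abc, 3));   // prefix sorts first
        assert(!lessThan(abc, 3, ab, 2));
        assert(lessThan(ab, 2, ac, 2));    // first differing byte decides
        assert(!lessThan(ab, 2, ab, 2));   // equal keys are not less
        return 0;
    }
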
@ -1547,17 +1547,9 @@ ACTOR static Future<vector<std::pair<TLogInterface, EventMap>>> getTLogsAndMetri
return results;
}

ACTOR static Future<vector<std::pair<MasterProxyInterface, EventMap>>> getProxiesAndMetrics(Database cx, std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
Reference<ProxyInfo> proxyInfo = cx->getMasterProxies(false);
std::vector<MasterProxyInterface> servers;
if(proxyInfo) {
for(int i = 0; i < proxyInfo->size(); ++i) {
servers.push_back(proxyInfo->getInterface(i));
}
}

ACTOR static Future<vector<std::pair<MasterProxyInterface, EventMap>>> getProxiesAndMetrics(Reference<AsyncVar<CachedSerialization<ServerDBInfo>>> db, std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
vector<std::pair<MasterProxyInterface, EventMap>> results = wait(getServerMetrics(
servers, address_workers, std::vector<std::string>{ "GRVLatencyMetrics", "CommitLatencyMetrics" }));
db->get().read().client.proxies, address_workers, std::vector<std::string>{ "GRVLatencyMetrics", "CommitLatencyMetrics" }));

return results;
}
@ -2313,7 +2305,7 @@ ACTOR Future<StatusReply> clusterGetStatus(

state Future<ErrorOr<vector<std::pair<StorageServerInterface, EventMap>>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers));
state Future<ErrorOr<vector<std::pair<TLogInterface, EventMap>>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers));
state Future<ErrorOr<vector<std::pair<MasterProxyInterface, EventMap>>>> proxyFuture = errorOr(getProxiesAndMetrics(cx, address_workers));
state Future<ErrorOr<vector<std::pair<MasterProxyInterface, EventMap>>>> proxyFuture = errorOr(getProxiesAndMetrics(db, address_workers));

state int minReplicasRemaining = -1;
std::vector<Future<JsonBuilderObject>> futures2;
@ -393,10 +393,10 @@ struct StorageServerMetrics {
.detail("Load", rep.load.bytes);
}

rep.free.bytes = sb.free;
rep.free.iosPerKSecond = 10e6;
rep.free.bytesPerKSecond = 100e9;
rep.free.bytesReadPerKSecond = 100e9;
rep.available.bytes = sb.available;
rep.available.iosPerKSecond = 10e6;
rep.available.bytesPerKSecond = 100e9;
rep.available.bytesReadPerKSecond = 100e9;

rep.capacity.bytes = sb.total;
rep.capacity.iosPerKSecond = 10e6;
@ -301,6 +301,7 @@ struct TLogData : NonCopyable {
|
|||
std::map<UID, Reference<struct LogData>> id_data;
|
||||
|
||||
UID dbgid;
|
||||
UID workerID;
|
||||
|
||||
IKeyValueStore* persistentData; // Durable data on disk that were spilled.
|
||||
IDiskQueue* rawPersistentQueue; // The physical queue the persistentQueue below stores its data. Ideally, log interface should work without directly accessing rawPersistentQueue
|
||||
|
@ -343,8 +344,8 @@ struct TLogData : NonCopyable {
|
|||
Reference<AsyncVar<bool>> degraded;
|
||||
std::vector<TagsAndMessage> tempTagMessages;
|
||||
|
||||
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
|
||||
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
|
||||
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
|
||||
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
|
||||
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
|
||||
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
|
||||
|
@ -508,15 +509,16 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
bool execOpCommitInProgress;
|
||||
int txsTags;
|
||||
|
||||
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, TLogSpillType logSpillType, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
|
||||
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion), logSpillType(logSpillType),
|
||||
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
|
||||
minPoppedTagVersion(0), minPoppedTag(invalidTag),
|
||||
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, TLogSpillType logSpillType, std::vector<Tag> tags, std::string context)
|
||||
: tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
|
||||
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion), logSpillType(logSpillType),
|
||||
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
|
||||
minPoppedTagVersion(0), minPoppedTag(invalidTag),
|
||||
// These are initialized differently on init() or recovery
|
||||
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
|
||||
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
|
||||
{
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), UID());
|
||||
startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context);
|
||||
|
||||
persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id);
|
||||
persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id);
|
||||
|
@ -711,7 +713,7 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
|
|||
// us to remove data that still is pointed to by SpilledData in the btree.
|
||||
if (data->persistentPopped <= logData->persistentDataVersion) {
|
||||
// Recover the next needed location in the Disk Queue from the index.
|
||||
Standalone<VectorRef<KeyValueRef>> kvrefs = wait(
|
||||
Standalone<RangeResultRef> kvrefs = wait(
|
||||
self->persistentData->readRange(KeyRangeRef(
|
||||
persistTagMessageRefsKey(logData->logId, data->tag, data->persistentPopped),
|
||||
persistTagMessageRefsKey(logData->logId, data->tag, logData->persistentDataVersion + 1)), 1));
|
||||
|
@ -1493,7 +1495,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
}
|
||||
|
||||
if ( logData->shouldSpillByValue(req.tag) ) {
|
||||
Standalone<VectorRef<KeyValueRef>> kvs = wait(
|
||||
Standalone<RangeResultRef> kvs = wait(
|
||||
self->persistentData->readRange(KeyRangeRef(
|
||||
persistTagMessagesKey(logData->logId, req.tag, req.begin),
|
||||
persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));
|
||||
|
@ -1512,7 +1514,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
}
|
||||
} else {
|
||||
// FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
|
||||
Standalone<VectorRef<KeyValueRef>> kvrefs = wait(
|
||||
Standalone<RangeResultRef> kvrefs = wait(
|
||||
self->persistentData->readRange(KeyRangeRef(
|
||||
persistTagMessageRefsKey(logData->logId, req.tag, req.begin),
|
||||
persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
|
||||
|
@ -1649,12 +1651,8 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
|
|||
return Void();
|
||||
}
|
||||
|
||||
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
|
||||
state int loopCount = 0;
|
||||
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
|
||||
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
|
||||
loopCount++;
|
||||
}
|
||||
wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
|
||||
|
||||
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
|
||||
TEST(true); //TLog degraded
|
||||
self->degraded->set(true);
|
||||
|
@ -1897,7 +1895,7 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
|
|||
if ( self->dbInfo->get().master.id() != lastMasterID) {
|
||||
// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
|
||||
TLogRejoinRequest req(tli);
|
||||
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
|
||||
TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
|
||||
choose {
|
||||
when(TLogRejoinReply rep =
|
||||
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
|
||||
|
@ -2358,14 +2356,14 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
wait(storage->init());
|
||||
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
|
||||
state Future<Optional<Value>> fRecoveryLocation = storage->readValue(persistRecoveryLocationKey);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fTLogSpillTypes = storage->readRange(persistTLogSpillTypeKeys);
|
||||
state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
|
||||
state Future<Standalone<RangeResultRef>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
|
||||
state Future<Standalone<RangeResultRef>> fLocality = storage->readRange(persistLocalityKeys);
|
||||
state Future<Standalone<RangeResultRef>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
|
||||
state Future<Standalone<RangeResultRef>> fTxsTags = storage->readRange(persistTxsTagsKeys);
|
||||
state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
|
||||
state Future<Standalone<RangeResultRef>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);
|
||||
state Future<Standalone<RangeResultRef>> fTLogSpillTypes = storage->readRange(persistTLogSpillTypeKeys);
|
||||
|
||||
// FIXME: metadata in queue?
|
||||
|
||||
|
@ -2384,7 +2382,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
}
|
||||
|
||||
if (!fFormat.get().present()) {
|
||||
Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
|
||||
Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
|
||||
if (!v.size()) {
|
||||
TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
|
||||
throw worker_removed();
|
||||
|
@ -2451,7 +2449,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
TLogSpillType logSpillType = BinaryReader::fromStringRef<TLogSpillType>( fTLogSpillTypes.get()[idx].value, AssumeVersion(protocolVersion) );
|
||||
|
||||
//We do not need the remoteTag, because we will not be loading any additional data
|
||||
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, logSpillType, std::vector<Tag>()) );
|
||||
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, logSpillType, std::vector<Tag>(), "Restored") );
|
||||
logData->locality = id_locality[id1];
|
||||
logData->stopped = true;
|
||||
self->id_data[id1] = logData;
|
||||
|
@ -2473,7 +2471,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
|
|||
tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) );
|
||||
loop {
|
||||
if(logData->removed.isReady()) break;
|
||||
Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
|
||||
Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
|
||||
if (!data.size()) break;
|
||||
((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end );
|
||||
|
||||
|
@ -2657,7 +2655,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
|
|||
|
||||
stopAllTLogs(self, recruited.id());
|
||||
|
||||
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.spillType, req.allTags) );
|
||||
bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
|
||||
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.spillType, req.allTags, recovering ? "Recovered" : "Recruited") );
|
||||
self->id_data[recruited.id()] = logData;
|
||||
logData->locality = req.locality;
|
||||
logData->recoveryCount = req.epoch;
|
||||
|
@ -2674,7 +2673,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
|
|||
throw logData->removed.getError();
|
||||
}
|
||||
|
||||
if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) {
|
||||
if (recovering) {
|
||||
logData->unrecoveredBefore = req.startVersion;
|
||||
logData->recoveredAt = req.recoverAt;
|
||||
logData->knownCommittedVersion = req.startVersion - 1;
|
||||
|
@ -2783,13 +2782,11 @@ ACTOR Future<Void> startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen
|
|||
}
|
||||
|
||||
// New tLog (if !recoverFrom.size()) or restore from network
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) {
|
||||
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder );
|
||||
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) {
|
||||
state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder );
|
||||
state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
|
||||
|
||||
TraceEvent("SharedTlog", tlogId);
|
||||
// FIXME: Pass the worker id instead of stubbing it
|
||||
startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID());
|
||||
try {
|
||||
if(restoreFromDisk) {
|
||||
wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) );
|
||||
|
@ -2833,7 +2830,6 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
|
|||
} catch (Error& e) {
|
||||
self.terminated.send(Void());
|
||||
TraceEvent("TLogError", tlogId).error(e, true);
|
||||
endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true);
|
||||
if(recovered.canBeSet()) recovered.send(Void());
|
||||
|
||||
while(!tlogRequests.isEmpty()) {
|
||||
|
|
|
@ -4859,22 +4859,26 @@ public:
|
|||
m_tree->set(keyValue);
|
||||
}
|
||||
|
||||
Future< Standalone< VectorRef< KeyValueRef > > > readRange(KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30) {
|
||||
Future< Standalone< RangeResultRef > > readRange(KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30) {
|
||||
debug_printf("READRANGE %s\n", printable(keys).c_str());
|
||||
return catchError(readRange_impl(this, keys, rowLimit, byteLimit));
|
||||
}
|
||||
|
||||
ACTOR static Future< Standalone< VectorRef< KeyValueRef > > > readRange_impl(KeyValueStoreRedwoodUnversioned *self, KeyRange keys, int rowLimit, int byteLimit) {
|
||||
ACTOR static Future< Standalone< RangeResultRef > > readRange_impl(KeyValueStoreRedwoodUnversioned *self, KeyRange keys, int rowLimit, int byteLimit) {
|
||||
self->m_tree->counts.getRanges++;
|
||||
state Standalone<VectorRef<KeyValueRef>> result;
|
||||
state Standalone<RangeResultRef> result;
|
||||
state int accumulatedBytes = 0;
|
||||
ASSERT( byteLimit > 0 );
|
||||
|
||||
if(rowLimit == 0) {
|
||||
return result;
|
||||
}
|
||||
|
||||
state Reference<IStoreCursor> cur = self->m_tree->readAtVersion(self->m_tree->getLastCommittedVersion());
|
||||
// Prefetch is currently only done in the forward direction
|
||||
state int prefetchBytes = rowLimit > 1 ? byteLimit : 0;
|
||||
|
||||
if(rowLimit >= 0) {
|
||||
if(rowLimit > 0) {
|
||||
wait(cur->findFirstEqualOrGreater(keys.begin, prefetchBytes));
|
||||
while(cur->isValid() && cur->getKey() < keys.end) {
|
||||
KeyValueRef kv(KeyRef(result.arena(), cur->getKey()), ValueRef(result.arena(), cur->getValue()));
|
||||
|
@ -4900,6 +4904,12 @@ public:
|
|||
wait(cur->prev());
|
||||
}
|
||||
}
|
||||
|
||||
result.more = rowLimit == 0 || accumulatedBytes >= byteLimit;
|
||||
if(result.more) {
|
||||
ASSERT(result.size() > 0);
|
||||
result.readThrough = result[result.size()-1].key;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -454,7 +454,7 @@ private:
|
|||
}
|
||||
};
|
||||
|
||||
void startRole(const Role &role, UID roleId, UID workerId, std::map<std::string, std::string> details = std::map<std::string, std::string>(), std::string origination = "Recruited");
|
||||
void startRole(const Role &role, UID roleId, UID workerId, const std::map<std::string, std::string> &details = std::map<std::string, std::string>(), const std::string &origination = "Recruited");
|
||||
void endRole(const Role &role, UID id, std::string reason, bool ok = true, Error e = Error());
|
||||
|
||||
struct ServerDBInfo;
|
||||
|
@ -491,8 +491,8 @@ ACTOR Future<Void> masterProxyServer(MasterProxyInterface proxy, InitializeMaste
|
|||
Reference<AsyncVar<ServerDBInfo>> db, std::string whitelistBinPaths);
|
||||
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
|
||||
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
|
||||
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk,
|
||||
Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
|
||||
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID,
|
||||
bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
|
||||
Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog);
|
||||
|
||||
ACTOR Future<Void> monitorServerDBInfo(Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
|
||||
|
@ -512,20 +512,20 @@ void updateCpuProfiler(ProfilerRequest req);
|
|||
|
||||
namespace oldTLog_4_6 {
|
||||
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
|
||||
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId);
|
||||
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId, UID workerID);
|
||||
}
|
||||
namespace oldTLog_6_0 {
|
||||
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
|
||||
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
|
||||
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk,
|
||||
Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
|
||||
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID,
|
||||
bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
|
||||
Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog);
|
||||
}
|
||||
namespace oldTLog_6_2 {
|
||||
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
|
||||
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
|
||||
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk,
|
||||
Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
|
||||
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID,
|
||||
bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
|
||||
Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog);
|
||||
}
|
||||
|
||||
|
|
|
@ -51,11 +51,11 @@
|
|||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include <time.h>
|
||||
#include "fdbserver/Status.h"
|
||||
#include "fdbrpc/TLSConnection.h"
|
||||
#include "fdbrpc/Net2FileSystem.h"
|
||||
#include "fdbrpc/Platform.h"
|
||||
#include "fdbrpc/AsyncFileCached.actor.h"
|
||||
#include "fdbserver/CoroFlow.h"
|
||||
#include "flow/TLSPolicy.h"
|
||||
#if defined(CMAKE_BUILD) || !defined(WIN32)
|
||||
#include "versions.h"
|
||||
#endif
|
||||
|
@ -942,8 +942,8 @@ struct CLIOptions {
|
|||
int minTesterCount = 1;
|
||||
bool testOnServers = false;
|
||||
|
||||
Reference<TLSOptions> tlsOptions = Reference<TLSOptions>(new TLSOptions);
|
||||
std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword;
|
||||
Reference<TLSPolicy> tlsPolicy = Reference<TLSPolicy>(new TLSPolicy(TLSPolicy::Is::SERVER));
|
||||
TLSParams tlsParams;
|
||||
std::vector<std::string> tlsVerifyPeers;
|
||||
double fileIoTimeout = 0.0;
|
||||
bool fileIoWarnOnly = false;
|
||||
|
@ -1371,22 +1371,22 @@ private:
|
|||
break;
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
case TLSOptions::OPT_TLS_PLUGIN:
|
||||
case TLSParams::OPT_TLS_PLUGIN:
|
||||
args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CERTIFICATES:
|
||||
tlsCertPath = args.OptionArg();
|
||||
case TLSParams::OPT_TLS_CERTIFICATES:
|
||||
tlsParams.tlsCertPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_PASSWORD:
|
||||
tlsPassword = args.OptionArg();
|
||||
case TLSParams::OPT_TLS_PASSWORD:
|
||||
tlsParams.tlsPassword = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_CA_FILE:
|
||||
tlsCAPath = args.OptionArg();
|
||||
case TLSParams::OPT_TLS_CA_FILE:
|
||||
tlsParams.tlsCAPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_KEY:
|
||||
tlsKeyPath = args.OptionArg();
|
||||
case TLSParams::OPT_TLS_KEY:
|
||||
tlsParams.tlsKeyPath = args.OptionArg();
|
||||
break;
|
||||
case TLSOptions::OPT_TLS_VERIFY_PEERS:
|
||||
case TLSParams::OPT_TLS_VERIFY_PEERS:
|
||||
tlsVerifyPeers.push_back(args.OptionArg());
|
||||
break;
|
||||
#endif
|
||||
|
@ -1626,7 +1626,12 @@ int main(int argc, char* argv[]) {
|
|||
startNewSimulator();
|
||||
openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
|
||||
} else {
|
||||
g_network = newNet2(opts.useThreadPool, true);
|
||||
#ifndef TLS_DISABLED
|
||||
if ( opts.tlsVerifyPeers.size() ) {
|
||||
opts.tlsPolicy->set_verify_peers( opts.tlsVerifyPeers );
|
||||
}
|
||||
#endif
|
||||
g_network = newNet2(opts.useThreadPool, true, opts.tlsPolicy, opts.tlsParams);
|
||||
FlowTransport::createInstance(false, 1);
|
||||
|
||||
const bool expectsPublicAddress = (role == FDBD || role == NetworkTestServer || role == Restore);
|
||||
|
@ -1641,18 +1646,6 @@ int main(int argc, char* argv[]) {
|
|||
openTraceFile(opts.publicAddresses.address, opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace",
|
||||
opts.logGroup);
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
if (opts.tlsCertPath.size()) opts.tlsOptions->set_cert_file(opts.tlsCertPath);
|
||||
if (opts.tlsCAPath.size()) opts.tlsOptions->set_ca_file(opts.tlsCAPath);
|
||||
if (opts.tlsKeyPath.size()) {
|
||||
if (opts.tlsPassword.size()) opts.tlsOptions->set_key_password(opts.tlsPassword);
|
||||
|
||||
opts.tlsOptions->set_key_file(opts.tlsKeyPath);
|
||||
}
|
||||
if (opts.tlsVerifyPeers.size()) opts.tlsOptions->set_verify_peers(opts.tlsVerifyPeers);
|
||||
|
||||
opts.tlsOptions->register_network();
|
||||
#endif
|
||||
if (expectsPublicAddress) {
|
||||
for (int ii = 0; ii < (opts.publicAddresses.secondaryAddress.present() ? 2 : 1); ++ii) {
|
||||
const NetworkAddress& publicAddress =
|
||||
|
@ -1853,8 +1846,7 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
}
|
||||
}
|
||||
setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths,
|
||||
opts.tlsOptions);
|
||||
setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths);
|
||||
g_simulator.run();
|
||||
} else if (role == FDBD) {
|
||||
// Call fast restore for the class FastRestoreClass. This is a short-cut to run fast restore in circus
|
||||
|
@ -2070,6 +2062,11 @@ int main(int argc, char* argv[]) {
|
|||
TraceEvent(SevError, "MainError").error(e);
|
||||
//printf("\n%d tests passed; %d tests failed\n", passCount, failCount);
|
||||
flushAndExit(FDB_EXIT_MAIN_ERROR);
|
||||
} catch (boost::system::system_error& e) {
|
||||
fprintf(stderr, "boost::system::system_error: %s (%d)", e.what(), e.code().value());
|
||||
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
|
||||
//printf("\n%d tests passed; %d tests failed\n", passCount, failCount);
|
||||
flushAndExit(FDB_EXIT_MAIN_EXCEPTION);
|
||||
} catch (std::exception& e) {
|
||||
fprintf(stderr, "std::exception: %s\n", e.what());
|
||||
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
|
||||
|
|
|
@ -672,8 +672,8 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
|
|||
|
||||
TraceEvent("MasterRecovering", self->dbgid).detail("LastEpochEnd", self->lastEpochEnd).detail("RecoveryTransactionVersion", self->recoveryTransactionVersion);
|
||||
|
||||
Standalone<VectorRef<KeyValueRef>> rawConf = wait( self->txnStateStore->readRange( configKeys ) );
|
||||
self->configuration.fromKeyValues( rawConf );
|
||||
Standalone<RangeResultRef> rawConf = wait( self->txnStateStore->readRange( configKeys ) );
|
||||
self->configuration.fromKeyValues( rawConf.castTo<VectorRef<KeyValueRef>>() );
|
||||
self->originalConfiguration = self->configuration;
|
||||
self->hasConfiguration = true;
|
||||
|
||||
|
@ -683,13 +683,13 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
|
|||
.detail("Conf", self->configuration.toString())
|
||||
.trackLatest("RecoveredConfig");
|
||||
|
||||
Standalone<VectorRef<KeyValueRef>> rawLocalities = wait( self->txnStateStore->readRange( tagLocalityListKeys ) );
|
||||
Standalone<RangeResultRef> rawLocalities = wait( self->txnStateStore->readRange( tagLocalityListKeys ) );
|
||||
self->dcId_locality.clear();
|
||||
for(auto& kv : rawLocalities) {
|
||||
self->dcId_locality[decodeTagLocalityListKey(kv.key)] = decodeTagLocalityListValue(kv.value);
|
||||
}
|
||||
|
||||
Standalone<VectorRef<KeyValueRef>> rawTags = wait( self->txnStateStore->readRange( serverTagKeys ) );
|
||||
Standalone<RangeResultRef> rawTags = wait( self->txnStateStore->readRange( serverTagKeys ) );
|
||||
self->allTags.clear();
|
||||
if(self->lastEpochEnd > 0) {
|
||||
self->allTags.push_back(cacheTag);
|
||||
|
@ -709,7 +709,7 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
|
|||
}
|
||||
}
|
||||
|
||||
Standalone<VectorRef<KeyValueRef>> rawHistoryTags = wait( self->txnStateStore->readRange( serverTagHistoryKeys ) );
|
||||
Standalone<RangeResultRef> rawHistoryTags = wait( self->txnStateStore->readRange( serverTagHistoryKeys ) );
|
||||
for(auto& kv : rawHistoryTags) {
|
||||
self->allTags.push_back(decodeServerTagValue( kv.value ));
|
||||
}
|
||||
|
@ -732,13 +732,13 @@ ACTOR Future<Void> sendInitialCommitToResolvers( Reference<MasterData> self ) {
|
|||
state Sequence txnSequence = 0;
|
||||
ASSERT(self->recoveryTransactionVersion);
|
||||
|
||||
state Standalone<VectorRef<KeyValueRef>> data = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
|
||||
state Standalone<RangeResultRef> data = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
|
||||
state vector<Future<Void>> txnReplies;
|
||||
state int64_t dataOutstanding = 0;
|
||||
loop {
|
||||
if(!data.size()) break;
|
||||
((KeyRangeRef&)txnKeys) = KeyRangeRef( keyAfter(data.back().key, txnKeys.arena()), txnKeys.end );
|
||||
Standalone<VectorRef<KeyValueRef>> nextData = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
|
||||
Standalone<RangeResultRef> nextData = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
|
||||
|
||||
for(auto& r : self->proxies) {
|
||||
TxnStateRequest req;
|
||||
|
|
|
@ -168,7 +168,7 @@ struct StorageServerDisk {
|
|||
Future<Key> readNextKeyInclusive( KeyRef key ) { return readFirstKey(storage, KeyRangeRef(key, allKeys.end)); }
|
||||
Future<Optional<Value>> readValue( KeyRef key, Optional<UID> debugID = Optional<UID>() ) { return storage->readValue(key, debugID); }
|
||||
Future<Optional<Value>> readValuePrefix( KeyRef key, int maxLength, Optional<UID> debugID = Optional<UID>() ) { return storage->readValuePrefix(key, maxLength, debugID); }
|
||||
Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) { return storage->readRange(keys, rowLimit, byteLimit); }
|
||||
Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) { return storage->readRange(keys, rowLimit, byteLimit); }
|
||||
|
||||
KeyValueStoreType getKeyValueStoreType() { return storage->getType(); }
|
||||
StorageBytes getStorageBytes() { return storage->getStorageBytes(); }
|
||||
|
@ -181,7 +181,7 @@ private:
|
|||
void writeMutations( MutationListRef mutations, Version debugVersion, const char* debugContext );
|
||||
|
||||
ACTOR static Future<Key> readFirstKey( IKeyValueStore* storage, KeyRangeRef range ) {
|
||||
Standalone<VectorRef<KeyValueRef>> r = wait( storage->readRange( range, 1 ) );
|
||||
Standalone<RangeResultRef> r = wait( storage->readRange( range, 1 ) );
|
||||
if (r.size()) return r[0].key;
|
||||
else return range.end;
|
||||
}
|
||||
|
@ -1052,17 +1052,19 @@ void merge( Arena& arena, VectorRef<KeyValueRef, VecSerStrategy::String>& output
|
|||
// Combines data from base (at an older version) with sets from newer versions in [start, end) and appends the first (up to) |limit| rows to output
|
||||
// If limit<0, base and output are in descending order, and start->key()>end->key(), but start is still inclusive and end is exclusive
|
||||
{
|
||||
if (limit==0) return;
|
||||
int originalLimit = abs(limit) + output.size();
|
||||
ASSERT(limit != 0);
|
||||
|
||||
bool forward = limit>0;
|
||||
if (!forward) limit = -limit;
|
||||
int adjustedLimit = limit + output.size();
|
||||
int accumulatedBytes = 0;
|
||||
|
||||
KeyValueRef const* baseStart = base.begin();
|
||||
KeyValueRef const* baseEnd = base.end();
|
||||
while (baseStart!=baseEnd && start!=end && --limit>=0 && accumulatedBytes < limitBytes) {
|
||||
if (forward ? baseStart->key < start.key() : baseStart->key > start.key())
|
||||
while (baseStart!=baseEnd && start!=end && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
|
||||
if (forward ? baseStart->key < start.key() : baseStart->key > start.key()) {
|
||||
output.push_back_deep( arena, *baseStart++ );
|
||||
}
|
||||
else {
|
||||
output.push_back_deep( arena, KeyValueRef(start.key(), start->getValue()) );
|
||||
if (baseStart->key == start.key()) ++baseStart;
|
||||
|
@ -1070,18 +1072,17 @@ void merge( Arena& arena, VectorRef<KeyValueRef, VecSerStrategy::String>& output
|
|||
}
|
||||
accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
|
||||
}
|
||||
while (baseStart!=baseEnd && --limit>=0 && accumulatedBytes < limitBytes) {
|
||||
while (baseStart!=baseEnd && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
|
||||
output.push_back_deep( arena, *baseStart++ );
|
||||
accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
|
||||
}
|
||||
if( !stopAtEndOfBase ) {
|
||||
while (start!=end && --limit>=0 && accumulatedBytes < limitBytes) {
|
||||
while (start!=end && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
|
||||
output.push_back_deep( arena, KeyValueRef(start.key(), start->getValue()) );
|
||||
accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
|
||||
if (forward) ++start; else --start;
|
||||
}
|
||||
}
|
||||
ASSERT( output.size() <= originalLimit );
|
||||
}
|
||||
|
||||
// If limit>=0, it returns the first rows in the range (sorted ascending), otherwise the last rows (sorted descending).
|
||||
|
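A standalone sketch of the limit accounting introduced in merge() above: instead of decrementing a shared counter (--limit>=0) in each branch, the loop stops once the output has grown by |limit| rows (the adjustedLimit) or the byte budget is spent. This is not the FoundationDB function; it only illustrates the stopping conditions and omits the de-duplication of equal keys.

#include <iostream>
#include <string>
#include <vector>

struct KV { std::string key, value; };

// Append merged rows from two sorted inputs, stopping once the output has grown by
// `limit` rows or `limitBytes` bytes have been accumulated.
void mergeSorted(std::vector<KV>& output, const std::vector<KV>& base,
                 const std::vector<KV>& updates, int limit, int limitBytes) {
    int adjustedLimit = limit + (int)output.size();   // cap on total output rows
    int accumulatedBytes = 0;
    size_t b = 0, u = 0;
    while ((b < base.size() || u < updates.size()) &&
           (int)output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
        bool takeBase = u == updates.size() || (b < base.size() && base[b].key < updates[u].key);
        const KV& next = takeBase ? base[b++] : updates[u++];
        output.push_back(next);
        accumulatedBytes += (int)(next.key.size() + next.value.size());
    }
}

int main() {
    std::vector<KV> out;
    std::vector<KV> base = { {"a","1"}, {"c","3"} };
    std::vector<KV> updates = { {"b","2"}, {"d","4"} };
    mergeSorted(out, base, updates, 3, 1 << 20);
    for (const auto& kv : out) std::cout << kv.key << ' ';   // prints: a b c
    std::cout << '\n';
}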
@ -1095,10 +1096,6 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
|
|||
state KeyRef readEnd;
|
||||
state Key readBeginTemp;
|
||||
state int vCount;
|
||||
//state UID rrid = deterministicRandom()->randomUniqueID();
|
||||
//state int originalLimit = limit;
|
||||
//state int originalLimitBytes = *pLimitBytes;
|
||||
//state bool track = rrid.first() == 0x1bc134c2f752187cLL;
|
||||
|
||||
// Check if the desired key-range intersects the cached key-ranges
|
||||
// TODO Find a more efficient way to do it
|
||||
|
@ -1106,9 +1103,7 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
|
|||
auto cached = data->cachedRangeMap.intersectingRanges(range);
|
||||
result.cached = (cached.begin() != cached.end());
|
||||
|
||||
// FIXME: Review pLimitBytes behavior
|
||||
// if (limit >= 0) we are reading forward, else backward
|
||||
|
||||
if (limit >= 0) {
|
||||
// We might care about a clear beginning before start that
|
||||
// runs into range
|
||||
|
@ -1120,20 +1115,7 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
|
|||
|
||||
vStart = view.lower_bound(readBegin);
|
||||
|
||||
/*if (track) {
|
||||
printf("readRange(%llx, @%lld, '%s'-'%s')\n", data->thisServerID.first(), version, printable(range.begin).c_str(), printable(range.end).c_str());
|
||||
printf("mvcc:\n");
|
||||
vEnd = view.upper_bound(range.end);
|
||||
for(auto r=vStart; r != vEnd; ++r) {
|
||||
if (r->isClearTo())
|
||||
printf(" '%s'-'%s' cleared\n", printable(r.key()).c_str(), printable(r->getEndKey()).c_str());
|
||||
else
|
||||
printf(" '%s' := '%s'\n", printable(r.key()).c_str(), printable(r->getValue()).c_str());
|
||||
}
|
||||
}*/
|
||||
|
||||
while (limit>0 && *pLimitBytes>0 && readBegin < range.end) {
|
||||
// ASSERT( vStart == view.lower_bound(readBegin) );
|
||||
ASSERT( !vStart || vStart.key() >= readBegin );
|
||||
if (vStart) { auto b = vStart; --b; ASSERT( !b || b.key() < readBegin ); }
|
||||
ASSERT( data->storageVersion() <= version );
|
||||
|
@ -1150,94 +1132,59 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
|
|||
|
||||
// Read the data on disk up to vEnd (or the end of the range)
|
||||
readEnd = vEnd ? std::min( vEnd.key(), range.end ) : range.end;
|
||||
Standalone<VectorRef<KeyValueRef>> atStorageVersion = wait(
|
||||
Standalone<RangeResultRef> atStorageVersion = wait(
|
||||
data->storage.readRange( KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes ) );
|
||||
|
||||
/*if (track) {
|
||||
printf("read [%s,%s): %d rows\n", printable(readBegin).c_str(), printable(readEnd).c_str(), atStorageVersion.size());
|
||||
for(auto r=atStorageVersion.begin(); r != atStorageVersion.end(); ++r)
|
||||
printf(" '%s' := '%s'\n", printable(r->key).c_str(), printable(r->value).c_str());
|
||||
}*/
|
||||
|
||||
ASSERT( atStorageVersion.size() <= limit );
|
||||
if (data->storageVersion() > version) throw transaction_too_old();
|
||||
|
||||
bool more = atStorageVersion.size()!=0;
|
||||
|
||||
// merge the sets in [vStart,vEnd) with the sets on disk, stopping at the last key from disk if there is 'more'
|
||||
// merge the sets in [vStart,vEnd) with the sets on disk, stopping at the last key from disk if we were limited
|
||||
int prevSize = result.data.size();
|
||||
merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, more, *pLimitBytes );
|
||||
merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, atStorageVersion.more, *pLimitBytes );
|
||||
limit -= result.data.size() - prevSize;
|
||||
|
||||
for (auto i = result.data.begin() + prevSize; i != result.data.end(); i++) {
|
||||
*pLimitBytes -= sizeof(KeyValueRef) + i->expectedSize();
|
||||
}
|
||||
|
||||
// Setup for the next iteration
|
||||
if (more) { // if there might be more data, begin reading right after what we already found to find out
|
||||
//if (track) printf("more\n");
|
||||
if (!(limit<=0 || *pLimitBytes<=0 || result.data.end()[-1].key == atStorageVersion.end()[-1].key))
|
||||
TraceEvent(SevError, "ReadRangeIssue", data->thisServerID).detail("ReadBegin", readBegin).detail("ReadEnd", readEnd)
|
||||
.detail("VStart", vStart ? vStart.key() : LiteralStringRef("nil")).detail("VEnd", vEnd ? vEnd.key() : LiteralStringRef("nil"))
|
||||
.detail("AtStorageVersionBack", atStorageVersion.end()[-1].key).detail("ResultBack", result.data.end()[-1].key)
|
||||
.detail("Limit", limit).detail("LimitBytes", *pLimitBytes).detail("ResultSize", result.data.size()).detail("PrevSize", prevSize);
|
||||
readBegin = readBeginTemp = keyAfter( result.data.end()[-1].key );
|
||||
ASSERT( limit<=0 || *pLimitBytes<=0 || result.data.end()[-1].key == atStorageVersion.end()[-1].key );
|
||||
} else if (vStart && vStart->isClearTo()){ // if vStart is a clear, skip it.
|
||||
//if (track) printf("skip clear\n");
|
||||
readBegin = vStart->getEndKey(); // next disk read should start at the end of the clear
|
||||
++vStart;
|
||||
} else { // Otherwise, continue at readEnd
|
||||
//if (track) printf("continue\n");
|
||||
readBegin = readEnd;
|
||||
}
|
||||
}
|
||||
// all but the last item are less than *pLimitBytes
|
||||
ASSERT( result.data.size() == 0 || *pLimitBytes + result.data.end()[-1].expectedSize() + sizeof(KeyValueRef) > 0 );
|
||||
/*if (*pLimitBytes <= 0)
|
||||
TraceEvent(SevWarn, "ReadRangeLimitExceeded")
|
||||
.detail("Version", version)
|
||||
.detail("Begin", range.begin )
|
||||
.detail("End", range.end )
|
||||
.detail("LimitReamin", limit)
|
||||
.detail("LimitBytesRemain", *pLimitBytes); */
|
||||
if (limit <=0 || *pLimitBytes <= 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/*GetKeyValuesReply correct = wait( readRangeOld(data, version, range, originalLimit, originalLimitBytes) );
|
||||
bool prefix_equal = true;
|
||||
int totalsize = 0;
|
||||
int first_difference = -1;
|
||||
for(int i=0; i<result.data.size() && i<correct.data.size(); i++) {
|
||||
if (result.data[i] != correct.data[i]) {
|
||||
first_difference = i;
|
||||
prefix_equal = false;
|
||||
// If we hit our limits reading from disk but then combining with MVCC gave us back more room
|
||||
if (atStorageVersion.more) {
|
||||
ASSERT(result.data.end()[-1].key == atStorageVersion.end()[-1].key);
|
||||
readBegin = readBeginTemp = keyAfter(result.data.end()[-1].key);
|
||||
} else if (vEnd && vEnd->isClearTo()) {
|
||||
ASSERT(vStart == vEnd); // vStart will have been advanced by merge()
|
||||
ASSERT(vEnd->getEndKey() > readBegin);
|
||||
readBegin = vEnd->getEndKey();
|
||||
++vStart;
|
||||
} else {
|
||||
ASSERT(readEnd == range.end);
|
||||
break;
|
||||
}
|
||||
totalsize += result.data[i].expectedSize() + sizeof(KeyValueRef);
|
||||
}
|
||||
|
||||
// for the following check
|
||||
result.more = limit == 0 || *pLimitBytes<=0; // FIXME: Does this have to be exact?
|
||||
result.version = version;
|
||||
if ( !(totalsize>originalLimitBytes ? prefix_equal : result.data==correct.data) || correct.more != result.more ) {
|
||||
TraceEvent(SevError, "IncorrectResult", rrid).detail("Server", data->thisServerID).detail("CorrectRows", correct.data.size())
|
||||
.detail("FirstDifference", first_difference).detail("OriginalLimit", originalLimit)
|
||||
.detail("ResultRows", result.data.size()).detail("Result0", result.data[0].key).detail("Correct0", correct.data[0].key)
|
||||
.detail("ResultN", result.data.size() ? result.data[std::min(correct.data.size(),result.data.size())-1].key : "nil")
|
||||
.detail("CorrectN", correct.data.size() ? correct.data[std::min(correct.data.size(),result.data.size())-1].key : "nil");
|
||||
}*/
|
||||
} else {
|
||||
// Reverse read - abandon hope all ye who enter here
|
||||
readEnd = range.end;
|
||||
|
||||
vStart = view.lastLess(readEnd);
|
||||
vStart = view.lastLess(range.end);
|
||||
|
||||
// A clear might extend all the way to range.end
|
||||
if (vStart && vStart->isClearTo() && vStart->getEndKey() >= readEnd) {
|
||||
if (vStart && vStart->isClearTo() && vStart->getEndKey() >= range.end) {
|
||||
readEnd = vStart.key();
|
||||
--vStart;
|
||||
} else {
|
||||
readEnd = range.end;
|
||||
}
|
||||
|
||||
while (limit < 0 && *pLimitBytes > 0 && readEnd > range.begin) {
|
||||
ASSERT(!vStart || vStart.key() < readEnd);
|
||||
if (vStart) {
|
||||
auto b = vStart;
|
||||
++b;
|
||||
ASSERT(!b || b.key() >= readEnd);
|
||||
}
|
||||
ASSERT(data->storageVersion() <= version);
|
||||
|
||||
vEnd = vStart;
|
||||
vCount = 0;
|
||||
int vSize=0;
|
||||
|
@ -1247,31 +1194,43 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
|
|||
--vEnd;
|
||||
}
|
||||
|
||||
readBegin = range.begin;
|
||||
if (vEnd)
|
||||
readBegin = std::max( readBegin, vEnd->isClearTo() ? vEnd->getEndKey() : vEnd.key() );
|
||||
readBegin = vEnd ? std::max(vEnd->isClearTo() ? vEnd->getEndKey() : vEnd.key(), range.begin) : range.begin;
|
||||
Standalone<RangeResultRef> atStorageVersion =
|
||||
wait(data->storage.readRange(KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes));
|
||||
|
||||
Standalone<VectorRef<KeyValueRef>> atStorageVersion = wait( data->storage.readRange( KeyRangeRef(readBegin, readEnd), limit ) );
|
||||
ASSERT(atStorageVersion.size() <= -limit);
|
||||
if (data->storageVersion() > version) throw transaction_too_old();
|
||||
|
||||
int prevSize = result.data.size();
|
||||
merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, false, *pLimitBytes );
|
||||
merge(result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, atStorageVersion.more, *pLimitBytes);
|
||||
limit += result.data.size() - prevSize;
|
||||
|
||||
for (auto i = result.data.begin() + prevSize; i != result.data.end(); i++) {
|
||||
*pLimitBytes -= sizeof(KeyValueRef) + i->expectedSize();
|
||||
}
|
||||
|
||||
vStart = vEnd;
|
||||
readEnd = readBegin;
|
||||
if (limit >=0 || *pLimitBytes <= 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (vStart && vStart->isClearTo()) {
|
||||
ASSERT( vStart.key() < readEnd );
|
||||
readEnd = vStart.key();
|
||||
if (atStorageVersion.more) {
|
||||
ASSERT(result.data.end()[-1].key == atStorageVersion.end()[-1].key);
|
||||
readEnd = result.data.end()[-1].key;
|
||||
} else if (vEnd && vEnd->isClearTo()) {
|
||||
ASSERT(vStart == vEnd);
|
||||
ASSERT(vEnd.key() < readEnd);
|
||||
readEnd = vEnd.key();
|
||||
--vStart;
|
||||
} else {
|
||||
ASSERT(readBegin == range.begin);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// all but the last item are less than *pLimitBytes
|
||||
ASSERT(result.data.size() == 0 || *pLimitBytes + result.data.end()[-1].expectedSize() + sizeof(KeyValueRef) > 0);
|
||||
|
||||
result.more = limit == 0 || *pLimitBytes<=0; // FIXME: Does this have to be exact?
|
||||
result.version = version;
|
||||
return result;
|
||||
|
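A standalone sketch of the paging pattern used by the rewritten readRange above: each disk read returns a bounded batch plus a more flag, and when more is set the next read resumes just after the last key returned (the keyAfter idea) rather than at a precomputed boundary. The map-backed store and Page type below are stand-ins for IKeyValueStore and RangeResultRef, not the real interfaces.

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Page {
    std::vector<std::pair<std::string, std::string>> rows;
    bool more = false;   // set when the row limit cut the read short, like RangeResultRef::more
};

// Stand-in for a storage readRange(): return up to rowLimit rows with keys in [begin, end).
Page readPage(const std::map<std::string, std::string>& store,
              const std::string& begin, const std::string& end, int rowLimit) {
    Page p;
    for (auto it = store.lower_bound(begin); it != store.end() && it->first < end; ++it) {
        if ((int)p.rows.size() == rowLimit) { p.more = true; break; }
        p.rows.push_back(*it);
    }
    return p;
}

int main() {
    std::map<std::string, std::string> store = { {"a","1"}, {"b","2"}, {"c","3"}, {"d","4"} };
    std::string readBegin = "a";
    while (true) {
        Page page = readPage(store, readBegin, "z", 2);
        for (const auto& r : page.rows) std::cout << r.first << ' ';
        if (!page.more) break;
        // keyAfter() analogue: the smallest key strictly greater than the last one returned.
        readBegin = page.rows.back().first + '\x00';
    }
    std::cout << '\n';   // prints: a b c d
}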
@ -3119,8 +3078,8 @@ ACTOR Future<Void> applyByteSampleResult( StorageServer* data, IKeyValueStore* s
|
|||
state int totalKeys = 0;
|
||||
state int totalBytes = 0;
|
||||
loop {
|
||||
Standalone<VectorRef<KeyValueRef>> bs = wait( storage->readRange( KeyRangeRef(begin, end), SERVER_KNOBS->STORAGE_LIMIT_BYTES, SERVER_KNOBS->STORAGE_LIMIT_BYTES ) );
|
||||
if(results) results->push_back(bs);
|
||||
Standalone<RangeResultRef> bs = wait( storage->readRange( KeyRangeRef(begin, end), SERVER_KNOBS->STORAGE_LIMIT_BYTES, SERVER_KNOBS->STORAGE_LIMIT_BYTES ) );
|
||||
if(results) results->push_back(bs.castTo<VectorRef<KeyValueRef>>());
|
||||
int rangeSize = bs.expectedSize();
|
||||
totalFetches++;
|
||||
totalKeys += bs.size();
|
||||
|
@ -3201,8 +3160,8 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
|
|||
state Future<Optional<Value>> fVersion = storage->readValue(persistVersion);
|
||||
state Future<Optional<Value>> fLogProtocol = storage->readValue(persistLogProtocol);
|
||||
state Future<Optional<Value>> fPrimaryLocality = storage->readValue(persistPrimaryLocality);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fShardAssigned = storage->readRange(persistShardAssignedKeys);
|
||||
state Future<Standalone<VectorRef<KeyValueRef>>> fShardAvailable = storage->readRange(persistShardAvailableKeys);
|
||||
state Future<Standalone<RangeResultRef>> fShardAssigned = storage->readRange(persistShardAssignedKeys);
|
||||
state Future<Standalone<RangeResultRef>> fShardAvailable = storage->readRange(persistShardAvailableKeys);
|
||||
|
||||
state Promise<Void> byteSampleSampleRecovered;
|
||||
state Promise<Void> startByteSampleRestore;
|
||||
|
@ -3239,7 +3198,7 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
|
|||
debug_checkRestoredVersion( data->thisServerID, version, "StorageServer" );
|
||||
data->setInitialVersion( version );
|
||||
|
||||
state Standalone<VectorRef<KeyValueRef>> available = fShardAvailable.get();
|
||||
state Standalone<RangeResultRef> available = fShardAvailable.get();
|
||||
state int availableLoc;
|
||||
for(availableLoc=0; availableLoc<available.size(); availableLoc++) {
|
||||
KeyRangeRef keys(
|
||||
|
@ -3253,7 +3212,7 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
|
|||
wait(yield());
|
||||
}
|
||||
|
||||
state Standalone<VectorRef<KeyValueRef>> assigned = fShardAssigned.get();
|
||||
state Standalone<RangeResultRef> assigned = fShardAssigned.get();
|
||||
state int assignedLoc;
|
||||
for(assignedLoc=0; assignedLoc<assigned.size(); assignedLoc++) {
|
||||
KeyRangeRef keys(
|
||||
|
@ -3444,6 +3403,7 @@ ACTOR Future<Void> waitMetrics( StorageServerMetrics* self, WaitMetricsRequest r
|
|||
|
||||
if( timedout ) {
|
||||
TEST( true ); // ShardWaitMetrics return on timeout
|
||||
//FIXME: instead of using random chance, send wrong_shard_server when the call is from waitMetricsMultiple (requires additional information in the request)
|
||||
if(deterministicRandom()->random01() < SERVER_KNOBS->WAIT_METRICS_WRONG_SHARD_CHANCE) {
|
||||
req.reply.sendError( wrong_shard_server() );
|
||||
} else {
|
||||
|
|
|
@ -973,6 +973,8 @@ vector<TestSpec> readTests( ifstream& ifs ) {
|
|||
TraceEvent("TestParserTest").detail("ParsedSimDrAgents", spec.simDrAgents);
|
||||
} else if( attrib == "extraDB" ) {
|
||||
TraceEvent("TestParserTest").detail("ParsedExtraDB", "");
|
||||
} else if ( attrib == "configureLocked" ) {
|
||||
TraceEvent("TestParserTest").detail("ParsedConfigureLocked", "");
|
||||
} else if( attrib == "minimumReplication" ) {
|
||||
TraceEvent("TestParserTest").detail("ParsedMinimumReplication", "");
|
||||
} else if( attrib == "minimumRegions" ) {
|
||||
|
|
|
@ -660,7 +660,7 @@ Standalone<StringRef> roleString(std::set<std::pair<std::string, std::string>> r
|
|||
return StringRef(result);
|
||||
}
|
||||
|
||||
void startRole(const Role &role, UID roleId, UID workerId, std::map<std::string, std::string> details, std::string origination) {
|
||||
void startRole(const Role &role, UID roleId, UID workerId, const std::map<std::string, std::string> &details, const std::string &origination) {
|
||||
if(role.includeInTraceRoles) {
|
||||
addTraceRole(role.abbreviation);
|
||||
}
|
||||
|
@ -980,7 +980,7 @@ ACTOR Future<Void> workerServer(
|
|||
auto& logData = sharedLogs[SharedLogsKey(s.tLogOptions, s.storeType)];
|
||||
// FIXME: If logData.first isValid && !isReady, shouldn't we
|
||||
// be sending a fake InitializeTLogRequest rather than calling tLog()?
|
||||
Future<Void> tl = tLogFn( kv, queue, dbInfo, locality, !logData.actor.isValid() || logData.actor.isReady() ? logData.requests : PromiseStream<InitializeTLogRequest>(), s.storeID, true, oldLog, recovery, folder, degraded, activeSharedTLog );
|
||||
Future<Void> tl = tLogFn( kv, queue, dbInfo, locality, !logData.actor.isValid() || logData.actor.isReady() ? logData.requests : PromiseStream<InitializeTLogRequest>(), s.storeID, interf.id(), true, oldLog, recovery, folder, degraded, activeSharedTLog );
|
||||
recoveries.push_back(recovery.getFuture());
|
||||
activeSharedTLog->set(s.storeID);
|
||||
|
||||
|
@ -1161,7 +1161,7 @@ ACTOR Future<Void> workerServer(
|
|||
filesClosed.add( data->onClosed() );
|
||||
filesClosed.add( queue->onClosed() );
|
||||
|
||||
Future<Void> tLogCore = tLogFn( data, queue, dbInfo, locality, logData.requests, logId, false, Promise<Void>(), Promise<Void>(), folder, degraded, activeSharedTLog );
|
||||
Future<Void> tLogCore = tLogFn( data, queue, dbInfo, locality, logData.requests, logId, interf.id(), false, Promise<Void>(), Promise<Void>(), folder, degraded, activeSharedTLog );
|
||||
tLogCore = handleIOErrors( tLogCore, data, logId );
|
||||
tLogCore = handleIOErrors( tLogCore, queue, logId );
|
||||
errorForwarders.add( forwardError( errors, Role::SHARED_TRANSACTION_LOG, logId, tLogCore ) );
|
||||
|
@ -1458,37 +1458,48 @@ ACTOR Future<UID> createAndLockProcessIdFile(std::string folder) {
|
|||
state UID processIDUid;
|
||||
platform::createDirectory(folder);
|
||||
|
||||
try {
|
||||
state std::string lockFilePath = joinPath(folder, "processId");
|
||||
state ErrorOr<Reference<IAsyncFile>> lockFile = wait(errorOr(IAsyncFileSystem::filesystem(g_network)->open(lockFilePath, IAsyncFile::OPEN_READWRITE | IAsyncFile::OPEN_LOCK, 0600)));
|
||||
loop {
|
||||
try {
|
||||
state std::string lockFilePath = joinPath(folder, "processId");
|
||||
state ErrorOr<Reference<IAsyncFile>> lockFile = wait(errorOr(IAsyncFileSystem::filesystem(g_network)->open(lockFilePath, IAsyncFile::OPEN_READWRITE | IAsyncFile::OPEN_LOCK, 0600)));
|
||||
|
||||
if (lockFile.isError() && lockFile.getError().code() == error_code_file_not_found && !fileExists(lockFilePath)) {
|
||||
Reference<IAsyncFile> _lockFile = wait(IAsyncFileSystem::filesystem()->open(lockFilePath, IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_LOCK | IAsyncFile::OPEN_READWRITE, 0600));
|
||||
lockFile = _lockFile;
|
||||
processIDUid = deterministicRandom()->randomUniqueID();
|
||||
BinaryWriter wr(IncludeVersion());
|
||||
wr << processIDUid;
|
||||
wait(lockFile.get()->write(wr.getData(), wr.getLength(), 0));
|
||||
wait(lockFile.get()->sync());
|
||||
}
|
||||
else {
|
||||
if (lockFile.isError()) throw lockFile.getError(); // If we've failed to open the file, throw an exception
|
||||
if (lockFile.isError() && lockFile.getError().code() == error_code_file_not_found && !fileExists(lockFilePath)) {
|
||||
Reference<IAsyncFile> _lockFile = wait(IAsyncFileSystem::filesystem()->open(lockFilePath, IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_LOCK | IAsyncFile::OPEN_READWRITE, 0600));
|
||||
lockFile = _lockFile;
|
||||
processIDUid = deterministicRandom()->randomUniqueID();
|
||||
BinaryWriter wr(IncludeVersion());
|
||||
wr << processIDUid;
|
||||
wait(lockFile.get()->write(wr.getData(), wr.getLength(), 0));
|
||||
wait(lockFile.get()->sync());
|
||||
}
|
||||
else {
|
||||
if (lockFile.isError()) throw lockFile.getError(); // If we've failed to open the file, throw an exception
|
||||
|
||||
int64_t fileSize = wait(lockFile.get()->size());
|
||||
state Key fileData = makeString(fileSize);
|
||||
wait(success(lockFile.get()->read(mutateString(fileData), fileSize, 0)));
|
||||
processIDUid = BinaryReader::fromStringRef<UID>(fileData, IncludeVersion());
|
||||
int64_t fileSize = wait(lockFile.get()->size());
|
||||
state Key fileData = makeString(fileSize);
|
||||
wait(success(lockFile.get()->read(mutateString(fileData), fileSize, 0)));
|
||||
try {
|
||||
processIDUid = BinaryReader::fromStringRef<UID>(fileData, IncludeVersion());
|
||||
return processIDUid;
|
||||
} catch (Error& e) {
|
||||
if(!g_network->isSimulated()) {
|
||||
throw;
|
||||
}
|
||||
deleteFile(lockFilePath);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Error& e) {
|
||||
if (e.code() != error_code_actor_cancelled) {
|
||||
if (!e.isInjectedFault())
|
||||
catch (Error& e) {
|
||||
if (e.code() == error_code_actor_cancelled) {
|
||||
throw;
|
||||
}
|
||||
if (!e.isInjectedFault()) {
|
||||
fprintf(stderr, "ERROR: error creating or opening process id file `%s'.\n", joinPath(folder, "processId").c_str());
|
||||
}
|
||||
TraceEvent(SevError, "OpenProcessIdError").error(e);
|
||||
throw;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
return processIDUid;
|
||||
}
|
||||
|
||||
ACTOR Future<Void> fdbd(
|
||||
|
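A plain-C++ sketch of the retry shape added to createAndLockProcessIdFile above: read the process-id file, create it if missing, and if its contents fail to parse, delete it and loop so the next pass recreates it. File locking, the simulation-only check, and the UID serialization are omitted; the file name and format below are made up.

#include <cstdio>
#include <fstream>
#include <string>

// Read the process id from `path`, creating the file if it is missing and deleting and
// retrying if its contents do not parse. Returns the id on success.
std::string loadOrCreateProcessId(const std::string& path) {
    for (;;) {                                       // analogous to the actor's loop { ... }
        std::ifstream in(path);
        if (!in) {
            std::ofstream out(path);                 // file missing: write a fresh id
            out << "pid-12345";
            continue;                                // re-open and validate on the next pass
        }
        std::string id;
        std::getline(in, id);
        if (id.rfind("pid-", 0) == 0) return id;     // parsed successfully
        in.close();
        std::remove(path.c_str());                   // corrupt contents: delete and retry
    }
}

int main() {
    std::printf("%s\n", loadOrCreateProcessId("processId.txt").c_str());
}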
|
|
@ -1136,18 +1136,19 @@ struct ConsistencyCheckWorkload : TestWorkload
|
|||
std::set<Optional<Key>> missingStorage;
|
||||
|
||||
for( int i = 0; i < workers.size(); i++ ) {
|
||||
if( !configuration.isExcludedServer(workers[i].interf.address()) &&
|
||||
NetworkAddress addr = workers[i].interf.tLog.getEndpoint().addresses.getTLSAddress();
|
||||
if( !configuration.isExcludedServer(addr) &&
|
||||
( workers[i].processClass == ProcessClass::StorageClass || workers[i].processClass == ProcessClass::UnsetClass ) ) {
|
||||
bool found = false;
|
||||
for( int j = 0; j < storageServers.size(); j++ ) {
|
||||
if( storageServers[j].address() == workers[i].interf.address() ) {
|
||||
if( storageServers[j].getValue.getEndpoint().addresses.getTLSAddress() == addr ) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !found ) {
|
||||
TraceEvent("ConsistencyCheck_NoStorage")
|
||||
.detail("Address", workers[i].interf.address())
|
||||
.detail("Address", addr)
|
||||
.detail("ProcessClassEqualToStorageClass",
|
||||
(int)(workers[i].processClass == ProcessClass::StorageClass));
|
||||
missingStorage.insert(workers[i].interf.locality.dcId());
|
||||
|
@ -1195,8 +1196,15 @@ struct ConsistencyCheckWorkload : TestWorkload
|
|||
if(!statefulProcesses[itr->interf.address()].count(id)) {
|
||||
TraceEvent("ConsistencyCheck_ExtraDataStore").detail("Address", itr->interf.address()).detail("DataStoreID", id);
|
||||
if(g_network->isSimulated()) {
|
||||
TraceEvent("ConsistencyCheck_RebootProcess").detail("Address", itr->interf.address()).detail("DataStoreID", id);
|
||||
g_simulator.rebootProcess(g_simulator.getProcessByAddress(itr->interf.address()), ISimulator::RebootProcess);
|
||||
//FIXME: this is hiding the fact that we can recruit a new storage server on a location that has files left behind by a previous failure
|
||||
// this means that the process is wasting disk space until it is rebooted
|
||||
auto p = g_simulator.getProcessByAddress(itr->interf.address());
|
||||
TraceEvent("ConsistencyCheck_RebootProcess").detail("Address", itr->interf.address()).detail("DataStoreID", id).detail("Reliable", p->isReliable());
|
||||
if(p->isReliable()) {
|
||||
g_simulator.rebootProcess(p, ISimulator::RebootProcess);
|
||||
} else {
|
||||
g_simulator.killProcess(p, ISimulator::KillInstantly);
|
||||
}
|
||||
}
|
||||
|
||||
foundExtraDataStore = true;
|
||||
|
@ -1220,12 +1228,13 @@ struct ConsistencyCheckWorkload : TestWorkload
|
|||
std::set<NetworkAddress> workerAddresses;
|
||||
|
||||
for (const auto& it : workers) {
|
||||
ISimulator::ProcessInfo* info = g_simulator.getProcessByAddress(it.interf.address());
|
||||
NetworkAddress addr = it.interf.tLog.getEndpoint().addresses.getTLSAddress();
|
||||
ISimulator::ProcessInfo* info = g_simulator.getProcessByAddress(addr);
|
||||
if(!info || info->failed) {
|
||||
TraceEvent("ConsistencyCheck_FailedWorkerInList").detail("Addr", it.interf.address());
|
||||
return false;
|
||||
}
|
||||
workerAddresses.insert( NetworkAddress(it.interf.address().ip, it.interf.address().port, true, false) );
|
||||
workerAddresses.insert( NetworkAddress(addr.ip, addr.port, true, addr.isTLS()) );
|
||||
}
|
||||
|
||||
vector<ISimulator::ProcessInfo*> all = g_simulator.getAllProcesses();
|
||||
|
|
|
@ -46,7 +46,7 @@ struct DDMetricsExcludeWorkload : TestWorkload {
|
|||
|
||||
ACTOR static Future<double> getMovingDataAmount(Database cx, DDMetricsExcludeWorkload* self) {
|
||||
try {
|
||||
StatusObject statusObj = wait(StatusClient::statusFetcher(cx->getConnectionFile()));
|
||||
StatusObject statusObj = wait(StatusClient::statusFetcher(cx));
|
||||
StatusObjectReader statusObjCluster;
|
||||
((StatusObjectReader)statusObj).get("cluster", statusObjCluster);
|
||||
StatusObjectReader statusObjData;
|
||||
|
|
|
@ -260,7 +260,7 @@ ACTOR Future<Void> testKVStoreMain( KVStoreTestWorkload* workload, KVTest* ptest
|
|||
state Key k;
|
||||
state double cst = timer();
|
||||
while (true) {
|
||||
Standalone<VectorRef<KeyValueRef>> kv =
|
||||
Standalone<RangeResultRef> kv =
|
||||
wait(test.store->readRange(KeyRangeRef(k, LiteralStringRef("\xff\xff\xff\xff")), 1000));
|
||||
count += kv.size();
|
||||
if (kv.size() < 1000) break;
|
||||
|
|
|
@ -27,12 +27,14 @@
|
|||
struct LockDatabaseWorkload : TestWorkload {
|
||||
double lockAfter, unlockAfter;
|
||||
bool ok;
|
||||
bool onlyCheckLocked;
|
||||
|
||||
LockDatabaseWorkload(WorkloadContext const& wcx)
|
||||
: TestWorkload(wcx), ok(true)
|
||||
{
|
||||
lockAfter = getOption( options, LiteralStringRef("lockAfter"), 0.0 );
|
||||
unlockAfter = getOption( options, LiteralStringRef("unlockAfter"), 10.0 );
|
||||
onlyCheckLocked = getOption(options, LiteralStringRef("onlyCheckLocked"), false);
|
||||
ASSERT(unlockAfter > lockAfter);
|
||||
}
|
||||
|
||||
|
@ -42,9 +44,8 @@ struct LockDatabaseWorkload : TestWorkload {
|
|||
return Void();
|
||||
}
|
||||
|
||||
virtual Future<Void> start( Database const& cx ) {
|
||||
if( clientId == 0 )
|
||||
return lockWorker( cx, this );
|
||||
virtual Future<Void> start(Database const& cx) {
|
||||
if (clientId == 0) return onlyCheckLocked ? timeout(checkLocked(cx, this), 60, Void()) : lockWorker(cx, this);
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -110,6 +111,7 @@ struct LockDatabaseWorkload : TestWorkload {
|
|||
self->ok = false;
|
||||
return Void();
|
||||
} catch( Error &e ) {
|
||||
TEST(e.code() == error_code_database_locked); // Database confirmed locked
|
||||
wait( tr.onError(e) );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -327,10 +327,43 @@ struct ReadWriteWorkload : KVWorkload {
|
|||
elapsed += self->periodicLoggingInterval;
|
||||
wait( delayUntil(start + elapsed) );
|
||||
|
||||
TraceEvent((self->description() + "_RowReadLatency").c_str()).detail("Mean", self->readLatencies.mean()).detail("Median", self->readLatencies.median()).detail("Percentile5", self->readLatencies.percentile(.05)).detail("Percentile95", self->readLatencies.percentile(.95)).detail("Count", self->readLatencyCount).detail("Elapsed", elapsed);
|
||||
TraceEvent((self->description() + "_GRVLatency").c_str()).detail("Mean", self->GRVLatencies.mean()).detail("Median", self->GRVLatencies.median()).detail("Percentile5", self->GRVLatencies.percentile(.05)).detail("Percentile95", self->GRVLatencies.percentile(.95));
|
||||
TraceEvent((self->description() + "_CommitLatency").c_str()).detail("Mean", self->commitLatencies.mean()).detail("Median", self->commitLatencies.median()).detail("Percentile5", self->commitLatencies.percentile(.05)).detail("Percentile95", self->commitLatencies.percentile(.95));
|
||||
TraceEvent((self->description() + "_TotalLatency").c_str()).detail("Mean", self->latencies.mean()).detail("Median", self->latencies.median()).detail("Percentile5", self->latencies.percentile(.05)).detail("Percentile95", self->latencies.percentile(.95));
|
||||
TraceEvent((self->description() + "_RowReadLatency").c_str())
|
||||
.detail("Mean", self->readLatencies.mean())
|
||||
.detail("Median", self->readLatencies.median())
|
||||
.detail("Percentile5", self->readLatencies.percentile(.05))
|
||||
.detail("Percentile95", self->readLatencies.percentile(.95))
|
||||
.detail("Percentile99", self->readLatencies.percentile(.99))
|
||||
.detail("Percentile99_9", self->readLatencies.percentile(.999))
|
||||
.detail("Max", self->readLatencies.max())
|
||||
.detail("Count", self->readLatencyCount)
|
||||
.detail("Elapsed", elapsed);
|
||||
|
||||
TraceEvent((self->description() + "_GRVLatency").c_str())
|
||||
.detail("Mean", self->GRVLatencies.mean())
|
||||
.detail("Median", self->GRVLatencies.median())
|
||||
.detail("Percentile5", self->GRVLatencies.percentile(.05))
|
||||
.detail("Percentile95", self->GRVLatencies.percentile(.95))
|
||||
.detail("Percentile99", self->GRVLatencies.percentile(.99))
|
||||
.detail("Percentile99_9", self->GRVLatencies.percentile(.999))
|
||||
.detail("Max", self->GRVLatencies.max());
|
||||
|
||||
TraceEvent((self->description() + "_CommitLatency").c_str())
|
||||
.detail("Mean", self->commitLatencies.mean())
|
||||
.detail("Median", self->commitLatencies.median())
|
||||
.detail("Percentile5", self->commitLatencies.percentile(.05))
|
||||
.detail("Percentile95", self->commitLatencies.percentile(.95))
|
||||
.detail("Percentile99", self->commitLatencies.percentile(.99))
|
||||
.detail("Percentile99_9", self->commitLatencies.percentile(.999))
|
||||
.detail("Max", self->commitLatencies.max());
|
||||
|
||||
TraceEvent((self->description() + "_TotalLatency").c_str())
|
||||
.detail("Mean", self->latencies.mean())
|
||||
.detail("Median", self->latencies.median())
|
||||
.detail("Percentile5", self->latencies.percentile(.05))
|
||||
.detail("Percentile95", self->latencies.percentile(.95))
|
||||
.detail("Percentile99", self->latencies.percentile(.99))
|
||||
.detail("Percentile99_9", self->latencies.percentile(.999))
|
||||
.detail("Max", self->latencies.max());
|
||||
|
||||
int64_t ops = (self->aTransactions.getValue() * (self->readsPerTransactionA+self->writesPerTransactionA)) +
|
||||
(self->bTransactions.getValue() * (self->readsPerTransactionB+self->writesPerTransactionB));
|
||||
|
|
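A small sketch of the percentile arithmetic behind the expanded latency trace events above, using a nearest-rank estimate over a sorted copy of the samples. The workload's own latency sampler is not shown here; this standalone snippet only illustrates what the reported quantities (median, p95, p99, p99.9, max) mean.

#include <algorithm>
#include <cstdio>
#include <vector>

// Nearest-rank percentile over a copy of the samples.
double percentile(std::vector<double> samples, double p) {
    std::sort(samples.begin(), samples.end());
    size_t idx = (size_t)(p * (samples.size() - 1));
    return samples[idx];
}

int main() {
    std::vector<double> latencies = { 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 2.4, 5.0 };
    std::printf("median=%.2f p95=%.2f p99=%.2f p99.9=%.2f max=%.2f\n",
                percentile(latencies, 0.5), percentile(latencies, 0.95),
                percentile(latencies, 0.99), percentile(latencies, 0.999),
                *std::max_element(latencies.begin(), latencies.end()));
}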
|
@ -69,7 +69,7 @@ struct StatusWorkload : TestWorkload {
|
|||
if (clientId != 0)
|
||||
return Void();
|
||||
|
||||
return success(timeout(fetcher(cx->getConnectionFile(), this), testDuration));
|
||||
return success(timeout(fetcher(cx, this), testDuration));
|
||||
}
|
||||
virtual Future<bool> check(Database const& cx) {
|
||||
return errors.getValue() == 0;
|
||||
|
@ -161,7 +161,7 @@ struct StatusWorkload : TestWorkload {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> fetcher(Reference<ClusterConnectionFile> connFile, StatusWorkload *self) {
|
||||
ACTOR Future<Void> fetcher(Database cx, StatusWorkload *self) {
|
||||
state double lastTime = now();
|
||||
|
||||
loop{
|
||||
|
@ -170,7 +170,7 @@ struct StatusWorkload : TestWorkload {
|
|||
// Since we count the requests that start, we could potentially never really hear back?
|
||||
++self->requests;
|
||||
state double issued = now();
|
||||
StatusObject result = wait(StatusClient::statusFetcher(connFile));
|
||||
StatusObject result = wait(StatusClient::statusFetcher(cx));
|
||||
++self->replies;
|
||||
BinaryWriter br(AssumeVersion(currentProtocolVersion));
|
||||
save(br, result);
|
||||
|
|
|
@ -519,6 +519,10 @@ public:
|
|||
}
|
||||
#endif
|
||||
|
||||
template <class U> Standalone<U> castTo() const {
|
||||
return Standalone<U>(*this, arena());
|
||||
}
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive& ar) {
|
||||
// FIXME: something like BinaryReader(ar) >> arena >> *(T*)this; to guarantee standalone arena???
|
||||
|
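A standalone sketch of the castTo<U>() addition above: a Standalone-like wrapper that owns its backing memory can re-wrap its payload as a related type while continuing to share the same arena, which is how a Standalone<RangeResultRef> is viewed as a Standalone<VectorRef<KeyValueRef>> elsewhere in this change. The shared_ptr "arena" and the toy RangeResult type below are simplifications, not the real Arena or RangeResultRef.

#include <iostream>
#include <memory>
#include <vector>

// RangeResult extends a vector the way RangeResultRef extends VectorRef<KeyValueRef>,
// so viewing it as its base type is a well-defined slice.
struct RangeResult : std::vector<int> {
    bool more = false;
};

template <class T>
struct Standalone : T {
    std::shared_ptr<void> arena;                 // keeps the backing memory alive
    Standalone(const T& value, std::shared_ptr<void> a) : T(value), arena(std::move(a)) {}
    template <class U>
    Standalone<U> castTo() const {               // re-wrap the payload as U, sharing the arena
        return Standalone<U>(*this, arena);
    }
};

int main() {
    auto arena = std::make_shared<int>(0);
    RangeResult rows;
    rows.assign({ 1, 2, 3 });
    rows.more = true;
    Standalone<RangeResult> result(rows, arena);
    Standalone<std::vector<int>> plain = result.castTo<std::vector<int>>();
    std::cout << plain.size() << '\n';           // 3: same rows, extra metadata dropped
}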
|
|
@ -58,6 +58,8 @@ set(FLOW_SRCS
|
|||
ThreadSafeQueue.h
|
||||
Trace.cpp
|
||||
Trace.h
|
||||
TLSPolicy.h
|
||||
TLSPolicy.cpp
|
||||
UnitTest.cpp
|
||||
UnitTest.h
|
||||
XmlTraceLogFormatter.h
|
||||
|
@ -84,6 +86,7 @@ set(FLOW_SRCS
|
|||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/SourceVersion.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h)
|
||||
|
||||
add_flow_target(STATIC_LIBRARY NAME flow SRCS ${FLOW_SRCS})
|
||||
target_include_directories(flow SYSTEM PUBLIC ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_include_directories(flow PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
if (NOT APPLE AND NOT WIN32)
|
||||
set (FLOW_LIBS ${FLOW_LIBS} rt)
|
||||
|
@ -92,7 +95,6 @@ elseif(WIN32)
|
|||
target_link_libraries(flow PUBLIC psapi.lib)
|
||||
endif()
|
||||
target_link_libraries(flow PRIVATE ${FLOW_LIBS})
|
||||
target_link_libraries(flow PUBLIC boost_target Threads::Threads ${CMAKE_DL_LIBS})
|
||||
if(USE_VALGRIND)
|
||||
target_link_libraries(flow PUBLIC Valgrind)
|
||||
endif()
|
||||
|
@ -100,7 +102,11 @@ endif()
|
|||
if(NOT WITH_TLS OR OPEN_FOR_IDE)
|
||||
target_compile_definitions(flow PUBLIC TLS_DISABLED)
|
||||
else()
|
||||
target_link_libraries(flow PUBLIC FDBLibTLS)
|
||||
target_link_libraries(flow PUBLIC OpenSSL::SSL)
|
||||
endif()
|
||||
target_link_libraries(flow PUBLIC boost_target Threads::Threads ${CMAKE_DL_LIBS})
|
||||
if(USE_VALGRIND)
|
||||
target_link_libraries(flow PUBLIC Valgrind)
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
|
|
|
@ -68,7 +68,6 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
|
|||
init( MAX_RECONNECTION_TIME, 0.5 );
|
||||
init( RECONNECTION_TIME_GROWTH_RATE, 1.2 );
|
||||
init( RECONNECTION_RESET_TIME, 5.0 );
|
||||
init( CONNECTION_ACCEPT_DELAY, 0.5 );
|
||||
init( TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY, 5.0 );
|
||||
init( TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT, 20.0 );
|
||||
init( PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT, 3600.0 );
|
||||
|
@ -112,6 +111,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
|
|||
|
||||
//GenericActors
|
||||
init( BUGGIFY_FLOW_LOCK_RELEASE_DELAY, 1.0 );
|
||||
init( LOW_PRIORITY_DELAY_COUNT, 5 );
|
||||
|
||||
//IAsyncFile
|
||||
init( INCREMENTAL_DELETE_TRUNCATE_AMOUNT, 5e8 ); //500MB
|
||||
|
@ -123,6 +123,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
|
|||
init( SLOW_LOOP_CUTOFF, 15.0 / 1000.0 );
|
||||
init( SLOW_LOOP_SAMPLING_RATE, 0.1 );
|
||||
init( TSC_YIELD_TIME, 1000000 );
|
||||
init( CERT_FILE_MAX_SIZE, 5 * 1024 * 1024 );
|
||||
|
||||
//Network
|
||||
init( PACKET_LIMIT, 100LL<<20 );
|
||||
|
@ -133,6 +134,8 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
|
|||
init( MIN_PACKET_BUFFER_FREE_BYTES, 256 );
|
||||
init( FLOW_TCP_NODELAY, 1 );
|
||||
init( FLOW_TCP_QUICKACK, 0 );
|
||||
init( UNRESTRICTED_HANDSHAKE_LIMIT, 15 );
|
||||
init( BOUNDED_HANDSHAKE_LIMIT, 400 );
|
||||
|
||||
//Sim2
|
||||
init( MIN_OPEN_TIME, 0.0002 );
|
||||
|
|
|
@ -87,7 +87,6 @@ public:
|
|||
double MAX_RECONNECTION_TIME;
|
||||
double RECONNECTION_TIME_GROWTH_RATE;
|
||||
double RECONNECTION_RESET_TIME;
|
||||
double CONNECTION_ACCEPT_DELAY;
|
||||
|
||||
int TLS_CERT_REFRESH_DELAY_SECONDS;
|
||||
double TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT;
|
||||
|
@ -131,6 +130,7 @@ public:
|
|||
|
||||
//GenericActors
|
||||
double BUGGIFY_FLOW_LOCK_RELEASE_DELAY;
|
||||
int LOW_PRIORITY_DELAY_COUNT;
|
||||
|
||||
//IAsyncFile
|
||||
int64_t INCREMENTAL_DELETE_TRUNCATE_AMOUNT;
|
||||
|
@ -143,6 +143,7 @@ public:
|
|||
double SLOW_LOOP_SAMPLING_RATE;
|
||||
int64_t TSC_YIELD_TIME;
|
||||
int64_t REACTOR_FLAGS;
|
||||
int CERT_FILE_MAX_SIZE;
|
||||
|
||||
//Network
|
||||
int64_t PACKET_LIMIT;
|
||||
|
@ -153,6 +154,8 @@ public:
|
|||
int MIN_PACKET_BUFFER_FREE_BYTES;
|
||||
int FLOW_TCP_NODELAY;
|
||||
int FLOW_TCP_QUICKACK;
|
||||
int UNRESTRICTED_HANDSHAKE_LIMIT;
|
||||
int BOUNDED_HANDSHAKE_LIMIT;
|
||||
|
||||
//Sim2
|
||||
//FIXME: more parameters could be factored out
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include "flow/AsioReactor.h"
|
||||
#include "flow/Profiler.h"
|
||||
#include "flow/ProtocolVersion.h"
|
||||
#include "flow/TLSPolicy.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#include <mmsystem.h>
|
||||
|
@ -49,7 +50,6 @@ intptr_t g_stackYieldLimit = 0;
|
|||
|
||||
using namespace boost::asio::ip;
|
||||
|
||||
|
||||
#if defined(__linux__)
|
||||
#include <execinfo.h>
|
||||
|
||||
|
@ -111,7 +111,7 @@ thread_local INetwork* thread_network = 0;
|
|||
class Net2 sealed : public INetwork, public INetworkConnections {
|
||||
|
||||
public:
|
||||
Net2(bool useThreadPool, bool useMetrics);
|
||||
Net2(bool useThreadPool, bool useMetrics, Reference<TLSPolicy> policy, const TLSParams& tlsParams);
|
||||
void run();
|
||||
void initMetrics();
|
||||
|
||||
|
@ -122,6 +122,7 @@ public:
|
|||
|
||||
// INetwork interface
|
||||
virtual double now() { return currentTime; };
|
||||
virtual double timer() { return ::timer(); };
|
||||
virtual Future<Void> delay( double seconds, TaskPriority taskId );
|
||||
virtual Future<class Void> yield( TaskPriority taskID );
|
||||
virtual bool check_yield(TaskPriority taskId);
|
||||
|
@ -154,6 +155,15 @@ public:
|
|||
//private:
|
||||
|
||||
ASIOReactor reactor;
|
||||
#ifndef TLS_DISABLED
|
||||
boost::asio::ssl::context sslContext;
|
||||
#endif
|
||||
std::string tlsPassword;
|
||||
|
||||
std::string get_password() const {
|
||||
return tlsPassword;
|
||||
}
|
||||
|
||||
INetworkConnections *network; // initially this, but can be changed
|
||||
|
||||
int64_t tsc_begin, tsc_end;
|
||||
|
@ -244,7 +254,11 @@ public:
|
|||
try {
|
||||
if (error) {
|
||||
// Log the error...
|
||||
TraceEvent(SevWarn, errContext, errID).suppressFor(1.0).detail("ErrorCode", error.value()).detail("Message", error.message());
|
||||
TraceEvent(SevWarn, errContext, errID).suppressFor(1.0).detail("ErrorCode", error.value()).detail("Message", error.message())
|
||||
#ifndef TLS_DISABLED
|
||||
.detail("WhichMeans", TLSPolicy::ErrorString(error))
|
||||
#endif
|
||||
;
|
||||
p.sendError( connection_failed() );
|
||||
} else
|
||||
p.send( Void() );
|
||||
|
@ -297,6 +311,10 @@ public:
|
|||
init();
|
||||
}
|
||||
|
||||
virtual Future<Void> acceptHandshake() { return Void(); }
|
||||
|
||||
virtual Future<Void> connectHandshake() { return Void(); }
|
||||
|
||||
// returns when write() can write at least one byte
|
||||
virtual Future<Void> onWritable() {
|
||||
++g_net2->countWriteProbes;
|
||||
|
@ -480,6 +498,342 @@ private:
|
|||
}
|
||||
};
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
typedef boost::asio::ssl::stream<boost::asio::ip::tcp::socket&> ssl_socket;
|
||||
|
||||
class SSLConnection : public IConnection, ReferenceCounted<SSLConnection> {
|
||||
public:
|
||||
virtual void addref() { ReferenceCounted<SSLConnection>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<SSLConnection>::delref(); }
|
||||
|
||||
virtual void close() {
|
||||
closeSocket();
|
||||
}
|
||||
|
||||
explicit SSLConnection( boost::asio::io_service& io_service, boost::asio::ssl::context& context )
|
||||
: id(nondeterministicRandom()->randomUniqueID()), socket(io_service), ssl_sock(socket, context)
|
||||
{
|
||||
}
|
||||
|
||||
// This is not part of the IConnection interface, because it is wrapped by INetwork::connect()
|
||||
ACTOR static Future<Reference<IConnection>> connect( boost::asio::io_service* ios, boost::asio::ssl::context* context, NetworkAddress addr ) {
|
||||
std::pair<IPAddress,uint16_t> peerIP = std::make_pair(addr.ip, addr.port);
|
||||
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
|
||||
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
|
||||
if (now() < iter->second.second) {
|
||||
if(iter->second.first >= FLOW_KNOBS->TLS_CLIENT_CONNECTION_THROTTLE_ATTEMPTS) {
|
||||
TraceEvent("TLSOutgoingConnectionThrottlingWarning").suppressFor(1.0).detail("PeerIP", addr);
|
||||
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
|
||||
throw connection_failed();
|
||||
}
|
||||
} else {
|
||||
g_network->networkInfo.serverTLSConnectionThrottler.erase(peerIP);
|
||||
}
|
||||
}
|
||||
|
||||
state Reference<SSLConnection> self( new SSLConnection(*ios, *context) );
|
||||
self->peer_address = addr;
|
||||
|
||||
try {
|
||||
auto to = tcpEndpoint(self->peer_address);
|
||||
BindPromise p("N2_ConnectError", self->id);
|
||||
Future<Void> onConnected = p.getFuture();
|
||||
self->socket.async_connect( to, std::move(p) );
|
||||
|
||||
wait( onConnected );
|
||||
self->init();
|
||||
return self;
|
||||
} catch (Error& e) {
|
||||
// Either the connection failed, or was cancelled by the caller
|
||||
self->closeSocket();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
// This is not part of the IConnection interface, because it is wrapped by IListener::accept()
|
||||
void accept(NetworkAddress peerAddr) {
|
||||
this->peer_address = peerAddr;
|
||||
init();
|
||||
}
|
||||
|
||||
ACTOR static void doAcceptHandshake( Reference<SSLConnection> self, Promise<Void> connected) {
|
||||
try {
|
||||
state std::pair<IPAddress,uint16_t> peerIP = std::make_pair(self->getPeerAddress().ip, static_cast<uint16_t>(0));
|
||||
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
|
||||
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
|
||||
if (now() < iter->second.second) {
|
||||
if(iter->second.first >= FLOW_KNOBS->TLS_SERVER_CONNECTION_THROTTLE_ATTEMPTS) {
|
||||
TraceEvent("TLSIncomingConnectionThrottlingWarning").suppressFor(1.0).detail("PeerIP", peerIP.first.toString());
|
||||
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
|
||||
self->closeSocket();
|
||||
connected.sendError(connection_failed());
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
g_network->networkInfo.serverTLSConnectionThrottler.erase(peerIP);
|
||||
}
|
||||
}
|
||||
|
||||
int64_t permitNumber = wait(g_network->networkInfo.handshakeLock->take());
|
||||
state BoundedFlowLock::Releaser releaser(g_network->networkInfo.handshakeLock, permitNumber);
|
||||
|
||||
BindPromise p("N2_AcceptHandshakeError", UID());
|
||||
auto onHandshook = p.getFuture();
|
||||
self->getSSLSocket().async_handshake( boost::asio::ssl::stream_base::server, std::move(p) );
|
||||
wait( onHandshook );
|
||||
wait(delay(0, TaskPriority::Handshake));
|
||||
connected.send(Void());
|
||||
} catch (...) {
|
||||
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
|
||||
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
|
||||
iter->second.first++;
|
||||
} else {
|
||||
g_network->networkInfo.serverTLSConnectionThrottler[peerIP] = std::make_pair(0,now() + FLOW_KNOBS->TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT);
|
||||
}
|
||||
self->closeSocket();
|
||||
connected.sendError(connection_failed());
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> acceptHandshakeWrapper( Reference<SSLConnection> self ) {
|
||||
Promise<Void> connected;
|
||||
doAcceptHandshake(self, connected);
|
||||
try {
|
||||
wait(connected.getFuture());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
// Either the connection failed, or was cancelled by the caller
|
||||
self->closeSocket();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
virtual Future<Void> acceptHandshake() {
|
||||
return acceptHandshakeWrapper( Reference<SSLConnection>::addRef(this) );
|
||||
}
|
||||
|
||||
ACTOR static void doConnectHandshake( Reference<SSLConnection> self, Promise<Void> connected) {
|
||||
try {
|
||||
int64_t permitNumber = wait(g_network->networkInfo.handshakeLock->take());
|
||||
state BoundedFlowLock::Releaser releaser(g_network->networkInfo.handshakeLock, permitNumber);
|
||||
|
||||
BindPromise p("N2_ConnectHandshakeError", self->id);
|
||||
Future<Void> onHandshook = p.getFuture();
|
||||
self->ssl_sock.async_handshake( boost::asio::ssl::stream_base::client, std::move(p) );
|
||||
wait( onHandshook );
|
||||
wait(delay(0, TaskPriority::Handshake));
|
||||
connected.send(Void());
|
||||
} catch (...) {
|
||||
std::pair<IPAddress,uint16_t> peerIP = std::make_pair(self->peer_address.ip, self->peer_address.port);
|
||||
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
|
||||
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
|
||||
iter->second.first++;
|
||||
} else {
|
||||
g_network->networkInfo.serverTLSConnectionThrottler[peerIP] = std::make_pair(0,now() + FLOW_KNOBS->TLS_CLIENT_CONNECTION_THROTTLE_TIMEOUT);
|
||||
}
|
||||
self->closeSocket();
|
||||
connected.sendError(connection_failed());
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> connectHandshakeWrapper( Reference<SSLConnection> self ) {
|
||||
Promise<Void> connected;
|
||||
doConnectHandshake(self, connected);
|
||||
try {
|
||||
wait(connected.getFuture());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
// Either the connection failed, or was cancelled by the caller
|
||||
self->closeSocket();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
virtual Future<Void> connectHandshake() {
|
||||
return connectHandshakeWrapper( Reference<SSLConnection>::addRef(this) );
|
||||
}
|
||||
|
||||
// returns when write() can write at least one byte
|
||||
virtual Future<Void> onWritable() {
|
||||
++g_net2->countWriteProbes;
|
||||
BindPromise p("N2_WriteProbeError", id);
|
||||
auto f = p.getFuture();
|
||||
socket.async_write_some( boost::asio::null_buffers(), std::move(p) );
|
||||
return f;
|
||||
}
|
||||
|
||||
// returns when read() can read at least one byte
|
||||
virtual Future<Void> onReadable() {
|
||||
++g_net2->countReadProbes;
|
||||
BindPromise p("N2_ReadProbeError", id);
|
||||
auto f = p.getFuture();
|
||||
socket.async_read_some( boost::asio::null_buffers(), std::move(p) );
|
||||
return f;
|
||||
}
|
||||
|
||||
// Reads as many bytes as possible from the read buffer into [begin,end) and returns the number of bytes read (might be 0)
|
||||
virtual int read( uint8_t* begin, uint8_t* end ) {
|
||||
boost::system::error_code err;
|
||||
++g_net2->countReads;
|
||||
size_t toRead = end-begin;
|
||||
size_t size = ssl_sock.read_some( boost::asio::mutable_buffers_1(begin, toRead), err );
|
||||
g_net2->bytesReceived += size;
|
||||
//TraceEvent("ConnRead", this->id).detail("Bytes", size);
|
||||
if (err) {
|
||||
if (err == boost::asio::error::would_block) {
|
||||
++g_net2->countWouldBlock;
|
||||
return 0;
|
||||
}
|
||||
onReadError(err);
|
||||
throw connection_failed();
|
||||
}
|
||||
ASSERT( size ); // If the socket is closed, we expect an 'eof' error, not a zero return value
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
// Writes as many bytes as possible from the given SendBuffer chain into the write buffer and returns the number of bytes written (might be 0)
|
||||
virtual int write( SendBuffer const* data, int limit ) {
|
||||
boost::system::error_code err;
|
||||
++g_net2->countWrites;
|
||||
|
||||
size_t sent = ssl_sock.write_some( boost::iterator_range<SendBufferIterator>(SendBufferIterator(data, limit), SendBufferIterator()), err );
|
||||
|
||||
if (err) {
|
||||
// Since there was an error, sent's value can't be used to infer that the buffer has data and the limit is positive so check explicitly.
|
||||
ASSERT(limit > 0);
|
||||
bool notEmpty = false;
|
||||
for(auto p = data; p; p = p->next)
|
||||
if(p->bytes_written - p->bytes_sent > 0) {
|
||||
notEmpty = true;
|
||||
break;
|
||||
}
|
||||
ASSERT(notEmpty);
|
||||
|
||||
if (err == boost::asio::error::would_block) {
|
||||
++g_net2->countWouldBlock;
|
||||
return 0;
|
||||
}
|
||||
onWriteError(err);
|
||||
throw connection_failed();
|
||||
}
|
||||
|
||||
ASSERT( sent ); // Make sure data was sent, and also this check will fail if the buffer chain was empty or the limit was not > 0.
|
||||
return sent;
|
||||
}
|
||||
|
||||
virtual NetworkAddress getPeerAddress() { return peer_address; }
|
||||
|
||||
virtual UID getDebugID() { return id; }
|
||||
|
||||
tcp::socket& getSocket() { return socket; }
|
||||
|
||||
ssl_socket& getSSLSocket() { return ssl_sock; }
|
||||
private:
|
||||
UID id;
|
||||
tcp::socket socket;
|
||||
ssl_socket ssl_sock;
|
||||
NetworkAddress peer_address;
|
||||
|
||||
struct SendBufferIterator {
|
||||
typedef boost::asio::const_buffer value_type;
|
||||
typedef std::forward_iterator_tag iterator_category;
|
||||
typedef size_t difference_type;
|
||||
typedef boost::asio::const_buffer* pointer;
|
||||
typedef boost::asio::const_buffer& reference;
|
||||
|
||||
SendBuffer const* p;
|
||||
int limit;
|
||||
|
||||
SendBufferIterator(SendBuffer const* p=0, int limit = std::numeric_limits<int>::max()) : p(p), limit(limit) {
|
||||
ASSERT(limit > 0);
|
||||
}
|
||||
|
||||
bool operator == (SendBufferIterator const& r) const { return p == r.p; }
|
||||
bool operator != (SendBufferIterator const& r) const { return p != r.p; }
|
||||
void operator++() {
|
||||
limit -= p->bytes_written - p->bytes_sent;
|
||||
if(limit > 0)
|
||||
p = p->next;
|
||||
else
|
||||
p = NULL;
|
||||
}
|
||||
|
||||
boost::asio::const_buffer operator*() const {
|
||||
return boost::asio::const_buffer( p->data + p->bytes_sent, std::min(limit, p->bytes_written - p->bytes_sent) );
|
||||
}
|
||||
};
|
||||
|
||||
void init() {
|
||||
// Socket settings that have to be set after connect or accept succeeds
|
||||
socket.non_blocking(true);
|
||||
socket.set_option(boost::asio::ip::tcp::no_delay(true));
|
||||
platform::setCloseOnExec(socket.native_handle());
|
||||
}
|
||||
|
||||
void closeSocket() {
|
||||
boost::system::error_code cancelError;
|
||||
socket.cancel(cancelError);
|
||||
boost::system::error_code closeError;
|
||||
socket.close(closeError);
|
||||
boost::system::error_code shutdownError;
|
||||
ssl_sock.shutdown(shutdownError);
|
||||
}
|
||||
|
||||
void onReadError( const boost::system::error_code& error ) {
|
||||
TraceEvent(SevWarn, "N2_ReadError", id).suppressFor(1.0).detail("Message", error.value());
|
||||
closeSocket();
|
||||
}
|
||||
void onWriteError( const boost::system::error_code& error ) {
|
||||
TraceEvent(SevWarn, "N2_WriteError", id).suppressFor(1.0).detail("Message", error.value());
|
||||
closeSocket();
|
||||
}
|
||||
};
|
||||
|
||||
class SSLListener : public IListener, ReferenceCounted<SSLListener> {
|
||||
NetworkAddress listenAddress;
|
||||
tcp::acceptor acceptor;
|
||||
boost::asio::ssl::context* context;
|
||||
|
||||
public:
|
||||
SSLListener( boost::asio::io_service& io_service, boost::asio::ssl::context* context, NetworkAddress listenAddress )
|
||||
: listenAddress(listenAddress), acceptor( io_service, tcpEndpoint( listenAddress ) ), context(context)
|
||||
{
|
||||
platform::setCloseOnExec(acceptor.native_handle());
|
||||
}
|
||||
|
||||
virtual void addref() { ReferenceCounted<SSLListener>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<SSLListener>::delref(); }
|
||||
|
||||
// Returns one incoming connection when it is available
|
||||
virtual Future<Reference<IConnection>> accept() {
|
||||
return doAccept( this );
|
||||
}
|
||||
|
||||
virtual NetworkAddress getListenAddress() { return listenAddress; }
|
||||
|
||||
private:
|
||||
ACTOR static Future<Reference<IConnection>> doAccept( SSLListener* self ) {
|
||||
state Reference<SSLConnection> conn( new SSLConnection( self->acceptor.get_io_service(), *self->context) );
|
||||
state tcp::acceptor::endpoint_type peer_endpoint;
|
||||
try {
|
||||
BindPromise p("N2_AcceptError", UID());
|
||||
auto f = p.getFuture();
|
||||
self->acceptor.async_accept( conn->getSocket(), peer_endpoint, std::move(p) );
|
||||
wait( f );
|
||||
auto peer_address = peer_endpoint.address().is_v6() ? IPAddress(peer_endpoint.address().to_v6().to_bytes()) : IPAddress(peer_endpoint.address().to_v4().to_ulong());
|
||||
|
||||
conn->accept(NetworkAddress(peer_address, peer_endpoint.port(), false, true));
|
||||
|
||||
return conn;
|
||||
} catch (...) {
|
||||
conn->close();
|
||||
throw;
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
struct PromiseTask : public Task, public FastAllocated<PromiseTask> {
|
||||
Promise<Void> promise;
|
||||
PromiseTask() {}
|
||||
|
@ -491,7 +845,15 @@ struct PromiseTask : public Task, public FastAllocated<PromiseTask> {
|
|||
}
|
||||
};
|
||||
|
||||
Net2::Net2(bool useThreadPool, bool useMetrics)
|
||||
// 5MB for loading files into memory
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
bool insecurely_always_accept(bool _1, boost::asio::ssl::verify_context& _2) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
Net2::Net2(bool useThreadPool, bool useMetrics, Reference<TLSPolicy> policy, const TLSParams& tlsParams)
|
||||
: useThreadPool(useThreadPool),
|
||||
network(this),
|
||||
reactor(this),
|
||||
|
@ -500,10 +862,49 @@ Net2::Net2(bool useThreadPool, bool useMetrics)
|
|||
// Until run() is called, yield() will always yield
|
||||
tsc_begin(0), tsc_end(0), taskBegin(0), currentTaskID(TaskPriority::DefaultYield),
|
||||
lastMinTaskID(TaskPriority::Zero),
|
||||
numYields(0)
|
||||
numYields(0),
|
||||
tlsPassword(tlsParams.tlsPassword)
|
||||
#ifndef TLS_DISABLED
|
||||
,sslContext(boost::asio::ssl::context(boost::asio::ssl::context::tlsv12))
|
||||
#endif
|
||||
|
||||
{
|
||||
TraceEvent("Net2Starting");
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
sslContext.set_options(boost::asio::ssl::context::default_workarounds);
|
||||
sslContext.set_verify_mode(boost::asio::ssl::context::verify_peer | boost::asio::ssl::verify_fail_if_no_peer_cert);
|
||||
if (policy) {
|
||||
sslContext.set_verify_callback([policy](bool preverified, boost::asio::ssl::verify_context& ctx) {
|
||||
return policy->verify_peer(preverified, ctx.native_handle());
|
||||
});
|
||||
} else {
|
||||
sslContext.set_verify_callback(boost::bind(&insecurely_always_accept, _1, _2));
|
||||
}
|
||||
|
||||
sslContext.set_password_callback(std::bind(&Net2::get_password, this));
|
||||
|
||||
if (tlsParams.tlsCertPath.size() ) {
|
||||
sslContext.use_certificate_chain_file(tlsParams.tlsCertPath);
|
||||
}
|
||||
if (tlsParams.tlsCertBytes.size() ) {
|
||||
sslContext.use_certificate(boost::asio::buffer(tlsParams.tlsCertBytes.data(), tlsParams.tlsCertBytes.size()), boost::asio::ssl::context::pem);
|
||||
}
|
||||
if (tlsParams.tlsCAPath.size()) {
|
||||
std::string cert = readFileBytes(tlsParams.tlsCAPath, FLOW_KNOBS->CERT_FILE_MAX_SIZE);
|
||||
sslContext.add_certificate_authority(boost::asio::buffer(cert.data(), cert.size()));
|
||||
}
|
||||
if (tlsParams.tlsCABytes.size()) {
|
||||
sslContext.add_certificate_authority(boost::asio::buffer(tlsParams.tlsCABytes.data(), tlsParams.tlsCABytes.size()));
|
||||
}
|
||||
if (tlsParams.tlsKeyPath.size()) {
|
||||
sslContext.use_private_key_file(tlsParams.tlsKeyPath, boost::asio::ssl::context::pem);
|
||||
}
|
||||
if (tlsParams.tlsKeyBytes.size()) {
|
||||
sslContext.use_private_key(boost::asio::buffer(tlsParams.tlsKeyBytes.data(), tlsParams.tlsKeyBytes.size()), boost::asio::ssl::context::pem);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Set the global members
|
||||
if(useMetrics) {
|
||||
setGlobal(INetwork::enTDMetrics, (flowGlobalType) &tdmetrics);
|
||||
|
@ -879,8 +1280,13 @@ THREAD_HANDLE Net2::startThread( THREAD_FUNC_RETURN (*func) (void*), void *arg )
|
|||
return ::startThread(func, arg);
|
||||
}
|
||||
|
||||
|
||||
Future< Reference<IConnection> > Net2::connect( NetworkAddress toAddr, std::string host ) {
|
||||
#ifndef TLS_DISABLED
|
||||
if ( toAddr.isTLS() ) {
|
||||
return SSLConnection::connect(&this->reactor.ios, &this->sslContext, toAddr);
|
||||
}
|
||||
#endif
|
||||
|
||||
return Connection::connect(&this->reactor.ios, toAddr);
|
||||
}
|
||||
|
||||
|
@ -954,6 +1360,11 @@ bool Net2::isAddressOnThisHost( NetworkAddress const& addr ) {
|
|||
|
||||
Reference<IListener> Net2::listen( NetworkAddress localAddr ) {
|
||||
try {
|
||||
#ifndef TLS_DISABLED
|
||||
if ( localAddr.isTLS() ) {
|
||||
return Reference<IListener>(new SSLListener( reactor.ios, &this->sslContext, localAddr ));
|
||||
}
|
||||
#endif
|
||||
return Reference<IListener>( new Listener( reactor.ios, localAddr ) );
|
||||
} catch (boost::system::system_error const& e) {
|
||||
Error x;
|
||||
|
@ -1048,13 +1459,13 @@ void ASIOReactor::wake() {
|
|||
|
||||
} // namespace net2
|
||||
|
||||
INetwork* newNet2(bool useThreadPool, bool useMetrics) {
|
||||
INetwork* newNet2(bool useThreadPool, bool useMetrics, Reference<TLSPolicy> policy, const TLSParams& tlsParams) {
|
||||
try {
|
||||
N2::g_net2 = new N2::Net2(useThreadPool, useMetrics);
|
||||
N2::g_net2 = new N2::Net2(useThreadPool, useMetrics, policy, tlsParams);
|
||||
}
|
||||
catch(boost::system::system_error e) {
|
||||
TraceEvent("Net2InitError").detail("Message", e.what());
|
||||
throw unknown_error();
|
||||
throw;
|
||||
}
|
||||
catch(std::exception const& e) {
|
||||
TraceEvent("Net2InitError").detail("Message", e.what());
|
||||
|
|
|
@ -0,0 +1,535 @@
|
|||
/*
|
||||
* TLSPolicy.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "flow/TLSPolicy.h"
|
||||
|
||||
TLSPolicy::~TLSPolicy() {}
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <openssl/objects.h>
|
||||
#include <openssl/bio.h>
|
||||
#include <openssl/err.h>
|
||||
#include <openssl/pem.h>
|
||||
#include <openssl/x509.h>
|
||||
#include <openssl/x509v3.h>
|
||||
#include <openssl/x509_vfy.h>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
#include "flow/FastRef.h"
|
||||
#include "flow/Trace.h"
|
||||
|
||||
std::string TLSPolicy::ErrorString(boost::system::error_code e) {
|
||||
char* str = ERR_error_string(e.value(), NULL);
|
||||
return std::string(str);
|
||||
}
|
||||
|
||||
// To force typeinfo to only be emitted once.
|
||||
|
||||
|
||||
std::string TLSPolicy::toString() const {
|
||||
std::stringstream ss;
|
||||
ss << "TLSPolicy{ Rules=[";
|
||||
for (const auto &r : rules) {
|
||||
ss << " " << r.toString() << ",";
|
||||
}
|
||||
ss << " ] }";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string TLSPolicy::Rule::toString() const {
|
||||
std::stringstream ss;
|
||||
|
||||
ss << "Rule{ verify_cert=" << verify_cert
|
||||
<< ", verify_time=" << verify_time;
|
||||
ss << ", Subject=[";
|
||||
for (const auto& s : subject_criteria) {
|
||||
ss << " { NID=" << s.first << ", Criteria=" << s.second.criteria << "},";
|
||||
}
|
||||
ss << " ], Issuer=[";
|
||||
for (const auto& s : issuer_criteria) {
|
||||
ss << " { NID=" << s.first << ", Criteria=" << s.second.criteria << "},";
|
||||
}
|
||||
ss << " ], Root=[";
|
||||
for (const auto& s : root_criteria) {
|
||||
ss << " { NID=" << s.first << ", Criteria=" << s.second.criteria << "},";
|
||||
}
|
||||
ss << " ] }";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
static int hexValue(char c) {
|
||||
static char const digits[] = "0123456789ABCDEF";
|
||||
|
||||
if (c >= 'a' && c <= 'f')
|
||||
c -= ('a' - 'A');
|
||||
|
||||
int value = std::find(digits, digits + 16, c) - digits;
|
||||
if (value >= 16) {
|
||||
throw std::runtime_error("hexValue");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// Does not handle "raw" form (e.g. #28C4D1), only escaped text
|
||||
static std::string de4514(std::string const& input, int start, int& out_end) {
|
||||
std::string output;
|
||||
|
||||
if(input[start] == '#' || input[start] == ' ') {
|
||||
out_end = start;
|
||||
return output;
|
||||
}
|
||||
|
||||
int space_count = 0;
|
||||
|
||||
for(int p = start; p < input.size();) {
|
||||
switch(input[p]) {
|
||||
case '\\': // Handle escaped sequence
|
||||
|
||||
// Backslash escaping nothing!
|
||||
if(p == input.size() - 1) {
|
||||
out_end = p;
|
||||
goto FIN;
|
||||
}
|
||||
|
||||
switch(input[p+1]) {
|
||||
case ' ':
|
||||
case '"':
|
||||
case '#':
|
||||
case '+':
|
||||
case ',':
|
||||
case ';':
|
||||
case '<':
|
||||
case '=':
|
||||
case '>':
|
||||
case '|':
|
||||
case '\\':
|
||||
output += input[p+1];
|
||||
p += 2;
|
||||
space_count = 0;
|
||||
continue;
|
||||
|
||||
default:
|
||||
// Backslash escaping pair of hex digits requires two characters
|
||||
if(p == input.size() - 2) {
|
||||
out_end = p;
|
||||
goto FIN;
|
||||
}
|
||||
|
||||
try {
|
||||
output += hexValue(input[p+1]) * 16 + hexValue(input[p+2]);
|
||||
p += 3;
|
||||
space_count = 0;
|
||||
continue;
|
||||
} catch( ... ) {
|
||||
out_end = p;
|
||||
goto FIN;
|
||||
}
|
||||
}
|
||||
|
||||
case '"':
|
||||
case '+':
|
||||
case ',':
|
||||
case ';':
|
||||
case '<':
|
||||
case '>':
|
||||
case 0:
|
||||
// All of these must have been escaped
|
||||
out_end = p;
|
||||
goto FIN;
|
||||
|
||||
default:
|
||||
// Character is what it is
|
||||
output += input[p];
|
||||
if(input[p] == ' ')
|
||||
space_count++;
|
||||
else
|
||||
space_count = 0;
|
||||
p++;
|
||||
}
|
||||
}
|
||||
|
||||
out_end = input.size();
|
||||
|
||||
FIN:
|
||||
out_end -= space_count;
|
||||
output.resize(output.size() - space_count);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
static std::pair<std::string, std::string> splitPair(std::string const& input, char c) {
|
||||
int p = input.find_first_of(c);
|
||||
if(p == input.npos) {
|
||||
throw std::runtime_error("splitPair");
|
||||
}
|
||||
return std::make_pair(input.substr(0, p), input.substr(p+1, input.size()));
|
||||
}
|
||||
|
||||
static NID abbrevToNID(std::string const& sn) {
|
||||
NID nid = NID_undef;
|
||||
|
||||
if (sn == "C" || sn == "CN" || sn == "L" || sn == "ST" || sn == "O" || sn == "OU" || sn == "UID" || sn == "DC" || sn == "subjectAltName")
|
||||
nid = OBJ_sn2nid(sn.c_str());
|
||||
if (nid == NID_undef)
|
||||
throw std::runtime_error("abbrevToNID");
|
||||
|
||||
return nid;
|
||||
}
|
||||
|
||||
static X509Location locationForNID(NID nid) {
|
||||
const char* name = OBJ_nid2ln(nid);
|
||||
if (name == NULL) {
|
||||
throw std::runtime_error("locationForNID");
|
||||
}
|
||||
if (strncmp(name, "X509v3", 6) == 0) {
|
||||
return X509Location::EXTENSION;
|
||||
} else {
|
||||
// It probably isn't true that all other NIDs live in the NAME, but it is for now...
|
||||
return X509Location::NAME;
|
||||
}
|
||||
}
|
||||
|
||||
bool TLSPolicy::set_verify_peers(std::vector<std::string> verify_peers) {
|
||||
for (int i = 0; i < verify_peers.size(); i++) {
|
||||
try {
|
||||
std::string& verifyString = verify_peers[i];
|
||||
int start = 0;
|
||||
while(start < verifyString.size()) {
|
||||
int split = verifyString.find('|', start);
|
||||
if(split == std::string::npos) {
|
||||
break;
|
||||
}
|
||||
if(split == start || verifyString[split-1] != '\\') {
|
||||
rules.emplace_back(verifyString.substr(start,split-start));
|
||||
start = split+1;
|
||||
}
|
||||
}
|
||||
rules.emplace_back(verifyString.substr(start));
|
||||
} catch ( const std::runtime_error& e ) {
|
||||
rules.clear();
|
||||
std::string& verifyString = verify_peers[i];
|
||||
TraceEvent(SevError, "FDBLibTLSVerifyPeersParseError").detail("Config", verifyString);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
TLSPolicy::Rule::Rule(std::string input) {
|
||||
int s = 0;
|
||||
|
||||
while (s < input.size()) {
|
||||
int eq = input.find('=', s);
|
||||
|
||||
if (eq == input.npos)
|
||||
throw std::runtime_error("parse_verify");
|
||||
|
||||
MatchType mt = MatchType::EXACT;
|
||||
if (input[eq-1] == '>') mt = MatchType::PREFIX;
|
||||
if (input[eq-1] == '<') mt = MatchType::SUFFIX;
|
||||
std::string term = input.substr(s, eq - s - (mt == MatchType::EXACT ? 0 : 1));
|
||||
|
||||
if (term.find("Check.") == 0) {
|
||||
if (eq + 2 > input.size())
|
||||
throw std::runtime_error("parse_verify");
|
||||
if (eq + 2 != input.size() && input[eq + 2] != ',')
|
||||
throw std::runtime_error("parse_verify");
|
||||
if (mt != MatchType::EXACT)
|
||||
throw std::runtime_error("parse_verify: cannot prefix match Check");
|
||||
|
||||
bool* flag;
|
||||
|
||||
if (term == "Check.Valid")
|
||||
flag = &verify_cert;
|
||||
else if (term == "Check.Unexpired")
|
||||
flag = &verify_time;
|
||||
else
|
||||
throw std::runtime_error("parse_verify");
|
||||
|
||||
if (input[eq + 1] == '0')
|
||||
*flag = false;
|
||||
else if (input[eq + 1] == '1')
|
||||
*flag = true;
|
||||
else
|
||||
throw std::runtime_error("parse_verify");
|
||||
|
||||
s = eq + 3;
|
||||
} else {
|
||||
std::map< int, Criteria >* criteria = &subject_criteria;
|
||||
|
||||
if (term.find('.') != term.npos) {
|
||||
auto scoped = splitPair(term, '.');
|
||||
|
||||
if (scoped.first == "S" || scoped.first == "Subject")
|
||||
criteria = &subject_criteria;
|
||||
else if (scoped.first == "I" || scoped.first == "Issuer")
|
||||
criteria = &issuer_criteria;
|
||||
else if (scoped.first == "R" || scoped.first == "Root")
|
||||
criteria = &root_criteria;
|
||||
else
|
||||
throw std::runtime_error("parse_verify");
|
||||
|
||||
term = scoped.second;
|
||||
}
|
||||
|
||||
int remain;
|
||||
auto unesc = de4514(input, eq + 1, remain);
|
||||
|
||||
if (remain == eq + 1)
|
||||
throw std::runtime_error("parse_verify");
|
||||
|
||||
NID termNID = abbrevToNID(term);
|
||||
const X509Location loc = locationForNID(termNID);
|
||||
criteria->insert(std::make_pair(termNID, Criteria(unesc, mt, loc)));
|
||||
|
||||
if (remain != input.size() && input[remain] != ',')
|
||||
throw std::runtime_error("parse_verify");
|
||||
|
||||
s = remain + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool match_criteria_entry(const std::string& criteria, ASN1_STRING* entry, MatchType mt) {
|
||||
bool rc = false;
|
||||
ASN1_STRING* asn_criteria = NULL;
|
||||
unsigned char* criteria_utf8 = NULL;
|
||||
int criteria_utf8_len = 0;
|
||||
unsigned char* entry_utf8 = NULL;
|
||||
int entry_utf8_len = 0;
|
||||
|
||||
if ((asn_criteria = ASN1_IA5STRING_new()) == NULL)
|
||||
goto err;
|
||||
if (ASN1_STRING_set(asn_criteria, criteria.c_str(), criteria.size()) != 1)
|
||||
goto err;
|
||||
if ((criteria_utf8_len = ASN1_STRING_to_UTF8(&criteria_utf8, asn_criteria)) < 1)
|
||||
goto err;
|
||||
if ((entry_utf8_len = ASN1_STRING_to_UTF8(&entry_utf8, entry)) < 1)
|
||||
goto err;
|
||||
if (mt == MatchType::EXACT) {
|
||||
if (criteria_utf8_len == entry_utf8_len &&
|
||||
memcmp(criteria_utf8, entry_utf8, criteria_utf8_len) == 0)
|
||||
rc = true;
|
||||
} else if (mt == MatchType::PREFIX) {
|
||||
if (criteria_utf8_len <= entry_utf8_len &&
|
||||
memcmp(criteria_utf8, entry_utf8, criteria_utf8_len) == 0)
|
||||
rc = true;
|
||||
} else if (mt == MatchType::SUFFIX) {
|
||||
if (criteria_utf8_len <= entry_utf8_len &&
|
||||
memcmp(criteria_utf8, entry_utf8 + (entry_utf8_len - criteria_utf8_len), criteria_utf8_len) == 0)
|
||||
rc = true;
|
||||
}
|
||||
|
||||
err:
|
||||
ASN1_STRING_free(asn_criteria);
|
||||
free(criteria_utf8);
|
||||
free(entry_utf8);
|
||||
return rc;
|
||||
}
|
||||
|
||||
bool match_name_criteria(X509_NAME *name, NID nid, const std::string& criteria, MatchType mt) {
|
||||
X509_NAME_ENTRY *name_entry;
|
||||
int idx;
|
||||
|
||||
// If name does not exist, or has multiple of this RDN, refuse to proceed.
|
||||
if ((idx = X509_NAME_get_index_by_NID(name, nid, -1)) < 0)
|
||||
return false;
|
||||
if (X509_NAME_get_index_by_NID(name, nid, idx) != -1)
|
||||
return false;
|
||||
if ((name_entry = X509_NAME_get_entry(name, idx)) == NULL)
|
||||
return false;
|
||||
|
||||
return match_criteria_entry(criteria, X509_NAME_ENTRY_get_data(name_entry), mt);
|
||||
}
|
||||
|
||||
bool match_extension_criteria(X509 *cert, NID nid, const std::string& value, MatchType mt) {
|
||||
if (nid != NID_subject_alt_name && nid != NID_issuer_alt_name) {
|
||||
// I have no idea how other extensions work.
|
||||
return false;
|
||||
}
|
||||
auto pos = value.find(':');
|
||||
if (pos == value.npos) {
|
||||
return false;
|
||||
}
|
||||
std::string value_gen = value.substr(0, pos);
|
||||
std::string value_val = value.substr(pos+1, value.npos);
|
||||
STACK_OF(GENERAL_NAME)* sans = reinterpret_cast<STACK_OF(GENERAL_NAME)*>(X509_get_ext_d2i(cert, nid, NULL, NULL));
|
||||
if (sans == NULL) {
|
||||
return false;
|
||||
}
|
||||
int num_sans = sk_GENERAL_NAME_num( sans );
|
||||
bool rc = false;
|
||||
for( int i = 0; i < num_sans && !rc; ++i ) {
|
||||
GENERAL_NAME* altname = sk_GENERAL_NAME_value( sans, i );
|
||||
std::string matchable;
|
||||
switch (altname->type) {
|
||||
case GEN_OTHERNAME:
|
||||
break;
|
||||
case GEN_EMAIL:
|
||||
if (value_gen == "EMAIL" &&
|
||||
match_criteria_entry( value_val, altname->d.rfc822Name, mt)) {
|
||||
rc = true;
|
||||
break;
|
||||
}
|
||||
case GEN_DNS:
|
||||
if (value_gen == "DNS" &&
|
||||
match_criteria_entry( value_val, altname->d.dNSName, mt )) {
|
||||
rc = true;
|
||||
break;
|
||||
}
|
||||
case GEN_X400:
|
||||
case GEN_DIRNAME:
|
||||
case GEN_EDIPARTY:
|
||||
break;
|
||||
case GEN_URI:
|
||||
if (value_gen == "URI" &&
|
||||
match_criteria_entry( value_val, altname->d.uniformResourceIdentifier, mt )) {
|
||||
rc = true;
|
||||
break;
|
||||
}
|
||||
case GEN_IPADD:
|
||||
if (value_gen == "IP" &&
|
||||
match_criteria_entry( value_val, altname->d.iPAddress, mt )) {
|
||||
rc = true;
|
||||
break;
|
||||
}
|
||||
case GEN_RID:
|
||||
break;
|
||||
}
|
||||
}
|
||||
sk_GENERAL_NAME_pop_free(sans, GENERAL_NAME_free);
|
||||
return rc;
|
||||
}
|
||||
|
||||
bool match_criteria(X509* cert, X509_NAME* subject, NID nid, const std::string& criteria, MatchType mt, X509Location loc) {
|
||||
switch(loc) {
|
||||
case X509Location::NAME: {
|
||||
return match_name_criteria(subject, nid, criteria, mt);
|
||||
}
|
||||
case X509Location::EXTENSION: {
|
||||
return match_extension_criteria(cert, nid, criteria, mt);
|
||||
}
|
||||
}
|
||||
// Should never be reachable.
|
||||
return false;
|
||||
}
|
||||
|
||||
std::tuple<bool,std::string> check_verify(const TLSPolicy::Rule* verify, X509_STORE_CTX* store_ctx, bool is_client) {
|
||||
X509_NAME *subject, *issuer;
|
||||
bool rc = false;
|
||||
X509* cert = NULL;
|
||||
// if returning false, give a reason string
|
||||
std::string reason = "";
|
||||
|
||||
// Check subject criteria.
|
||||
cert = sk_X509_value(X509_STORE_CTX_get0_chain(store_ctx), 0);
|
||||
if ((subject = X509_get_subject_name(cert)) == NULL) {
|
||||
reason = "Cert subject error";
|
||||
goto err;
|
||||
}
|
||||
for (auto &pair: verify->subject_criteria) {
|
||||
if (!match_criteria(cert, subject, pair.first, pair.second.criteria, pair.second.match_type, pair.second.location)) {
|
||||
reason = "Cert subject match failure";
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
// Check issuer criteria.
|
||||
if ((issuer = X509_get_issuer_name(cert)) == NULL) {
|
||||
reason = "Cert issuer error";
|
||||
goto err;
|
||||
}
|
||||
for (auto &pair: verify->issuer_criteria) {
|
||||
if (!match_criteria(cert, issuer, pair.first, pair.second.criteria, pair.second.match_type, pair.second.location)) {
|
||||
reason = "Cert issuer match failure";
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
// Check root criteria - this is the subject of the final certificate in the stack.
|
||||
cert = sk_X509_value(X509_STORE_CTX_get0_chain(store_ctx), sk_X509_num(X509_STORE_CTX_get0_chain(store_ctx)) - 1);
|
||||
if ((subject = X509_get_subject_name(cert)) == NULL) {
|
||||
reason = "Root subject error";
|
||||
goto err;
|
||||
}
|
||||
for (auto &pair: verify->root_criteria) {
|
||||
if (!match_criteria(cert, subject, pair.first, pair.second.criteria, pair.second.match_type, pair.second.location)) {
|
||||
reason = "Root subject match failure";
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
// If we got this far, everything checked out...
|
||||
rc = true;
|
||||
|
||||
err:
|
||||
return std::make_tuple(rc, reason);
|
||||
}
|
||||
|
||||
bool TLSPolicy::verify_peer(bool preverified, X509_STORE_CTX* store_ctx) {
|
||||
bool rc = false;
|
||||
std::set<std::string> verify_failure_reasons;
|
||||
bool verify_success;
|
||||
std::string verify_failure_reason;
|
||||
|
||||
// If certificate verification is disabled, there's nothing more to do.
|
||||
if (std::any_of(rules.begin(), rules.end(), [](const Rule& r){ return !r.verify_cert; })) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if(!preverified) {
|
||||
TraceEvent("TLSPolicyFailure").suppressFor(1.0).detail("Reason", "preverification failed").detail("VerifyError", X509_verify_cert_error_string(X509_STORE_CTX_get_error(store_ctx)));
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!rules.size()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Any matching rule is sufficient.
|
||||
for (auto &verify_rule: rules) {
|
||||
std::tie(verify_success, verify_failure_reason) = check_verify(&verify_rule, store_ctx, is_client);
|
||||
if (verify_success) {
|
||||
rc = true;
|
||||
break;
|
||||
} else {
|
||||
if (verify_failure_reason.length() > 0)
|
||||
verify_failure_reasons.insert(verify_failure_reason);
|
||||
}
|
||||
}
|
||||
|
||||
if (!rc) {
|
||||
// log the various failure reasons
|
||||
for (std::string reason : verify_failure_reasons) {
|
||||
TraceEvent("TLSPolicyFailure").suppressFor(1.0).detail("Reason", reason);
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
* TLSPolicy.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _FLOW_TLSPOLICY_H_
|
||||
#define _FLOW_TLSPOLICY_H_
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <boost/system/system_error.hpp>
|
||||
#include "flow/FastRef.h"
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
|
||||
#include <openssl/x509.h>
|
||||
typedef int NID;
|
||||
|
||||
enum class MatchType {
|
||||
EXACT,
|
||||
PREFIX,
|
||||
SUFFIX,
|
||||
};
|
||||
|
||||
enum class X509Location {
|
||||
// This NID is located within a X509_NAME
|
||||
NAME,
|
||||
// This NID is an X509 extension, and should be parsed accordingly
|
||||
EXTENSION,
|
||||
};
|
||||
|
||||
struct Criteria {
|
||||
Criteria( const std::string& s )
|
||||
: criteria(s), match_type(MatchType::EXACT), location(X509Location::NAME) {}
|
||||
Criteria( const std::string& s, MatchType mt )
|
||||
: criteria(s), match_type(mt), location(X509Location::NAME) {}
|
||||
Criteria( const std::string& s, X509Location loc)
|
||||
: criteria(s), match_type(MatchType::EXACT), location(loc) {}
|
||||
Criteria( const std::string& s, MatchType mt, X509Location loc)
|
||||
: criteria(s), match_type(mt), location(loc) {}
|
||||
|
||||
std::string criteria;
|
||||
MatchType match_type;
|
||||
X509Location location;
|
||||
|
||||
bool operator==(const Criteria& c) const {
|
||||
return criteria == c.criteria && match_type == c.match_type && location == c.location;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
struct TLSParams {
|
||||
enum { OPT_TLS = 100000, OPT_TLS_PLUGIN, OPT_TLS_CERTIFICATES, OPT_TLS_KEY, OPT_TLS_VERIFY_PEERS, OPT_TLS_CA_FILE, OPT_TLS_PASSWORD };
|
||||
|
||||
std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword;
|
||||
std::string tlsCertBytes, tlsKeyBytes, tlsCABytes;
|
||||
};
|
||||
|
||||
class TLSPolicy : ReferenceCounted<TLSPolicy> {
|
||||
public:
|
||||
enum class Is {
|
||||
CLIENT,
|
||||
SERVER
|
||||
};
|
||||
|
||||
TLSPolicy(Is client) : is_client(client == Is::CLIENT) {}
|
||||
virtual ~TLSPolicy();
|
||||
|
||||
virtual void addref() { ReferenceCounted<TLSPolicy>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<TLSPolicy>::delref(); }
|
||||
|
||||
#ifndef TLS_DISABLED
|
||||
static std::string ErrorString(boost::system::error_code e);
|
||||
|
||||
bool set_verify_peers(std::vector<std::string> verify_peers);
|
||||
bool verify_peer(bool preverified, X509_STORE_CTX* store_ctx);
|
||||
|
||||
std::string toString() const;
|
||||
|
||||
struct Rule {
|
||||
explicit Rule(std::string input);
|
||||
|
||||
std::string toString() const;
|
||||
|
||||
std::map< NID, Criteria > subject_criteria;
|
||||
std::map< NID, Criteria > issuer_criteria;
|
||||
std::map< NID, Criteria > root_criteria;
|
||||
|
||||
bool verify_cert = true;
|
||||
bool verify_time = true;
|
||||
};
|
||||
|
||||
std::vector<Rule> rules;
|
||||
#endif
|
||||
bool is_client;
|
||||
};
|
||||
|
||||
#define TLS_PLUGIN_FLAG "--tls_plugin"
|
||||
#define TLS_CERTIFICATE_FILE_FLAG "--tls_certificate_file"
|
||||
#define TLS_KEY_FILE_FLAG "--tls_key_file"
|
||||
#define TLS_VERIFY_PEERS_FLAG "--tls_verify_peers"
|
||||
#define TLS_CA_FILE_FLAG "--tls_ca_file"
|
||||
#define TLS_PASSWORD_FLAG "--tls_password"
|
||||
|
||||
#define TLS_OPTION_FLAGS \
|
||||
{ TLSParams::OPT_TLS_PLUGIN, TLS_PLUGIN_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSParams::OPT_TLS_CERTIFICATES, TLS_CERTIFICATE_FILE_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSParams::OPT_TLS_KEY, TLS_KEY_FILE_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSParams::OPT_TLS_VERIFY_PEERS, TLS_VERIFY_PEERS_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSParams::OPT_TLS_PASSWORD, TLS_PASSWORD_FLAG, SO_REQ_SEP }, \
|
||||
{ TLSParams::OPT_TLS_CA_FILE, TLS_CA_FILE_FLAG, SO_REQ_SEP },
|
||||
|
||||
#define TLS_HELP \
|
||||
" " TLS_CERTIFICATE_FILE_FLAG " CERTFILE\n" \
|
||||
" The path of a file containing the TLS certificate and CA\n" \
|
||||
" chain.\n" \
|
||||
" " TLS_CA_FILE_FLAG " CERTAUTHFILE\n" \
|
||||
" The path of a file containing the CA certificates chain.\n" \
|
||||
" " TLS_KEY_FILE_FLAG " KEYFILE\n" \
|
||||
" The path of a file containing the private key corresponding\n" \
|
||||
" to the TLS certificate.\n" \
|
||||
" " TLS_PASSWORD_FLAG " PASSCODE\n" \
|
||||
" The passphrase of encrypted private key\n" \
|
||||
" " TLS_VERIFY_PEERS_FLAG " CONSTRAINTS\n" \
|
||||
" The constraints by which to validate TLS peers. The contents\n" \
|
||||
" and format of CONSTRAINTS are plugin-specific.\n"
|
||||
|
||||
#endif
|
|
@ -684,6 +684,50 @@ void removeTraceRole(std::string role) {
|
|||
g_traceLog.removeRole(role);
|
||||
}
|
||||
|
||||
TraceEvent::TraceEvent() : initialized(true), enabled(false), logged(true) {}
|
||||
|
||||
TraceEvent::TraceEvent(TraceEvent &&ev) {
|
||||
enabled = ev.enabled;
|
||||
err = ev.err;
|
||||
fields = std::move(ev.fields);
|
||||
id = ev.id;
|
||||
initialized = ev.initialized;
|
||||
logged = ev.logged;
|
||||
maxEventLength = ev.maxEventLength;
|
||||
maxFieldLength = ev.maxFieldLength;
|
||||
severity = ev.severity;
|
||||
tmpEventMetric = ev.tmpEventMetric;
|
||||
trackingKey = ev.trackingKey;
|
||||
type = ev.type;
|
||||
|
||||
ev.initialized = true;
|
||||
ev.enabled = false;
|
||||
ev.logged = true;
|
||||
ev.tmpEventMetric = nullptr;
|
||||
}
|
||||
|
||||
TraceEvent& TraceEvent::operator=(TraceEvent &&ev) {
|
||||
enabled = ev.enabled;
|
||||
err = ev.err;
|
||||
fields = std::move(ev.fields);
|
||||
id = ev.id;
|
||||
initialized = ev.initialized;
|
||||
logged = ev.logged;
|
||||
maxEventLength = ev.maxEventLength;
|
||||
maxFieldLength = ev.maxFieldLength;
|
||||
severity = ev.severity;
|
||||
tmpEventMetric = ev.tmpEventMetric;
|
||||
trackingKey = ev.trackingKey;
|
||||
type = ev.type;
|
||||
|
||||
ev.initialized = true;
|
||||
ev.enabled = false;
|
||||
ev.logged = true;
|
||||
ev.tmpEventMetric = nullptr;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
TraceEvent::TraceEvent( const char* type, UID id ) : id(id), type(type), severity(SevInfo), initialized(false), enabled(true), logged(false) {
|
||||
g_trace_depth++;
|
||||
setMaxFieldLength(0);
|
||||
|
@ -760,7 +804,9 @@ bool TraceEvent::init() {
|
|||
}
|
||||
|
||||
detail("Severity", int(severity));
|
||||
detailf("Time", "%.6f", getCurrentTime());
|
||||
detail("Time", "0.000000");
|
||||
timeIndex = fields.size() - 1;
|
||||
|
||||
detail("Type", type);
|
||||
if(g_network && g_network->isSimulated()) {
|
||||
NetworkAddress local = g_network->getLocalAddress();
|
||||
|
@ -968,6 +1014,8 @@ void TraceEvent::log() {
|
|||
init();
|
||||
try {
|
||||
if (enabled) {
|
||||
fields.mutate(timeIndex).second = format("%.6f", TraceEvent::getCurrentTime());
|
||||
|
||||
if (this->severity == SevError) {
|
||||
severity = SevInfo;
|
||||
backtrace();
|
||||
|
@ -1181,6 +1229,10 @@ std::string TraceEventFields::getValue(std::string key) const {
|
|||
}
|
||||
}
|
||||
|
||||
TraceEventFields::Field& TraceEventFields::mutate(int index) {
|
||||
return fields.at(index);
|
||||
}
|
||||
|
||||
namespace {
|
||||
void parseNumericValue(std::string const& s, double &outValue, bool permissive = false) {
|
||||
double d = 0;
|
||||
|
@ -1306,6 +1358,9 @@ void TraceEventFields::validateFormat() const {
|
|||
}
|
||||
|
||||
std::string traceableStringToString(const char* value, size_t S) {
|
||||
ASSERT_WE_THINK(S > 0 && value[S - 1] == '\0');
|
||||
if(g_network) {
|
||||
ASSERT_WE_THINK(S > 0 && value[S - 1] == '\0');
|
||||
}
|
||||
|
||||
return std::string(value, S - 1); // Exclude trailing \0 byte
|
||||
}
|
||||
|
|
|
@ -81,6 +81,8 @@ public:
|
|||
int64_t getInt64(std::string key, bool permissive=false) const;
|
||||
double getDouble(std::string key, bool permissive=false) const;
|
||||
|
||||
Field &mutate(int index);
|
||||
|
||||
std::string toString() const;
|
||||
void validateFormat() const;
|
||||
template<class Archiver>
|
||||
|
@ -374,11 +376,15 @@ struct SpecialTraceMetricType
|
|||
TRACE_METRIC_TYPE(double, double);
|
||||
|
||||
struct TraceEvent {
|
||||
TraceEvent();
|
||||
TraceEvent( const char* type, UID id = UID() ); // Assumes SevInfo severity
|
||||
TraceEvent( Severity, const char* type, UID id = UID() );
|
||||
TraceEvent( struct TraceInterval&, UID id = UID() );
|
||||
TraceEvent( Severity severity, struct TraceInterval& interval, UID id = UID() );
|
||||
|
||||
TraceEvent( TraceEvent &&ev );
|
||||
TraceEvent& operator=( TraceEvent &&ev );
|
||||
|
||||
static void setNetworkThread();
|
||||
static bool isNetworkThread();
|
||||
|
||||
|
@ -490,6 +496,7 @@ private:
|
|||
|
||||
int maxFieldLength;
|
||||
int maxEventLength;
|
||||
int timeIndex;
|
||||
|
||||
void setSizeLimits();
|
||||
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
<ClCompile Include="version.cpp" />
|
||||
<ClCompile Include="SignalSafeUnwind.cpp" />
|
||||
<ClCompile Include="serialize.cpp" />
|
||||
<ClCompile Include="TLSPolicy.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="CompressedInt.h" />
|
||||
|
@ -95,6 +96,7 @@
|
|||
<ClInclude Include="Platform.h" />
|
||||
<ClInclude Include="ThreadSafeQueue.h" />
|
||||
<ClInclude Include="Trace.h" />
|
||||
<ClInclude Include="TLSPolicy.h" />
|
||||
<ClInclude Include="SignalSafeUnwind.h" />
|
||||
<ClInclude Include="UnitTest.h" />
|
||||
<ActorCompiler Include="ThreadHelper.actor.h">
|
||||
|
|
|
@ -129,3 +129,12 @@ ACTOR Future<Void> returnIfTrue( Future<bool> f )
|
|||
wait( Never() );
|
||||
throw internal_error();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> lowPriorityDelay( double waitTime ) {
|
||||
state int loopCount = 0;
|
||||
while(loopCount < FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT) {
|
||||
wait(delay(waitTime/FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT, TaskPriority::Low));
|
||||
loopCount++;
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include "flow/flow.h"
|
||||
#include "flow/Knobs.h"
|
||||
#include "flow/Util.h"
|
||||
#include "flow/IndexedSet.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
#pragma warning( disable: 4355 ) // 'this' : used in base member initializer list
|
||||
|
||||
|
@ -813,6 +814,7 @@ Future<Void> anyTrue( std::vector<Reference<AsyncVar<bool>>> const& input, Refer
|
|||
Future<Void> cancelOnly( std::vector<Future<Void>> const& futures );
|
||||
Future<Void> timeoutWarningCollector( FutureStream<Void> const& input, double const& logDelay, const char* const& context, UID const& id );
|
||||
Future<bool> quorumEqualsTrue( std::vector<Future<bool>> const& futures, int const& required );
|
||||
Future<Void> lowPriorityDelay( double const& waitTime );
|
||||
|
||||
ACTOR template <class T>
|
||||
Future<Void> streamHelper( PromiseStream<T> output, PromiseStream<Error> errors, Future<T> input ) {
|
||||
|
@ -1297,6 +1299,110 @@ private:
|
|||
}
|
||||
};
|
||||
|
||||
struct NotifiedInt {
|
||||
NotifiedInt( int64_t val = 0 ) : val(val) {}
|
||||
|
||||
Future<Void> whenAtLeast( int64_t limit ) {
|
||||
if (val >= limit)
|
||||
return Void();
|
||||
Promise<Void> p;
|
||||
waiting.push( std::make_pair(limit,p) );
|
||||
return p.getFuture();
|
||||
}
|
||||
|
||||
int64_t get() const { return val; }
|
||||
|
||||
void set( int64_t v ) {
|
||||
ASSERT( v >= val );
|
||||
if (v != val) {
|
||||
val = v;
|
||||
|
||||
std::vector<Promise<Void>> toSend;
|
||||
while ( waiting.size() && v >= waiting.top().first ) {
|
||||
Promise<Void> p = std::move(waiting.top().second);
|
||||
waiting.pop();
|
||||
toSend.push_back(p);
|
||||
}
|
||||
for(auto& p : toSend) {
|
||||
p.send(Void());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void operator=( int64_t v ) {
|
||||
set( v );
|
||||
}
|
||||
|
||||
NotifiedInt(NotifiedInt&& r) BOOST_NOEXCEPT : waiting(std::move(r.waiting)), val(r.val) {}
|
||||
void operator=(NotifiedInt&& r) BOOST_NOEXCEPT { waiting = std::move(r.waiting); val = r.val; }
|
||||
|
||||
private:
|
||||
typedef std::pair<int64_t,Promise<Void>> Item;
|
||||
struct ItemCompare {
|
||||
bool operator()(const Item& a, const Item& b) { return a.first > b.first; }
|
||||
};
|
||||
std::priority_queue<Item, std::vector<Item>, ItemCompare> waiting;
|
||||
int64_t val;
|
||||
};
|
||||
|
||||
struct BoundedFlowLock : NonCopyable, public ReferenceCounted<BoundedFlowLock> {
|
||||
// BoundedFlowLock is different from a FlowLock in that it has a bound on how many locks can be taken from the oldest outstanding lock.
|
||||
// For instance, with a FlowLock that has two permits, if one permit is taken but never released, the other permit can be reused an unlimited
|
||||
// amount of times, but with a BoundedFlowLock, it can only be reused a fixed number of times.
|
||||
|
||||
struct Releaser : NonCopyable {
|
||||
BoundedFlowLock* lock;
|
||||
int64_t permitNumber;
|
||||
Releaser() : lock(nullptr), permitNumber(0) {}
|
||||
Releaser( BoundedFlowLock* lock, int64_t permitNumber ) : lock(lock), permitNumber(permitNumber) {}
|
||||
Releaser(Releaser&& r) BOOST_NOEXCEPT : lock(r.lock), permitNumber(r.permitNumber) { r.permitNumber = 0; }
|
||||
void operator=(Releaser&& r) { if (permitNumber) lock->release(permitNumber); lock = r.lock; permitNumber = r.permitNumber; r.permitNumber = 0; }
|
||||
|
||||
void release() {
|
||||
if (permitNumber) {
|
||||
lock->release(permitNumber);
|
||||
}
|
||||
permitNumber = 0;
|
||||
}
|
||||
|
||||
~Releaser() { if (permitNumber) lock->release(permitNumber); }
|
||||
};
|
||||
|
||||
BoundedFlowLock() : unrestrictedPermits(1), boundedPermits(0), nextPermitNumber(0), minOutstanding(0) {}
|
||||
explicit BoundedFlowLock(int64_t unrestrictedPermits, int64_t boundedPermits) : unrestrictedPermits(unrestrictedPermits), boundedPermits(boundedPermits), nextPermitNumber(0), minOutstanding(0) {}
|
||||
|
||||
Future<int64_t> take() {
|
||||
return takeActor(this);
|
||||
}
|
||||
void release( int64_t permitNumber ) {
|
||||
outstanding.erase(permitNumber);
|
||||
updateMinOutstanding();
|
||||
}
|
||||
private:
|
||||
IndexedSet<int64_t, int64_t> outstanding;
|
||||
NotifiedInt minOutstanding;
|
||||
int64_t nextPermitNumber;
|
||||
const int64_t unrestrictedPermits;
|
||||
const int64_t boundedPermits;
|
||||
|
||||
void updateMinOutstanding() {
|
||||
auto it = outstanding.index(unrestrictedPermits-1);
|
||||
if(it == outstanding.end()) {
|
||||
minOutstanding.set(nextPermitNumber);
|
||||
} else {
|
||||
minOutstanding.set(*it);
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<int64_t> takeActor(BoundedFlowLock* lock) {
|
||||
state int64_t permitNumber = ++lock->nextPermitNumber;
|
||||
lock->outstanding.insert(permitNumber, 1);
|
||||
lock->updateMinOutstanding();
|
||||
wait( lock->minOutstanding.whenAtLeast(std::max<int64_t>(0, permitNumber - lock->boundedPermits)) );
|
||||
return permitNumber;
|
||||
}
|
||||
};
|
||||
|
||||
ACTOR template <class T>
|
||||
Future<Void> yieldPromiseStream( FutureStream<T> input, PromiseStream<T> output, TaskPriority taskID = TaskPriority::DefaultYield ) {
|
||||
loop {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue