Merge remote-tracking branch 'upstream/master' into fdbcli-hints

This commit is contained in:
Alex Miller 2020-02-24 16:02:30 -08:00
commit 9545091899
106 changed files with 2582 additions and 1773 deletions

View File

@ -179,9 +179,6 @@ set(SEED "0x${SEED_}" CACHE STRING "Random seed for testing")
################################################################################ ################################################################################
include(CompileBoost) include(CompileBoost)
if(WITH_TLS)
add_subdirectory(FDBLibTLS)
endif()
add_subdirectory(flow) add_subdirectory(flow)
add_subdirectory(fdbrpc) add_subdirectory(fdbrpc)
add_subdirectory(fdbclient) add_subdirectory(fdbclient)

View File

@ -51,7 +51,7 @@ ifeq ($(PLATFORM),Linux)
CXXFLAGS += -std=c++17 CXXFLAGS += -std=c++17
BOOST_BASEDIR ?= /opt BOOST_BASEDIR ?= /opt
TLS_LIBDIR ?= /usr/local/lib TLS_LIBDIR ?= /usr/local/lib64
DLEXT := so DLEXT := so
java_DLEXT := so java_DLEXT := so
TARGET_LIBC_VERSION ?= 2.11 TARGET_LIBC_VERSION ?= 2.11
@ -67,7 +67,7 @@ else ifeq ($(PLATFORM),Darwin)
.LIBPATTERNS := lib%.dylib lib%.a .LIBPATTERNS := lib%.dylib lib%.a
BOOST_BASEDIR ?= ${HOME} BOOST_BASEDIR ?= ${HOME}
TLS_LIBDIR ?= /usr/local/lib TLS_LIBDIR ?= /usr/local/lib64
DLEXT := dylib DLEXT := dylib
java_DLEXT := jnilib java_DLEXT := jnilib
else else
@ -112,8 +112,8 @@ CFLAGS += -DTLS_DISABLED
FDB_TLS_LIB := FDB_TLS_LIB :=
TLS_LIBS := TLS_LIBS :=
else else
FDB_TLS_LIB := lib/libFDBLibTLS.a FDB_TLS_LIB :=
TLS_LIBS += $(addprefix $(TLS_LIBDIR)/,libtls.a libssl.a libcrypto.a) TLS_LIBS += $(addprefix $(TLS_LIBDIR)/,libssl.a libcrypto.a)
endif endif
CXXFLAGS += -Wno-deprecated -DBOOST_ERROR_CODE_HEADER_ONLY -DBOOST_SYSTEM_NO_DEPRECATED CXXFLAGS += -Wno-deprecated -DBOOST_ERROR_CODE_HEADER_ONLY -DBOOST_SYSTEM_NO_DEPRECATED
@ -126,9 +126,6 @@ VPATH += $(addprefix :,$(filter-out lib,$(patsubst -L%,%,$(filter -L%,$(LDFLAGS)
CS_PROJECTS := flow/actorcompiler flow/coveragetool fdbclient/vexillographer CS_PROJECTS := flow/actorcompiler flow/coveragetool fdbclient/vexillographer
CPP_PROJECTS := flow fdbrpc fdbclient fdbbackup fdbserver fdbcli bindings/c bindings/java fdbmonitor bindings/flow/tester bindings/flow CPP_PROJECTS := flow fdbrpc fdbclient fdbbackup fdbserver fdbcli bindings/c bindings/java fdbmonitor bindings/flow/tester bindings/flow
ifndef TLS_DISABLED
CPP_PROJECTS += FDBLibTLS
endif
OTHER_PROJECTS := bindings/python bindings/ruby bindings/go OTHER_PROJECTS := bindings/python bindings/ruby bindings/go
CS_MK_GENERATED := $(CS_PROJECTS:=/generated.mk) CS_MK_GENERATED := $(CS_PROJECTS:=/generated.mk)

View File

@ -38,6 +38,21 @@ else()
endif() endif()
add_dependencies(fdb_c fdb_c_generated fdb_c_options) add_dependencies(fdb_c fdb_c_generated fdb_c_options)
target_link_libraries(fdb_c PUBLIC $<BUILD_INTERFACE:fdbclient>) target_link_libraries(fdb_c PUBLIC $<BUILD_INTERFACE:fdbclient>)
if(APPLE)
set(symbols ${CMAKE_CURRENT_BINARY_DIR}/fdb_c.symbols)
add_custom_command(OUTPUT ${symbols}
COMMAND $<TARGET_FILE:Python::Interpreter> ${CMAKE_CURRENT_SOURCE_DIR}/symbolify.py
${CMAKE_CURRENT_SOURCE_DIR}/foundationdb/fdb_c.h
${symbols}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/symbolify.py ${CMAKE_CURRENT_SOURCE_DIR}/foundationdb/fdb_c.h
COMMENT "Generate exported_symbols_list")
add_custom_target(exported_symbols_list DEPENDS ${symbols})
add_dependencies(fdb_c exported_symbols_list)
target_link_options(fdb_c PRIVATE "LINKER:-no_weak_exports,-exported_symbols_list,${symbols}")
elseif(WIN32)
else()
target_link_options(fdb_c PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/fdb_c.map,-z,nodelete")
endif()
target_include_directories(fdb_c PUBLIC target_include_directories(fdb_c PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>

View File

@ -107,7 +107,12 @@ fdb_error_t fdb_network_set_option( FDBNetworkOption option,
} }
fdb_error_t fdb_setup_network_impl() { fdb_error_t fdb_setup_network_impl() {
CATCH_AND_RETURN( API->setupNetwork(); ); CATCH_AND_RETURN(
try {
API->setupNetwork();
} catch (boost::system::system_error& e) {
return error_code_tls_error;
} );
} }
fdb_error_t fdb_setup_network_v13( const char* localAddress ) { fdb_error_t fdb_setup_network_v13( const char* localAddress ) {

10
bindings/c/symbolify.py Normal file
View File

@ -0,0 +1,10 @@
if __name__ == '__main__':
import re
import sys
r = re.compile('DLLEXPORT[^(]*(fdb_[^(]*)[(]')
(fdb_c_h, symbols_file) = sys.argv[1:]
with open(fdb_c_h, 'r') as f:
symbols = sorted(set('_' + m.group(1) for m in r.finditer(f.read())))
with open(symbols_file, 'w') as f:
f.write('\n'.join(symbols))
f.write('\n')

View File

@ -82,7 +82,7 @@ void fdb_flow_test() {
fdb->setupNetwork(); fdb->setupNetwork();
startThread(networkThread, fdb); startThread(networkThread, fdb);
g_network = newNet2( false ); g_network = newNet2(false);
openTraceFile(NetworkAddress(), 1000000, 1000000, "."); openTraceFile(NetworkAddress(), 1000000, 1000000, ".");
systemMonitor(); systemMonitor();

View File

@ -23,6 +23,7 @@
fdb_flow_tester_CFLAGS := -Ibindings/c $(fdbrpc_CFLAGS) fdb_flow_tester_CFLAGS := -Ibindings/c $(fdbrpc_CFLAGS)
fdb_flow_tester_LDFLAGS := -Llib $(fdbrpc_LDFLAGS) -lfdb_c fdb_flow_tester_LDFLAGS := -Llib $(fdbrpc_LDFLAGS) -lfdb_c
fdb_flow_tester_LIBS := lib/libfdb_flow.a lib/libflow.a lib/libfdb_c.$(DLEXT) fdb_flow_tester_LIBS := lib/libfdb_flow.a lib/libflow.a lib/libfdb_c.$(DLEXT)
fdb_flow_tester_STATIC_LIBS := $(TLS_LIBS)
fdb_flow_tester: lib/libfdb_c.$(DLEXT) fdb_flow_tester: lib/libfdb_c.$(DLEXT)
@mkdir -p bindings/flow/bin @mkdir -p bindings/flow/bin

View File

@ -54,7 +54,8 @@ type RangeOptions struct {
// Reverse indicates that the read should be performed in lexicographic // Reverse indicates that the read should be performed in lexicographic
// (false) or reverse lexicographic (true) order. When Reverse is true and // (false) or reverse lexicographic (true) order. When Reverse is true and
// Limit is non-zero, the last Limit key-value pairs in the range are // Limit is non-zero, the last Limit key-value pairs in the range are
// returned. // returned. Reading ranges in reverse is supported natively by the
// database and should have minimal extra cost.
Reverse bool Reverse bool
} }

View File

@ -184,7 +184,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows * should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range. * will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order * @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* *
* @return a handle to access the results of the asynchronous call * @return a handle to access the results of the asynchronous call
*/ */
@ -205,7 +207,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows * should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range. * will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order * @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* @param mode provide a hint about how the results are to be used. This * @param mode provide a hint about how the results are to be used. This
* can provide speed improvements or efficiency gains based on the caller's * can provide speed improvements or efficiency gains based on the caller's
* knowledge of the upcoming access pattern. * knowledge of the upcoming access pattern.
@ -272,7 +276,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows * should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range. * will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order * @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* *
* @return a handle to access the results of the asynchronous call * @return a handle to access the results of the asynchronous call
*/ */
@ -293,7 +299,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows * should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range. * will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order * @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* @param mode provide a hint about how the results are to be used. This * @param mode provide a hint about how the results are to be used. This
* can provide speed improvements or efficiency gains based on the caller's * can provide speed improvements or efficiency gains based on the caller's
* knowledge of the upcoming access pattern. * knowledge of the upcoming access pattern.
@ -369,7 +377,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows * should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range. * will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order * @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* *
* @return a handle to access the results of the asynchronous call * @return a handle to access the results of the asynchronous call
*/ */
@ -393,7 +403,9 @@ public interface ReadTransaction extends ReadTransactionContext {
* <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
* should not limit the number of results. If {@code reverse} is {@code true} rows * should not limit the number of results. If {@code reverse} is {@code true} rows
* will be limited starting at the end of the range. * will be limited starting at the end of the range.
* @param reverse return results starting at the end of the range in reverse order * @param reverse return results starting at the end of the range in reverse order.
* Reading ranges in reverse is supported natively by the database and should
* have minimal extra cost.
* @param mode provide a hint about how the results are to be used. This * @param mode provide a hint about how the results are to be used. This
* can provide speed improvements or efficiency gains based on the caller's * can provide speed improvements or efficiency gains based on the caller's
* knowledge of the upcoming access pattern. * knowledge of the upcoming access pattern.

View File

@ -817,9 +817,9 @@ public class DirectoryLayer implements Directory {
private static long unpackLittleEndian(byte[] bytes) { private static long unpackLittleEndian(byte[] bytes) {
assert bytes.length == 8; assert bytes.length == 8;
int value = 0; long value = 0;
for(int i = 0; i < 8; ++i) { for(int i = 0; i < 8; ++i) {
value += (bytes[i] << (i * 8)); value += (Byte.toUnsignedLong(bytes[i]) << (i * 8));
} }
return value; return value;
} }

View File

@ -1,6 +1,4 @@
FROM centos:6 FROM centos:6
LABEL version=0.1.9
ENV DOCKER_IMAGEVER=0.1.9
# Install dependencies for developer tools, bindings,\ # Install dependencies for developer tools, bindings,\
# documentation, actorcompiler, and packaging tools\ # documentation, actorcompiler, and packaging tools\
@ -8,9 +6,10 @@ RUN yum install -y yum-utils &&\
yum-config-manager --enable rhel-server-rhscl-7-rpms &&\ yum-config-manager --enable rhel-server-rhscl-7-rpms &&\
yum -y install centos-release-scl epel-release &&\ yum -y install centos-release-scl epel-release &&\
yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \ yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \
devtoolset-8-gcc-8.3.1-3.1.el6 devtoolset-8-gcc-c++-8.3.1-3.1.el6 \
rh-python36-python-devel devtoolset-8-valgrind-devel \ rh-python36-python-devel devtoolset-8-valgrind-devel \
mono-core rh-ruby24 golang python27 rpm-build debbuild \ mono-core rh-ruby24 golang python27 rpm-build debbuild \
python-pip npm dos2unix valgrind-devel ccache distcc devtoolset-8-libubsan-devel libubsan-devel &&\ python-pip dos2unix valgrind-devel ccache distcc devtoolset-8-libubsan-devel libubsan-devel &&\
pip install boto3==1.1.1 pip install boto3==1.1.1
USER root USER root
@ -19,32 +18,42 @@ RUN adduser --comment '' fdb && chown fdb /opt
# wget of bintray without forcing UTF-8 encoding results in 403 Forbidden # wget of bintray without forcing UTF-8 encoding results in 403 Forbidden
RUN cd /opt/ &&\ RUN cd /opt/ &&\
curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 > boost_1_67_0.tar.bz2 &&\ curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 &&\
echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha.txt &&\ echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha-67.txt &&\
sha256sum -c boost-sha.txt &&\ sha256sum -c boost-sha-67.txt &&\
tar -xjf boost_1_67_0.tar.bz2 &&\ tar -xjf boost_1_67_0.tar.bz2 &&\
rm -rf boost_1_67_0.tar.bz2 boost-sha.txt boost_1_67_0/libs rm -rf boost_1_67_0.tar.bz2 boost-sha-67.txt boost_1_67_0/libs &&\
curl -L https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 &&\
echo "59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722 boost_1_72_0.tar.bz2" > boost-sha-72.txt &&\
sha256sum -c boost-sha-72.txt &&\
tar -xjf boost_1_72_0.tar.bz2 &&\
rm -rf boost_1_72_0.tar.bz2 boost-sha-72.txt boost_1_72_0/libs
# install cmake # install cmake
RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz > /tmp/cmake.tar.gz &&\ RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz -o /tmp/cmake.tar.gz &&\
echo "563a39e0a7c7368f81bfa1c3aff8b590a0617cdfe51177ddc808f66cc0866c76 /tmp/cmake.tar.gz" > /tmp/cmake-sha.txt &&\ echo "563a39e0a7c7368f81bfa1c3aff8b590a0617cdfe51177ddc808f66cc0866c76 /tmp/cmake.tar.gz" > /tmp/cmake-sha.txt &&\
sha256sum -c /tmp/cmake-sha.txt &&\ sha256sum -c /tmp/cmake-sha.txt &&\
cd /tmp && tar xf cmake.tar.gz &&\ cd /tmp && tar xf cmake.tar.gz &&\
cp -r cmake-3.13.4-Linux-x86_64/* /usr/local/ &&\ cp -r cmake-3.13.4-Linux-x86_64/* /usr/local/ &&\
rm -rf cmake.tar.gz cmake-3.13.4-Linux-x86_64 cmake-sha.txt rm -rf cmake.tar.gz cmake-3.13.4-Linux-x86_64 cmake-sha.txt
# install LibreSSL # install Ninja
RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip > ninja.zip &&\ RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -o ninja.zip &&\
unzip ninja.zip && cd ninja-1.9.0 && scl enable devtoolset-8 -- ./configure.py --bootstrap && cp ninja /usr/bin &&\ unzip ninja.zip && cd ninja-1.9.0 && scl enable devtoolset-8 -- ./configure.py --bootstrap && cp ninja /usr/bin &&\
cd .. && rm -rf ninja-1.9.0 ninja.zip &&\ cd .. && rm -rf ninja-1.9.0 ninja.zip
curl -L https://ftp.openbsd.org/pub/OpenBSD/LibreSSL/libressl-2.8.2.tar.gz > /tmp/libressl.tar.gz &&\
cd /tmp && echo "b8cb31e59f1294557bfc80f2a662969bc064e83006ceef0574e2553a1c254fd5 libressl.tar.gz" > libressl-sha.txt &&\
sha256sum -c libressl-sha.txt && tar xf libressl.tar.gz &&\
cd libressl-2.8.2 && cd /tmp/libressl-2.8.2 && scl enable devtoolset-8 -- ./configure --prefix=/usr/local/stow/libressl CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
cd /tmp/libressl-2.8.2 && scl enable devtoolset-8 -- make -j`nproc` install &&\
rm -rf /tmp/libressl-2.8.2 /tmp/libressl.tar.gz
# install openssl
RUN cd /tmp && curl -L https://www.openssl.org/source/openssl-1.1.1d.tar.gz -o openssl.tar.gz &&\
echo "1e3a91bc1f9dfce01af26026f856e064eab4c8ee0a8f457b5ae30b40b8b711f2 openssl.tar.gz" > openssl-sha.txt &&\
sha256sum -c openssl-sha.txt && tar -xzf openssl.tar.gz &&\
cd openssl-1.1.1d && scl enable devtoolset-8 -- ./config CFLAGS="-fPIC -O3" --prefix=/usr/local &&\
scl enable devtoolset-8 -- make -j`nproc` && scl enable devtoolset-8 -- make -j1 install &&\
ln -sv /usr/local/lib64/lib*.so.1.1 /usr/lib64/ &&\
cd /tmp/ && rm -rf /tmp/openssl-1.1.1d /tmp/openssl.tar.gz
LABEL version=0.1.12
ENV DOCKER_IMAGEVER=0.1.12
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0 ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++ ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
CMD scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash CMD scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash

View File

@ -2,7 +2,7 @@ version: "3"
services: services:
common: &common common: &common
image: foundationdb/foundationdb-build:0.1.9 image: foundationdb/foundationdb-build:0.1.12
build-setup: &build-setup build-setup: &build-setup
<<: *common <<: *common
@ -36,11 +36,11 @@ services:
release-packages: &release-packages release-packages: &release-packages
<<: *release-setup <<: *release-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages' command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages'
snapshot-packages: &snapshot-packages snapshot-packages: &snapshot-packages
<<: *build-setup <<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages' command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" packages'
prb-packages: prb-packages:
<<: *snapshot-packages <<: *snapshot-packages
@ -48,11 +48,11 @@ services:
release-bindings: &release-bindings release-bindings: &release-bindings
<<: *release-setup <<: *release-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings' command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings'
snapshot-bindings: &snapshot-bindings snapshot-bindings: &snapshot-bindings
<<: *build-setup <<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings' command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'make -j "$${MAKEJOBS}" bindings'
prb-bindings: prb-bindings:
<<: *snapshot-bindings <<: *snapshot-bindings
@ -60,7 +60,7 @@ services:
snapshot-cmake: &snapshot-cmake snapshot-cmake: &snapshot-cmake
<<: *build-setup <<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DUSE_WERROR=1 -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack' command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=0 -DVALGRIND=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack'
prb-cmake: prb-cmake:
<<: *snapshot-cmake <<: *snapshot-cmake
@ -68,7 +68,7 @@ services:
snapshot-ctest: &snapshot-ctest snapshot-ctest: &snapshot-ctest
<<: *build-setup <<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DUSE_WERROR=1 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure' command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
prb-ctest: prb-ctest:
<<: *snapshot-ctest <<: *snapshot-ctest
@ -76,7 +76,7 @@ services:
snapshot-correctness: &snapshot-correctness snapshot-correctness: &snapshot-correctness
<<: *build-setup <<: *build-setup
command: scl enable devtoolset-8 python27 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DUSE_WERROR=1 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure' command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DCMAKE_COLOR_MAKEFILE=0 -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure'
prb-correctness: prb-correctness:
<<: *snapshot-correctness <<: *snapshot-correctness

View File

@ -9,21 +9,32 @@ if(USE_VALGRIND)
endif() endif()
################################################################################ ################################################################################
# LibreSSL # SSL
################################################################################ ################################################################################
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find LibreSSL and always build without TLS support") set(DISABLE_TLS OFF CACHE BOOL "Don't try to find LibreSSL and always build without TLS support")
if(DISABLE_TLS) if(DISABLE_TLS)
set(WITH_TLS OFF) set(WITH_TLS OFF)
else() else()
set(LIBRESSL_USE_STATIC_LIBS TRUE) set(OPENSSL_USE_STATIC_LIBS TRUE)
find_package(LibreSSL) find_package(OpenSSL)
if(LibreSSL_FOUND) if(NOT OPENSSL_FOUND)
set(LIBRESSL_USE_STATIC_LIBS TRUE)
find_package(LibreSSL)
if (LIBRESSL_FOUND)
add_library(OpenSSL::SSL ALIAS LibreSSL)
endif()
endif()
if(OPENSSL_FOUND OR LIBRESSL_FOUND)
set(WITH_TLS ON) set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL) add_compile_options(-DHAVE_OPENSSL)
else() else()
message(STATUS "LibreSSL NOT Found - Will compile without TLS Support") message(STATUS "Neither OpenSSL nor LibreSSL were found - Will compile without TLS Support")
message(STATUS "You can set LibreSSL_ROOT to the LibreSSL install directory to help cmake find it") message(STATUS "You can set OPENSSL_ROOT_DIR or LibreSSL_ROOT to the LibreSSL install directory to help cmake find it")
set(WITH_TLS OFF)
endif()
if(WIN32)
message(STATUS "TLS is temporarily disabled on macOS while libressl -> openssl transition happens")
set(WITH_TLS OFF) set(WITH_TLS OFF)
endif() endif()
endif() endif()
@ -59,8 +70,8 @@ endif()
# Pip # Pip
################################################################################ ################################################################################
find_package(Virtualenv) find_package(Python3 COMPONENTS Interpreter)
if (Virtualenv_FOUND) if (Python3_Interpreter_FOUND)
set(WITH_DOCUMENTATION ON) set(WITH_DOCUMENTATION ON)
else() else()
set(WITH_DOCUMENTATION OFF) set(WITH_DOCUMENTATION OFF)

View File

@ -1,20 +0,0 @@
find_program(_VIRTUALENV_EXE virtualenv)
# get version and test that program actually works
if(_VIRTUALENV_EXE)
execute_process(
COMMAND ${_VIRTUALENV_EXE} --version
RESULT_VARIABLE ret_code
OUTPUT_VARIABLE version_string
ERROR_VARIABLE error_output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(ret_code EQUAL 0 AND NOT ERROR_VARIABLE)
# we found a working virtualenv
set(VIRTUALENV_EXE ${_VIRTUALENV_EXE})
set(VIRTUALENV_VERSION version_string)
endif()
endif()
find_package_handle_standard_args(Virtualenv
REQUIRED_VARS VIRTUALENV_EXE
VERSION_VAR ${VIRTUALENV_VERSION})

View File

@ -10,7 +10,7 @@ set(pip_command ${venv_dir}/bin/pip${EXE_SUFFIX})
set(python_command ${venv_dir}/bin/python${EXE_SUFFIX}) set(python_command ${venv_dir}/bin/python${EXE_SUFFIX})
add_custom_command(OUTPUT ${venv_dir}/venv_setup add_custom_command(OUTPUT ${venv_dir}/venv_setup
COMMAND ${VIRTUALENV_EXE} venv && COMMAND ${Python3_EXECUTABLE} -m venv venv &&
${CMAKE_COMMAND} -E copy ${sphinx_dir}/.pip.conf ${venv_dir}/pip.conf && ${CMAKE_COMMAND} -E copy ${sphinx_dir}/.pip.conf ${venv_dir}/pip.conf &&
. ${venv_dir}/bin/activate && . ${venv_dir}/bin/activate &&
${pip_command} install --upgrade pip && ${pip_command} install --upgrade pip &&
@ -86,7 +86,7 @@ else()
endif() endif()
add_custom_target(docpreview add_custom_target(docpreview
COMMAND ${python_command} -m SimpleHTTPServer ${port} COMMAND ${python_command} -m http.server ${port}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html
USES_TERMINAL) USES_TERMINAL)
add_dependencies(docpreview html) add_dependencies(docpreview html)

View File

@ -693,12 +693,18 @@ Upgrades from 6.1.x will keep all your old data and configuration settings. Data
Upgrading from 6.0.x Upgrading from 6.0.x
-------------------- --------------------
Upgrades from 6.0.x will keep all your old data and configuration settings. Data distribution will slowly reorganize how data is spread across storage servers. Upgrades from 6.0.x will keep all your old data and configuration settings.
Upgrading from 5.2.x Upgrading from 5.2.x
-------------------- --------------------
Upgrades from 5.2.x will keep all your old data and configuration settings. Upgrades from 5.2.x will keep all your old data and configuration settings. Some affinities that certain roles have for running on processes that haven't set a process class have changed, which may result in these processes running in different locations after upgrading. To avoid this, set process classes as needed. The following changes were made:
* The proxies and master no longer prefer ``resolution`` or ``transaction`` class processes to processes with unset class.
* The resolver no longer prefers ``transaction`` class processes to processes with unset class.
* The cluster controller no longer prefers ``master``, ``resolution`` or ``proxy`` class processes to processes with unset class.
See :ref:`guidelines-process-class-config` for recommendations on setting process classes. All of the above roles will prefer ``stateless`` class processes to ones that don't set a class.
Upgrading from 5.0.x - 5.1.x Upgrading from 5.0.x - 5.1.x
---------------------------- ----------------------------

View File

@ -528,8 +528,7 @@ Applications must provide error handling and an appropriate retry loop around th
|snapshot| |snapshot|
``reverse`` ``reverse``
If non-zero, key-value pairs will be returned in reverse lexicographical order beginning at the end of the range. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If non-zero, key-value pairs will be returned in reverse lexicographical order beginning at the end of the range.
.. type:: FDBStreamingMode .. type:: FDBStreamingMode

View File

@ -293,7 +293,7 @@ A |database-blurb1| |database-blurb2|
If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned. If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned.
If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how to retrieve the specified range. This option should generally not be specified, allowing FoundationDB to retrieve the full range very efficiently. If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how to retrieve the specified range. This option should generally not be specified, allowing FoundationDB to retrieve the full range very efficiently.
@ -505,7 +505,7 @@ Reading data
If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned. If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned.
If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how the returned container is likely to be used. The default is :data:`StreamingMode.iterator`. If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how the returned container is likely to be used. The default is :data:`StreamingMode.iterator`.

View File

@ -287,7 +287,7 @@ A |database-blurb1| |database-blurb2|
Only the first ``limit`` keys (and their values) in the range will be returned. Only the first ``limit`` keys (and their values) in the range will be returned.
``:reverse`` ``:reverse``
If ``true``, then the keys in the range will be returned in reverse order. If ``true``, then the keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order. If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order.
@ -461,7 +461,7 @@ Reading data
Only the first ``limit`` keys (and their values) in the range will be returned. Only the first ``limit`` keys (and their values) in the range will be returned.
``:reverse`` ``:reverse``
If true, then the keys in the range will be returned in reverse order. If ``true``, then the keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.
If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order. If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order.

View File

@ -27,11 +27,11 @@ System requirements
* Or, an unsupported Linux distribution with: * Or, an unsupported Linux distribution with:
* Kernel version between 2.6.33 and 3.0.x (inclusive) or 3.7 or greater * Kernel version between 2.6.33 and 3.0.x (inclusive) or 3.7 or greater
* Works with .deb or .rpm packages * Preferably .deb or .rpm package support
* Or, macOS 10.7 or later * Or, macOS 10.7 or later
.. warning:: The macOS version of the FoundationDB server is intended for use on locally accessible development machines only. Other uses are not supported. .. warning:: The macOS and Windows versions of the FoundationDB server are intended for use on locally accessible development machines only. Other uses are not supported.
* 4GB **ECC** RAM (per fdbserver process) * 4GB **ECC** RAM (per fdbserver process)
* Storage * Storage
@ -387,6 +387,8 @@ FoundationDB will never use processes on the same machine for the replication of
FoundationDB replicates data to three machines, and at least three available machines are required to make progress. This is the recommended mode for a cluster of five or more machines in a single datacenter. FoundationDB replicates data to three machines, and at least three available machines are required to make progress. This is the recommended mode for a cluster of five or more machines in a single datacenter.
.. note:: When running in cloud environments with managed disks that are already replicated and persistent, ``double`` replication may still be considered for 5+ machine clusters. This will result in lower availability fault tolerance for planned or unplanned failures and lower total read throughput, but offers a reasonable tradeoff for cost.
``three_data_hall`` mode ``three_data_hall`` mode
FoundationDB stores data in triplicate, with one copy on a storage server in each of three data halls. The transaction logs are replicated four times, with two data halls containing two replicas apiece. Four available machines (two in each of two data halls) are therefore required to make progress. This configuration enables the cluster to remain available after losing a single data hall and one machine in another data hall. FoundationDB stores data in triplicate, with one copy on a storage server in each of three data halls. The transaction logs are replicated four times, with two data halls containing two replicas apiece. Four available machines (two in each of two data halls) are therefore required to make progress. This configuration enables the cluster to remain available after losing a single data hall and one machine in another data hall.
@ -768,14 +770,12 @@ Region configuration is better in almost all ways than the ``three_datacenter``
Known limitations Known limitations
----------------- -----------------
The 6.0 release still has a number of rough edges related to region configuration. This is a collection of all the issues that have been pointed out in the sections above. These issues should be significantly improved in future releases of FoundationDB: The 6.2 release still has a number of rough edges related to region configuration. This is a collection of all the issues that have been pointed out in the sections above. These issues should be significantly improved in future releases of FoundationDB:
* FoundationDB supports replicating data to at most two regions. * FoundationDB supports replicating data to at most two regions.
* ``two_satellite_fast`` does not hide latency properly when configured with more than 4 satellite transaction logs. * ``two_satellite_fast`` does not hide latency properly when configured with more than 4 satellite transaction logs.
* While a datacenter has failed, the maximum write throughput of the cluster will be roughly 1/3 of normal performance.
.. _guidelines-process-class-config: .. _guidelines-process-class-config:
Guidelines for setting process class Guidelines for setting process class

View File

@ -156,6 +156,7 @@ Other Changes
* Does not support upgrades from any version older than 5.0. * Does not support upgrades from any version older than 5.0.
* Normalized the capitalization of trace event names and attributes. `(PR #455) <https://github.com/apple/foundationdb/pull/455>`_ * Normalized the capitalization of trace event names and attributes. `(PR #455) <https://github.com/apple/foundationdb/pull/455>`_
* Various stateless processes now have a higher affinity for running on processes with unset process class, which may result in those roles changing location upon upgrade. See :ref:`version-specific-upgrading` for details. `(PR #526) <https://github.com/apple/foundationdb/pull/526>`_
* Increased the memory requirements of the transaction log by 400MB. [6.0.5] `(PR #673) <https://github.com/apple/foundationdb/pull/673>`_ * Increased the memory requirements of the transaction log by 400MB. [6.0.5] `(PR #673) <https://github.com/apple/foundationdb/pull/673>`_
Earlier release notes Earlier release notes

View File

@ -37,7 +37,6 @@
#include "fdbclient/json_spirit/json_spirit_writer_template.h" #include "fdbclient/json_spirit/json_spirit_writer_template.h"
#include "fdbrpc/Platform.h" #include "fdbrpc/Platform.h"
#include "fdbrpc/TLSConnection.h"
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h> #include <stdio.h>
@ -3225,22 +3224,22 @@ int main(int argc, char* argv[]) {
blobCredentials.push_back(args->OptionArg()); blobCredentials.push_back(args->OptionArg());
break; break;
#ifndef TLS_DISABLED #ifndef TLS_DISABLED
case TLSOptions::OPT_TLS_PLUGIN: case TLSParams::OPT_TLS_PLUGIN:
args->OptionArg(); args->OptionArg();
break; break;
case TLSOptions::OPT_TLS_CERTIFICATES: case TLSParams::OPT_TLS_CERTIFICATES:
tlsCertPath = args->OptionArg(); tlsCertPath = args->OptionArg();
break; break;
case TLSOptions::OPT_TLS_PASSWORD: case TLSParams::OPT_TLS_PASSWORD:
tlsPassword = args->OptionArg(); tlsPassword = args->OptionArg();
break; break;
case TLSOptions::OPT_TLS_CA_FILE: case TLSParams::OPT_TLS_CA_FILE:
tlsCAPath = args->OptionArg(); tlsCAPath = args->OptionArg();
break; break;
case TLSOptions::OPT_TLS_KEY: case TLSParams::OPT_TLS_KEY:
tlsKeyPath = args->OptionArg(); tlsKeyPath = args->OptionArg();
break; break;
case TLSOptions::OPT_TLS_VERIFY_PEERS: case TLSParams::OPT_TLS_VERIFY_PEERS:
tlsVerifyPeers = args->OptionArg(); tlsVerifyPeers = args->OptionArg();
break; break;
#endif #endif
@ -3855,6 +3854,13 @@ int main(int argc, char* argv[]) {
} catch (Error& e) { } catch (Error& e) {
TraceEvent(SevError, "MainError").error(e); TraceEvent(SevError, "MainError").error(e);
status = FDB_EXIT_MAIN_ERROR; status = FDB_EXIT_MAIN_ERROR;
} catch (boost::system::system_error& e) {
if (g_network) {
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
} else {
fprintf(stderr, "ERROR: %s (%d)\n", e.what(), e.code().value());
}
status = FDB_EXIT_MAIN_EXCEPTION;
} catch (std::exception& e) { } catch (std::exception& e) {
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what()); TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
status = FDB_EXIT_MAIN_EXCEPTION; status = FDB_EXIT_MAIN_EXCEPTION;

View File

@ -32,7 +32,6 @@
#include "fdbclient/FDBOptions.g.h" #include "fdbclient/FDBOptions.g.h"
#include "flow/DeterministicRandom.h" #include "flow/DeterministicRandom.h"
#include "fdbrpc/TLSConnection.h"
#include "fdbrpc/Platform.h" #include "fdbrpc/Platform.h"
#include "flow/SimpleOpt.h" #include "flow/SimpleOpt.h"
@ -1602,9 +1601,9 @@ ACTOR Future<Void> timeWarning( double when, const char* msg ) {
return Void(); return Void();
} }
ACTOR Future<Void> checkStatus(Future<Void> f, Reference<ClusterConnectionFile> clusterFile, bool displayDatabaseAvailable = true) { ACTOR Future<Void> checkStatus(Future<Void> f, Database db, bool displayDatabaseAvailable = true) {
wait(f); wait(f);
StatusObject s = wait(StatusClient::statusFetcher(clusterFile)); StatusObject s = wait(StatusClient::statusFetcher(db));
printf("\n"); printf("\n");
printStatus(s, StatusClient::MINIMAL, displayDatabaseAvailable); printStatus(s, StatusClient::MINIMAL, displayDatabaseAvailable);
printf("\n"); printf("\n");
@ -1646,7 +1645,7 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
state Optional<ConfigureAutoResult> conf; state Optional<ConfigureAutoResult> conf;
if( tokens[startToken] == LiteralStringRef("auto") ) { if( tokens[startToken] == LiteralStringRef("auto") ) {
StatusObject s = wait( makeInterruptable(StatusClient::statusFetcher( ccf )) ); StatusObject s = wait( makeInterruptable(StatusClient::statusFetcher( db )) );
if(warn.isValid()) if(warn.isValid())
warn.cancel(); warn.cancel();
@ -1776,6 +1775,10 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere
printf("Configuration changed\n"); printf("Configuration changed\n");
ret=false; ret=false;
break; break;
case ConfigurationResult::LOCKED_NOT_NEW:
printf("ERROR: `only new databases can be configured as locked`\n");
ret = true;
break;
default: default:
ASSERT(false); ASSERT(false);
ret=true; ret=true;
@ -2091,7 +2094,7 @@ ACTOR Future<bool> exclude( Database db, std::vector<StringRef> tokens, Referenc
return true; return true;
} }
} }
StatusObject status = wait( makeInterruptable( StatusClient::statusFetcher( ccf ) ) ); StatusObject status = wait( makeInterruptable( StatusClient::statusFetcher( db ) ) );
state std::string errorString = "ERROR: Could not calculate the impact of this exclude on the total free space in the cluster.\n" state std::string errorString = "ERROR: Could not calculate the impact of this exclude on the total free space in the cluster.\n"
"Please try the exclude again in 30 seconds.\n" "Please try the exclude again in 30 seconds.\n"
@ -2537,22 +2540,22 @@ struct CLIOptions {
#ifndef TLS_DISABLED #ifndef TLS_DISABLED
// TLS Options // TLS Options
case TLSOptions::OPT_TLS_PLUGIN: case TLSParams::OPT_TLS_PLUGIN:
args.OptionArg(); args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_CERTIFICATES: case TLSParams::OPT_TLS_CERTIFICATES:
tlsCertPath = args.OptionArg(); tlsCertPath = args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_CA_FILE: case TLSParams::OPT_TLS_CA_FILE:
tlsCAPath = args.OptionArg(); tlsCAPath = args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_KEY: case TLSParams::OPT_TLS_KEY:
tlsKeyPath = args.OptionArg(); tlsKeyPath = args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_PASSWORD: case TLSParams::OPT_TLS_PASSWORD:
tlsPassword = args.OptionArg(); tlsPassword = args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_VERIFY_PEERS: case TLSParams::OPT_TLS_VERIFY_PEERS:
tlsVerifyPeers = args.OptionArg(); tlsVerifyPeers = args.OptionArg();
break; break;
#endif #endif
@ -2603,7 +2606,7 @@ ACTOR Future<Void> addInterface( std::map<Key,std::pair<Value,ClientLeaderRegInt
(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf); (*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
} }
} }
when( wait(delay(1.0)) ) {} when( wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT)) ) {}
} }
return Void(); return Void();
} }
@ -2666,7 +2669,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
if (!opt.exec.present()) { if (!opt.exec.present()) {
if(opt.initialStatusCheck) { if(opt.initialStatusCheck) {
Future<Void> checkStatusF = checkStatus(Void(), db->getConnectionFile()); Future<Void> checkStatusF = checkStatus(Void(), db);
wait(makeInterruptable(success(checkStatusF))); wait(makeInterruptable(success(checkStatusF)));
} }
else { else {
@ -2704,7 +2707,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
linenoise.historyAdd(line); linenoise.historyAdd(line);
} }
warn = checkStatus(timeWarning(5.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n"), db->getConnectionFile()); warn = checkStatus(timeWarning(5.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n"), db);
try { try {
state UID randomID = deterministicRandom()->randomUniqueID(); state UID randomID = deterministicRandom()->randomUniqueID();
@ -2849,7 +2852,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
continue; continue;
} }
StatusObject s = wait(makeInterruptable(StatusClient::statusFetcher(db->getConnectionFile()))); StatusObject s = wait(makeInterruptable(StatusClient::statusFetcher(db)));
if (!opt.exec.present()) printf("\n"); if (!opt.exec.present()) printf("\n");
printStatus(s, level); printStatus(s, level);
@ -3795,5 +3798,8 @@ int main(int argc, char **argv) {
} catch (Error& e) { } catch (Error& e) {
printf("ERROR: %s (%d)\n", e.what(), e.code()); printf("ERROR: %s (%d)\n", e.what(), e.code());
return 1; return 1;
} catch (boost::system::system_error& e) {
printf("ERROR: %s (%d)\n", e.what(), e.code().value());
return 1;
} }
} }

View File

@ -357,8 +357,23 @@ public:
return writeFile(snapshotFolderString(snapshotBeginVersion) + format("/%d/", snapshotFileCount / (BUGGIFY ? 1 : 5000)) + fileName); return writeFile(snapshotFolderString(snapshotBeginVersion) + format("/%d/", snapshotFileCount / (BUGGIFY ? 1 : 5000)) + fileName);
} }
// Find what should be the filename of a path by finding whatever is after the last forward or backward slash, or failing to find those, the whole string.
static std::string fileNameOnly(std::string path) {
// Find the last forward slash position, defaulting to 0 if not found
int pos = path.find_last_of('/');
if(pos == std::string::npos) {
pos = 0;
}
// Find the last backward slash position after pos, and update pos if found
int b = path.find_last_of('\\', pos);
if(b != std::string::npos) {
pos = b;
}
return path.substr(pos + 1);
}
static bool pathToRangeFile(RangeFile &out, std::string path, int64_t size) { static bool pathToRangeFile(RangeFile &out, std::string path, int64_t size) {
std::string name = basename(path); std::string name = fileNameOnly(path);
RangeFile f; RangeFile f;
f.fileName = path; f.fileName = path;
f.fileSize = size; f.fileSize = size;
@ -371,7 +386,7 @@ public:
} }
static bool pathToLogFile(LogFile &out, std::string path, int64_t size) { static bool pathToLogFile(LogFile &out, std::string path, int64_t size) {
std::string name = basename(path); std::string name = fileNameOnly(path);
LogFile f; LogFile f;
f.fileName = path; f.fileName = path;
f.fileSize = size; f.fileSize = size;
@ -389,7 +404,7 @@ public:
} }
static bool pathToKeyspaceSnapshotFile(KeyspaceSnapshotFile &out, std::string path) { static bool pathToKeyspaceSnapshotFile(KeyspaceSnapshotFile &out, std::string path) {
std::string name = basename(path); std::string name = fileNameOnly(path);
KeyspaceSnapshotFile f; KeyspaceSnapshotFile f;
f.fileName = path; f.fileName = path;
int len; int len;

View File

@ -1959,8 +1959,8 @@ public:
} }
if (!g_network->isSimulated() && !forceAction) { if (!g_network->isSimulated() && !forceAction) {
state StatusObject srcStatus = wait(StatusClient::statusFetcher(backupAgent->taskBucket->src->getConnectionFile())); state StatusObject srcStatus = wait(StatusClient::statusFetcher(backupAgent->taskBucket->src));
StatusObject destStatus = wait(StatusClient::statusFetcher(dest->getConnectionFile())); StatusObject destStatus = wait(StatusClient::statusFetcher(dest));
checkAtomicSwitchOverConfig(srcStatus, destStatus, tagName); checkAtomicSwitchOverConfig(srcStatus, destStatus, tagName);
} }

View File

@ -192,6 +192,10 @@ public:
Future<Void> clientInfoMonitor; Future<Void> clientInfoMonitor;
Future<Void> connected; Future<Void> connected;
Reference<AsyncVar<Optional<ClusterInterface>>> statusClusterInterface;
Future<Void> statusLeaderMon;
double lastStatusFetch;
int apiVersion; int apiVersion;
int mvCacheInsertLocation; int mvCacheInsertLocation;

View File

@ -46,6 +46,7 @@ ClientKnobs::ClientKnobs(bool randomize) {
init( CLIENT_EXAMPLE_AMOUNT, 20 ); init( CLIENT_EXAMPLE_AMOUNT, 20 );
init( MAX_CLIENT_STATUS_AGE, 1.0 ); init( MAX_CLIENT_STATUS_AGE, 1.0 );
init( MAX_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_PROXY_CONNECTIONS = 1; init( MAX_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_PROXY_CONNECTIONS = 1;
init( STATUS_IDLE_TIMEOUT, 120.0 );
// wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin // wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin
@ -200,7 +201,8 @@ ClientKnobs::ClientKnobs(bool randomize) {
init( CONSISTENCY_CHECK_RATE_LIMIT_MAX, 50e6 ); // Limit in per sec init( CONSISTENCY_CHECK_RATE_LIMIT_MAX, 50e6 ); // Limit in per sec
init( CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME, 7 * 24 * 60 * 60 ); // 7 days init( CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME, 7 * 24 * 60 * 60 ); // 7 days
//fdbcli //fdbcli
init( CLI_CONNECT_PARALLELISM, 10 ); init( CLI_CONNECT_PARALLELISM, 400 );
init( CLI_CONNECT_TIMEOUT, 10.0 );
} }

View File

@ -45,6 +45,7 @@ public:
int CLIENT_EXAMPLE_AMOUNT; int CLIENT_EXAMPLE_AMOUNT;
double MAX_CLIENT_STATUS_AGE; double MAX_CLIENT_STATUS_AGE;
int MAX_PROXY_CONNECTIONS; int MAX_PROXY_CONNECTIONS;
double STATUS_IDLE_TIMEOUT;
// wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin // wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin
double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test) double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test)
@ -190,10 +191,11 @@ public:
int CONSISTENCY_CHECK_RATE_LIMIT_MAX; int CONSISTENCY_CHECK_RATE_LIMIT_MAX;
int CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME; int CONSISTENCY_CHECK_ONE_ROUND_TARGET_COMPLETION_TIME;
//fdbcli
int CLI_CONNECT_PARALLELISM;
// fdbcli
int CLI_CONNECT_PARALLELISM;
double CLI_CONNECT_TIMEOUT;
ClientKnobs(bool randomize = false); ClientKnobs(bool randomize = false);
}; };

View File

@ -52,6 +52,13 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
return out; return out;
} }
if (mode == "locked") {
// Setting this key is interpreted as an instruction to use the normal version-stamp-based mechanism for locking
// the database.
out[databaseLockedKey.toString()] = deterministicRandom()->randomUniqueID().toString();
return out;
}
size_t pos; size_t pos;
// key:=value is unvalidated and unchecked // key:=value is unvalidated and unchecked
@ -300,6 +307,17 @@ ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std:
// make sure we have essential configuration options // make sure we have essential configuration options
std::string initKey = configKeysPrefix.toString() + "initialized"; std::string initKey = configKeysPrefix.toString() + "initialized";
state bool creating = m.count( initKey ) != 0; state bool creating = m.count( initKey ) != 0;
state Optional<UID> locked;
{
auto iter = m.find(databaseLockedKey.toString());
if (iter != m.end()) {
if (!creating) {
return ConfigurationResult::LOCKED_NOT_NEW;
}
locked = UID::fromString(iter->second);
m.erase(iter);
}
}
if (creating) { if (creating) {
m[initIdKey.toString()] = deterministicRandom()->randomUniqueID().toString(); m[initIdKey.toString()] = deterministicRandom()->randomUniqueID().toString();
if (!isCompleteConfiguration(m)) { if (!isCompleteConfiguration(m)) {
@ -486,6 +504,15 @@ ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std:
tr.addReadConflictRange( singleKeyRange(m.begin()->first) ); tr.addReadConflictRange( singleKeyRange(m.begin()->first) );
} }
if (locked.present()) {
ASSERT(creating);
tr.atomicOp(databaseLockedKey,
BinaryWriter::toValue(locked.get(), Unversioned())
.withPrefix(LiteralStringRef("0123456789"))
.withSuffix(LiteralStringRef("\x00\x00\x00\x00")),
MutationRef::SetVersionstampedValue);
}
for (auto i = m.begin(); i != m.end(); ++i) { for (auto i = m.begin(); i != m.end(); ++i) {
tr.set( StringRef(i->first), StringRef(i->second) ); tr.set( StringRef(i->first), StringRef(i->second) );
} }
@ -958,9 +985,13 @@ ACTOR Future<CoordinatorsResult::Type> changeQuorum( Database cx, Reference<IQuo
if(g_network->isSimulated()) { if(g_network->isSimulated()) {
for(int i = 0; i < (desiredCoordinators.size()/2)+1; i++) { for(int i = 0; i < (desiredCoordinators.size()/2)+1; i++) {
auto address = NetworkAddress(desiredCoordinators[i].ip,desiredCoordinators[i].port,true,false); auto addresses = g_simulator.getProcessByAddress(desiredCoordinators[i])->addresses;
g_simulator.protectedAddresses.insert(address);
TraceEvent("ProtectCoordinator").detail("Address", address).backtrace(); g_simulator.protectedAddresses.insert(addresses.address);
if(addresses.secondaryAddress.present()) {
g_simulator.protectedAddresses.insert(addresses.secondaryAddress.get());
}
TraceEvent("ProtectCoordinator").detail("Address", desiredCoordinators[i]).backtrace();
} }
} }
@ -1119,8 +1150,7 @@ struct AutoQuorumChange : IQuorumChange {
*err = CoordinatorsResult::NOT_ENOUGH_MACHINES; *err = CoordinatorsResult::NOT_ENOUGH_MACHINES;
return vector<NetworkAddress>(); return vector<NetworkAddress>();
} }
desiredCount = std::max(oldCoordinators.size(), (workers.size() - 1) | 1); chosen.resize((chosen.size() - 1) | 1);
chosen.resize(desiredCount);
} }
return chosen; return chosen;
@ -1516,10 +1546,14 @@ ACTOR Future<std::set<NetworkAddress>> checkForExcludingServers(Database cx, vec
state bool ok = true; state bool ok = true;
inProgressExclusion.clear(); inProgressExclusion.clear();
for(auto& s : serverList) { for(auto& s : serverList) {
auto addr = decodeServerListValue( s.value ).address(); auto addresses = decodeServerListValue( s.value ).getKeyValues.getEndpoint().addresses;
if ( addressExcluded(exclusions, addr) ) { if ( addressExcluded(exclusions, addresses.address) ) {
ok = false; ok = false;
inProgressExclusion.insert(addr); inProgressExclusion.insert(addresses.address);
}
if ( addresses.secondaryAddress.present() && addressExcluded(exclusions, addresses.secondaryAddress.get()) ) {
ok = false;
inProgressExclusion.insert(addresses.secondaryAddress.get());
} }
} }

View File

@ -61,7 +61,8 @@ public:
NOT_ENOUGH_WORKERS, NOT_ENOUGH_WORKERS,
REGION_REPLICATION_MISMATCH, REGION_REPLICATION_MISMATCH,
DCID_MISSING, DCID_MISSING,
SUCCESS LOCKED_NOT_NEW,
SUCCESS,
}; };
}; };

View File

@ -37,12 +37,12 @@
#include "fdbrpc/LoadBalance.h" #include "fdbrpc/LoadBalance.h"
#include "fdbrpc/Net2FileSystem.h" #include "fdbrpc/Net2FileSystem.h"
#include "fdbrpc/simulator.h" #include "fdbrpc/simulator.h"
#include "fdbrpc/TLSConnection.h"
#include "flow/ActorCollection.h" #include "flow/ActorCollection.h"
#include "flow/DeterministicRandom.h" #include "flow/DeterministicRandom.h"
#include "flow/Knobs.h" #include "flow/Knobs.h"
#include "flow/Platform.h" #include "flow/Platform.h"
#include "flow/SystemMonitor.h" #include "flow/SystemMonitor.h"
#include "flow/TLSPolicy.h"
#include "flow/UnitTest.h" #include "flow/UnitTest.h"
#if defined(CMAKE_BUILD) || !defined(WIN32) #if defined(CMAKE_BUILD) || !defined(WIN32)
@ -66,12 +66,15 @@ using std::min;
using std::pair; using std::pair;
NetworkOptions networkOptions; NetworkOptions networkOptions;
Reference<TLSOptions> tlsOptions; TLSParams tlsParams;
static Reference<TLSPolicy> tlsPolicy;
static void initTLSOptions() { static void initTLSPolicy() {
if (!tlsOptions) { #ifndef TLS_DISABLED
tlsOptions = Reference<TLSOptions>(new TLSOptions()); if (!tlsPolicy) {
tlsPolicy = Reference<TLSPolicy>(new TLSPolicy(TLSPolicy::Is::CLIENT));
} }
#endif
} }
static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/"); static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/");
@ -884,49 +887,46 @@ void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> valu
break; break;
case FDBNetworkOptions::TLS_CERT_PATH: case FDBNetworkOptions::TLS_CERT_PATH:
validateOptionValue(value, true); validateOptionValue(value, true);
initTLSOptions(); tlsParams.tlsCertPath = value.get().toString();
tlsOptions->set_cert_file( value.get().toString() );
break; break;
case FDBNetworkOptions::TLS_CERT_BYTES: case FDBNetworkOptions::TLS_CERT_BYTES: {
initTLSOptions();
tlsOptions->set_cert_data( value.get().toString() );
break;
case FDBNetworkOptions::TLS_CA_PATH:
validateOptionValue(value, true); validateOptionValue(value, true);
initTLSOptions(); tlsParams.tlsCertBytes = value.get().toString();
tlsOptions->set_ca_file( value.get().toString() );
break; break;
case FDBNetworkOptions::TLS_CA_BYTES: }
case FDBNetworkOptions::TLS_CA_PATH: {
validateOptionValue(value, true); validateOptionValue(value, true);
initTLSOptions(); tlsParams.tlsCAPath = value.get().toString();
tlsOptions->set_ca_data(value.get().toString());
break; break;
}
case FDBNetworkOptions::TLS_CA_BYTES: {
validateOptionValue(value, true);
tlsParams.tlsCABytes = value.get().toString();
break;
}
case FDBNetworkOptions::TLS_PASSWORD: case FDBNetworkOptions::TLS_PASSWORD:
validateOptionValue(value, true); validateOptionValue(value, true);
initTLSOptions(); tlsParams.tlsPassword = value.get().toString();
tlsOptions->set_key_password(value.get().toString());
break; break;
case FDBNetworkOptions::TLS_KEY_PATH: case FDBNetworkOptions::TLS_KEY_PATH:
validateOptionValue(value, true); validateOptionValue(value, true);
initTLSOptions(); tlsParams.tlsKeyPath = value.get().toString();
tlsOptions->set_key_file( value.get().toString() );
break; break;
case FDBNetworkOptions::TLS_KEY_BYTES: case FDBNetworkOptions::TLS_KEY_BYTES: {
validateOptionValue(value, true); validateOptionValue(value, true);
initTLSOptions(); tlsParams.tlsKeyBytes = value.get().toString();
tlsOptions->set_key_data( value.get().toString() );
break; break;
}
case FDBNetworkOptions::TLS_VERIFY_PEERS: case FDBNetworkOptions::TLS_VERIFY_PEERS:
validateOptionValue(value, true); validateOptionValue(value, true);
initTLSOptions(); initTLSPolicy();
try { #ifndef TLS_DISABLED
tlsOptions->set_verify_peers({ value.get().toString() }); if (!tlsPolicy->set_verify_peers({ value.get().toString() })) {
} catch( Error& e ) {
TraceEvent(SevWarnAlways, "TLSValidationSetError") TraceEvent(SevWarnAlways, "TLSValidationSetError")
.error( e )
.detail("Input", value.get().toString() ); .detail("Input", value.get().toString() );
throw invalid_option_value(); throw invalid_option_value();
} }
#endif
break; break;
case FDBNetworkOptions::CLIENT_BUGGIFY_ENABLE: case FDBNetworkOptions::CLIENT_BUGGIFY_ENABLE:
enableBuggify(true, BuggifyType::Client); enableBuggify(true, BuggifyType::Client);
@ -984,15 +984,11 @@ void setupNetwork(uint64_t transportId, bool useMetrics) {
if (!networkOptions.logClientInfo.present()) if (!networkOptions.logClientInfo.present())
networkOptions.logClientInfo = true; networkOptions.logClientInfo = true;
g_network = newNet2(false, useMetrics || networkOptions.traceDirectory.present()); initTLSPolicy();
g_network = newNet2(false, useMetrics || networkOptions.traceDirectory.present(), tlsPolicy, tlsParams);
FlowTransport::createInstance(true, transportId); FlowTransport::createInstance(true, transportId);
Net2FileSystem::newFileSystem(); Net2FileSystem::newFileSystem();
initTLSOptions();
#ifndef TLS_DISABLED
tlsOptions->register_network();
#endif
} }
void runNetwork() { void runNetwork() {
@ -2550,8 +2546,8 @@ ACTOR void checkWrites( Database cx, Future<Void> committed, Promise<Void> outCo
} else { } else {
Optional<Value> val = wait( tr.get( it->range().begin ) ); Optional<Value> val = wait( tr.get( it->range().begin ) );
if( !val.present() || val.get() != m.setValue ) { if( !val.present() || val.get() != m.setValue ) {
TraceEvent evt = TraceEvent(SevError, "CheckWritesFailed") TraceEvent evt(SevError, "CheckWritesFailed");
.detail("Class", "Set") evt.detail("Class", "Set")
.detail("Key", it->range().begin) .detail("Key", it->range().begin)
.detail("Expected", m.setValue); .detail("Expected", m.setValue);
if( !val.present() ) if( !val.present() )

View File

@ -1165,8 +1165,8 @@ Optional<Value> getValueFromJSON(StatusObject statusObj) {
} }
} }
ACTOR Future<Optional<Value>> getJSON(Reference<ClusterConnectionFile> clusterFile) { ACTOR Future<Optional<Value>> getJSON(Database db) {
StatusObject statusObj = wait(StatusClient::statusFetcher(clusterFile)); StatusObject statusObj = wait(StatusClient::statusFetcher(db));
return getValueFromJSON(statusObj); return getValueFromJSON(statusObj);
} }
@ -1194,7 +1194,7 @@ Future< Optional<Value> > ReadYourWritesTransaction::get( const Key& key, bool s
if (key == LiteralStringRef("\xff\xff/status/json")){ if (key == LiteralStringRef("\xff\xff/status/json")){
if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) { if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) {
return getJSON(tr.getDatabase()->getConnectionFile()); return getJSON(tr.getDatabase());
} }
else { else {
return Optional<Value>(); return Optional<Value>();

View File

@ -451,7 +451,7 @@ StatusObject getClientDatabaseStatus(StatusObjectReader client, StatusObjectRead
return databaseStatus; return databaseStatus;
} }
ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f ) { ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f, Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface) {
if (!g_network) throw network_not_setup(); if (!g_network) throw network_not_setup();
state StatusObject statusObj; state StatusObject statusObj;
@ -461,13 +461,10 @@ ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f
// This could be read from the JSON but doing so safely is ugly so using a real var. // This could be read from the JSON but doing so safely is ugly so using a real var.
state bool quorum_reachable = false; state bool quorum_reachable = false;
state int coordinatorsFaultTolerance = 0; state int coordinatorsFaultTolerance = 0;
state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(new AsyncVar<Optional<ClusterInterface>>);
try { try {
state int64_t clientTime = time(0); state int64_t clientTime = time(0);
state Future<Void> leaderMon = monitorLeader<ClusterInterface>(f, clusterInterface);
StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance)); StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance));
statusObjClient = _statusObjClient; statusObjClient = _statusObjClient;
@ -547,6 +544,23 @@ ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f
return statusObj; return statusObj;
} }
Future<StatusObject> StatusClient::statusFetcher( Reference<ClusterConnectionFile> clusterFile ) { ACTOR Future<Void> timeoutMonitorLeader(Database db) {
return statusFetcherImpl(clusterFile); state Future<Void> leadMon = monitorLeader<ClusterInterface>(db->getConnectionFile(), db->statusClusterInterface);
loop {
wait(delay(CLIENT_KNOBS->STATUS_IDLE_TIMEOUT + 0.00001 + db->lastStatusFetch - now()));
if(now() - db->lastStatusFetch > CLIENT_KNOBS->STATUS_IDLE_TIMEOUT) {
db->statusClusterInterface = Reference<AsyncVar<Optional<ClusterInterface>>>();
return Void();
}
}
}
Future<StatusObject> StatusClient::statusFetcher( Database db ) {
db->lastStatusFetch = now();
if(!db->statusClusterInterface) {
db->statusClusterInterface = Reference<AsyncVar<Optional<ClusterInterface>>>(new AsyncVar<Optional<ClusterInterface>>);
db->statusLeaderMon = timeoutMonitorLeader(db);
}
return statusFetcherImpl(db->getConnectionFile(), db->statusClusterInterface);
} }

View File

@ -23,11 +23,12 @@
#include "flow/flow.h" #include "flow/flow.h"
#include "fdbclient/Status.h" #include "fdbclient/Status.h"
#include "fdbclient/DatabaseContext.h"
class StatusClient { class StatusClient {
public: public:
enum StatusLevel { MINIMAL = 0, NORMAL = 1, DETAILED = 2, JSON = 3 }; enum StatusLevel { MINIMAL = 0, NORMAL = 1, DETAILED = 2, JSON = 3 };
static Future<StatusObject> statusFetcher(Reference<ClusterConnectionFile> clusterFile); static Future<StatusObject> statusFetcher(Database db);
}; };
#endif #endif

View File

@ -390,7 +390,7 @@ struct SplitMetricsRequest {
struct GetStorageMetricsReply { struct GetStorageMetricsReply {
constexpr static FileIdentifier file_identifier = 15491478; constexpr static FileIdentifier file_identifier = 15491478;
StorageMetrics load; StorageMetrics load;
StorageMetrics free; StorageMetrics available;
StorageMetrics capacity; StorageMetrics capacity;
double bytesInputRate; double bytesInputRate;
@ -398,7 +398,7 @@ struct GetStorageMetricsReply {
template <class Ar> template <class Ar>
void serialize(Ar& ar) { void serialize(Ar& ar) {
serializer(ar, load, free, capacity, bytesInputRate); serializer(ar, load, available, capacity, bytesInputRate);
} }
}; };

View File

@ -35,7 +35,7 @@
* compile-time configuration. * compile-time configuration.
*/ */
#ifndef HAVE_OPENSSL #if !defined(HAVE_OPENSSL) || defined(TLS_DISABLED)
#include <string.h> #include <string.h>

View File

@ -23,7 +23,7 @@
* See md5.c for more information. * See md5.c for more information.
*/ */
#ifdef HAVE_OPENSSL #if defined(HAVE_OPENSSL) && !defined(TLS_DISABLED)
#include <openssl/md5.h> #include <openssl/md5.h>
#elif !defined(_MD5_H) #elif !defined(_MD5_H)
#define _MD5_H #define _MD5_H

View File

@ -161,7 +161,7 @@ description is not currently required but encouraged.
defaultFor="500"/> defaultFor="500"/>
<Option name="transaction_retry_limit" code="501" <Option name="transaction_retry_limit" code="501"
paramType="Int" paramDescription="number of times to retry" paramType="Int" paramDescription="number of times to retry"
description="Set a timeout in milliseconds which, when elapsed, will cause a transaction automatically to be cancelled. This sets the ``retry_limit`` option of each transaction created by this database. See the transaction option description for more information." description="Set a maximum number of retries after which additional calls to ``onError`` will throw the most recently seen error code. This sets the ``retry_limit`` option of each transaction created by this database. See the transaction option description for more information."
defaultFor="501"/> defaultFor="501"/>
<Option name="transaction_max_retry_delay" code="502" <Option name="transaction_max_retry_delay" code="502"
paramType="Int" paramDescription="value in milliseconds of maximum delay" paramType="Int" paramDescription="value in milliseconds of maximum delay"
@ -220,7 +220,7 @@ description is not currently required but encouraged.
<Option name="debug_transaction_identifier" code="403" paramType="String" paramDescription="String identifier to be used when tracing or profiling this transaction. The identifier must not exceed 100 characters." <Option name="debug_transaction_identifier" code="403" paramType="String" paramDescription="String identifier to be used when tracing or profiling this transaction. The identifier must not exceed 100 characters."
description="Sets a client provided identifier for the transaction that will be used in scenarios like tracing or profiling. Client trace logging or transaction profiling must be separately enabled." /> description="Sets a client provided identifier for the transaction that will be used in scenarios like tracing or profiling. Client trace logging or transaction profiling must be separately enabled." />
<Option name="log_transaction" code="404" <Option name="log_transaction" code="404"
description="Enables tracing for this transaction and logs results to the client trace logs. The DEBUG_TRANSACTION_IDENTIFIER option must be set before using this option, and client trace logging must be enabled and to get log output." /> description="Enables tracing for this transaction and logs results to the client trace logs. The DEBUG_TRANSACTION_IDENTIFIER option must be set before using this option, and client trace logging must be enabled to get log output." />
<Option name="transaction_logging_max_field_length" code="405" paramType="Int" paramDescription="Maximum length of escaped key and value fields." <Option name="transaction_logging_max_field_length" code="405" paramType="Int" paramDescription="Maximum length of escaped key and value fields."
description="Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option, after which the field will be truncated. A negative value disables truncation." /> description="Sets the maximum escaped length of key and value fields to be logged to the trace file via the LOG_TRANSACTION option, after which the field will be truncated. A negative value disables truncation." />
<Option name="timeout" code="500" <Option name="timeout" code="500"
@ -243,7 +243,7 @@ description is not currently required but encouraged.
<Option name="snapshot_ryw_disable" code="601" <Option name="snapshot_ryw_disable" code="601"
description="Snapshot read operations will not see the results of writes done in the same transaction. This was the default behavior prior to API version 300." /> description="Snapshot read operations will not see the results of writes done in the same transaction. This was the default behavior prior to API version 300." />
<Option name="lock_aware" code="700" <Option name="lock_aware" code="700"
description="The transaction can read and write to locked databases, and is resposible for checking that it took the lock."/> description="The transaction can read and write to locked databases, and is responsible for checking that it took the lock."/>
<Option name="used_during_commit_protection_disable" code="701" <Option name="used_during_commit_protection_disable" code="701"
description="By default, operations that are performed on a transaction while it is being committed will not only fail themselves, but they will attempt to fail other in-flight operations (such as the commit) as well. This behavior is intended to help developers discover situations where operations could be unintentionally executed after the transaction has been reset. Setting this option removes that protection, causing only the offending operation to fail."/> description="By default, operations that are performed on a transaction while it is being committed will not only fail themselves, but they will attempt to fail other in-flight operations (such as the commit) as well. This behavior is intended to help developers discover situations where operations could be unintentionally executed after the transaction has been reset. Setting this option removes that protection, causing only the offending operation to fail."/>
<Option name="read_lock_aware" code="702" <Option name="read_lock_aware" code="702"

View File

@ -26,7 +26,6 @@ set(FDBRPC_SRCS
sim2.actor.cpp sim2.actor.cpp
sim_validation.cpp sim_validation.cpp
TimedRequest.h TimedRequest.h
TLSConnection.actor.cpp
TraceFileIO.cpp) TraceFileIO.cpp)
set(FDBRPC_THIRD_PARTY_SRCS set(FDBRPC_THIRD_PARTY_SRCS

View File

@ -233,6 +233,7 @@ struct YieldMockNetwork : INetwork, ReferenceCounted<YieldMockNetwork> {
virtual TaskPriority getCurrentTask() { return baseNetwork->getCurrentTask(); } virtual TaskPriority getCurrentTask() { return baseNetwork->getCurrentTask(); }
virtual void setCurrentTask(TaskPriority taskID) { baseNetwork->setCurrentTask(taskID); } virtual void setCurrentTask(TaskPriority taskID) { baseNetwork->setCurrentTask(taskID); }
virtual double now() { return baseNetwork->now(); } virtual double now() { return baseNetwork->now(); }
virtual double timer() { return baseNetwork->timer(); }
virtual void stop() { return baseNetwork->stop(); } virtual void stop() { return baseNetwork->stop(); }
virtual bool isSimulated() const { return baseNetwork->isSimulated(); } virtual bool isSimulated() const { return baseNetwork->isSimulated(); }
virtual void onMainThread(Promise<Void>&& signal, TaskPriority taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); } virtual void onMainThread(Promise<Void>&& signal, TaskPriority taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }

View File

@ -302,7 +302,7 @@ ACTOR Future<Void> connectionMonitor( Reference<Peer> peer ) {
state double lastRefreshed = now(); state double lastRefreshed = now();
state int64_t lastBytesReceived = peer->bytesReceived; state int64_t lastBytesReceived = peer->bytesReceived;
loop { loop {
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME)); wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME, TaskPriority::ReadSocket));
if (lastBytesReceived < peer->bytesReceived) { if (lastBytesReceived < peer->bytesReceived) {
lastRefreshed = now(); lastRefreshed = now();
lastBytesReceived = peer->bytesReceived; lastBytesReceived = peer->bytesReceived;
@ -317,7 +317,7 @@ ACTOR Future<Void> connectionMonitor( Reference<Peer> peer ) {
//We cannot let an error be thrown from connectionMonitor while still on the stack from scanPackets in connectionReader //We cannot let an error be thrown from connectionMonitor while still on the stack from scanPackets in connectionReader
//because then it would not call the destructor of connectionReader when connectionReader is cancelled. //because then it would not call the destructor of connectionReader when connectionReader is cancelled.
wait(delay(0)); wait(delay(0, TaskPriority::ReadSocket));
if (peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) { if (peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) {
if (peer->peerReferences == 0 && if (peer->peerReferences == 0 &&
@ -332,7 +332,7 @@ ACTOR Future<Void> connectionMonitor( Reference<Peer> peer ) {
} }
} }
wait (delayJittered(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME)); wait (delayJittered(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME, TaskPriority::ReadSocket));
// TODO: Stop monitoring and close the connection with no onDisconnect requests outstanding // TODO: Stop monitoring and close the connection with no onDisconnect requests outstanding
state ReplyPromise<Void> reply; state ReplyPromise<Void> reply;
@ -429,14 +429,15 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
try { try {
choose { choose {
when(Reference<IConnection> _conn = when( Reference<IConnection> _conn = wait( INetworkConnections::net()->connect(self->destination) ) ) {
wait(INetworkConnections::net()->connect(self->destination))) { conn = _conn;
wait(conn->connectHandshake());
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false)); IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false));
if (self->unsent.empty()) { if (self->unsent.empty()) {
_conn->close(); conn->close();
conn = Reference<IConnection>();
continue; continue;
} else { } else {
conn = _conn;
TraceEvent("ConnectionExchangingConnectPacket", conn->getDebugID()) TraceEvent("ConnectionExchangingConnectPacket", conn->getDebugID())
.suppressFor(1.0) .suppressFor(1.0)
.detail("PeerAddr", self->destination); .detail("PeerAddr", self->destination);
@ -965,6 +966,7 @@ ACTOR static Future<Void> connectionReader(
ACTOR static Future<Void> connectionIncoming( TransportData* self, Reference<IConnection> conn ) { ACTOR static Future<Void> connectionIncoming( TransportData* self, Reference<IConnection> conn ) {
try { try {
wait(conn->acceptHandshake());
state Promise<Reference<Peer>> onConnected; state Promise<Reference<Peer>> onConnected;
state Future<Void> reader = connectionReader( self, conn, Reference<Peer>(), onConnected ); state Future<Void> reader = connectionReader( self, conn, Reference<Peer>(), onConnected );
choose { choose {
@ -991,11 +993,13 @@ ACTOR static Future<Void> listen( TransportData* self, NetworkAddress listenAddr
try { try {
loop { loop {
Reference<IConnection> conn = wait( listener->accept() ); Reference<IConnection> conn = wait( listener->accept() );
TraceEvent("ConnectionFrom", conn->getDebugID()).suppressFor(1.0) if(conn) {
.detail("FromAddress", conn->getPeerAddress()) TraceEvent("ConnectionFrom", conn->getDebugID()).suppressFor(1.0)
.detail("ListenAddress", listenAddr.toString()); .detail("FromAddress", conn->getPeerAddress())
incoming.add( connectionIncoming(self, conn) ); .detail("ListenAddress", listenAddr.toString());
wait(delay(0) || delay(FLOW_KNOBS->CONNECTION_ACCEPT_DELAY, TaskPriority::WriteSocket)); incoming.add( connectionIncoming(self, conn) );
}
wait(delay(0, TaskPriority::AcceptSocket));
} }
} catch (Error& e) { } catch (Error& e) {
TraceEvent(SevError, "ListenError").error(e); TraceEvent(SevError, "ListenError").error(e);
@ -1119,7 +1123,7 @@ void FlowTransport::removePeerReference(const Endpoint& endpoint, bool isStream)
.detail("Address", endpoint.getPrimaryAddress()) .detail("Address", endpoint.getPrimaryAddress())
.detail("Token", endpoint.token); .detail("Token", endpoint.token);
} }
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) { if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0 && peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_UNREFERENCED_CLOSE_DELAY) {
peer->resetPing.trigger(); peer->resetPing.trigger();
} }
} }

View File

@ -1,136 +0,0 @@
/*
* ITLSPlugin.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FDB_ITLSPLUGIN_H
#define FDB_ITLSPLUGIN_H
#pragma once
#include <stdint.h>
struct ITLSSession {
enum { SUCCESS = 0, WANT_READ = -1, WANT_WRITE = -2, FAILED = -3 };
virtual void addref() = 0;
virtual void delref() = 0;
// handshake should return SUCCESS if the handshake is complete,
// FAILED on fatal error, or one of WANT_READ or WANT_WRITE if the
// handshake should be reattempted after more data can be
// read/written on the underlying connection.
virtual int handshake() = 0;
// read should return the (non-zero) number of bytes read,
// WANT_READ or WANT_WRITE if the operation is blocked by the
// underlying stream, or FAILED if there is an error (including a
// closed connection).
virtual int read(uint8_t* data, int length) = 0;
// write should return the (non-zero) number of bytes written, or
// WANT_READ or WANT_WRITE if the operation is blocked by the
// underlying stream, or FAILED if there is an error.
virtual int write(const uint8_t* data, int length) = 0;
};
// Returns the number of bytes sent (possibly 0), or -1 on error
// (including connection close)
typedef int(*TLSSendCallbackFunc)(void* ctx, const uint8_t* buf, int len);
// Returns the number of bytes read (possibly 0), or -1 on error
// (including connection close)
typedef int(*TLSRecvCallbackFunc)(void* ctx, uint8_t* buf, int len);
struct ITLSPolicy {
virtual void addref() = 0;
virtual void delref() = 0;
// set_ca_data should import the provided certificate list and
// associate it with this policy. cert_data will point to a PEM
// encoded certificate list of trust roots.
//
// set_ca_data should return true if the operation succeeded,
// and false otherwise. After the first call to create_session for
// a given policy, set_ca_data should immediately return false
// if called.
virtual bool set_ca_data(const uint8_t* ca_data, int ca_len) = 0;
// set_cert_data should import the provided certificate list and
// associate it with this policy. cert_data will point to a PEM
// encoded certificate list, ordered such that each certificate
// certifies the one before it.
//
// cert_data may additionally contain key information, which must
// be ignored.
//
// set_cert_data should return true if the operation succeeded,
// and false otherwise. After the first call to create_session for
// a given policy, set_cert_data should immediately return false
// if called.
virtual bool set_cert_data(const uint8_t* cert_data, int cert_len) = 0;
// set_key_data should import the provided private key and
// associate it with this policy. key_data will point to a PEM
// encoded key, which may be encrypted. If encrypted the password
// argument should be specified, otherwise it may be NULL.
//
// key_data may additionally contain certificate information,
// which must be ignored.
//
// set_key_data should return true if the operation succeeded, and
// false otherwise. After the first call to create_session for a
// given policy, set_key_data should immediately return false if
// called.
virtual bool set_key_data(const uint8_t* key_data, int key_len, const char* password) = 0;
// set_verify_peers should modify the validation rules for
// verifying a peer during connection handshake. The format of
// verify_peers is implementation specific.
//
// set_verify_peers should return true if the operation succeed,
// and false otherwise. After the first call to create_session for
// a given policy, set_verify_peers should immediately return
// false if called.
virtual bool set_verify_peers(int count, const uint8_t* verify_peers[], int verify_peers_len[]) = 0;
// create_session should return a new object that implements
// ITLSSession, associated with this policy. After the first call
// to create_session for a given policy, further calls to
// ITLSPolicy::set_* will fail and return false.
//
// The newly created session should use send_func and recv_func to
// send and receive data on the underlying transport, and must
// provide send_ctx/recv_ctx to the callbacks.
//
// uid will be used to identify this session within trace events
virtual ITLSSession* create_session(bool is_client, const char *servername, TLSSendCallbackFunc send_func, void* send_ctx, TLSRecvCallbackFunc recv_func, void* recv_ctx, void* uid) = 0;
};
struct ITLSPlugin {
virtual void addref() = 0;
virtual void delref() = 0;
// create_policy should return a new object that implements
// ITLSPolicy.
virtual ITLSPolicy* create_policy() = 0;
static inline const char* get_plugin_type_name_and_version() { return "ITLSPlugin"; }
};
#endif /* FDB_ITLSPLUGIN_H */

View File

@ -18,27 +18,21 @@
* limitations under the License. * limitations under the License.
*/ */
#ifndef _FLOW_LOADPLUGIN_H_
#define _FLOW_LOADPLUGIN_H_
#pragma once #pragma once
// Specialized TLS plugin library #include <string>
extern "C" void *get_tls_plugin(const char *plugin_type_name_and_version); #include "flow/flow.h"
// Name of specialized TLS Plugin
extern const char* tlsPluginName;
template <class T> template <class T>
Reference<T> loadPlugin( std::string const& plugin_name ) { Reference<T> loadPlugin( std::string const& plugin_name ) {
void *(*get_plugin)(const char*) = NULL; void *(*get_plugin)(const char*) = NULL;
#ifndef TLS_DISABLED void* plugin = loadLibrary( plugin_name.c_str() );
if (!plugin_name.compare(tlsPluginName)) { if (plugin)
get_plugin = (void*(*)(const char*)) get_tls_plugin; get_plugin = (void*(*)(const char*))loadFunction( plugin, "get_plugin" );
}
else
#endif
{
void* plugin = loadLibrary( plugin_name.c_str() );
if (plugin)
get_plugin = (void*(*)(const char*))loadFunction( plugin, "get_plugin" );
}
return (get_plugin) ? Reference<T>( (T*)get_plugin( T::get_plugin_type_name_and_version() ) ) : Reference<T>( NULL ); return (get_plugin) ? Reference<T>( (T*)get_plugin( T::get_plugin_type_name_and_version() ) ) : Reference<T>( NULL );
} }
#endif

View File

@ -129,8 +129,7 @@ public:
std::vector<LocalityEntry> const& getEntries() const std::vector<LocalityEntry> const& getEntries() const
{ return _entryArray; } { return _entryArray; }
std::vector<LocalityEntry> const& getMutableEntries() const std::vector<LocalityEntry>& getMutableEntries() { return _mutableEntryArray; }
{ return _mutableEntryArray; }
std::vector<LocalityEntry> const& getGroupEntries() const std::vector<LocalityEntry> const& getGroupEntries() const
{ return _localitygroup->_entryArray; } { return _localitygroup->_entryArray; }
@ -253,7 +252,7 @@ public:
while (nRandomItems > 0) while (nRandomItems > 0)
{ {
if (nItemsLeft <= 0) { if (nRandomItems > nItemsLeft || nItemsLeft <= 0) {
bComplete = false; bComplete = false;
break; break;
} }
@ -479,6 +478,8 @@ public:
Reference<StringToIntMap> _keymap; Reference<StringToIntMap> _keymap;
virtual std::vector<std::vector<AttribValue>> const& getKeyValueArray() const { return _keyValueArray; }
protected: protected:
virtual Reference<StringToIntMap>& getGroupValueMap() virtual Reference<StringToIntMap>& getGroupValueMap()
{ return _localitygroup->getGroupValueMap(); } { return _localitygroup->getGroupValueMap(); }

View File

@ -119,6 +119,8 @@ struct PolicyAcross : IReplicationPolicy, public ReferenceCounted<PolicyAcross>
explicit PolicyAcross(const PolicyAcross& other) : PolicyAcross(other._count, other._attribKey, other._policy) {} explicit PolicyAcross(const PolicyAcross& other) : PolicyAcross(other._count, other._attribKey, other._policy) {}
virtual ~PolicyAcross(); virtual ~PolicyAcross();
virtual std::string name() const { return "Across"; } virtual std::string name() const { return "Across"; }
std::string embeddedPolicyName() const { return _policy->name(); }
int getCount() const { return _count; }
virtual std::string info() const { return format("%s^%d x ", _attribKey.c_str(), _count) + _policy->info(); } virtual std::string info() const { return format("%s^%d x ", _attribKey.c_str(), _count) + _policy->info(); }
virtual int maxResults() const { return _count * _policy->maxResults(); } virtual int maxResults() const { return _count * _policy->maxResults(); }
virtual int depth() const { return 1 + _policy->depth(); } virtual int depth() const { return 1 + _policy->depth(); }

View File

@ -82,14 +82,63 @@ double ratePolicy(
return rating; return rating;
} }
bool findBestPolicySet( int mostUsedZoneCount(Reference<LocalitySet>& logServerSet, std::vector<LocalityEntry>& bestSet) {
std::vector<LocalityEntry>& bestResults, AttribKey indexKey = logServerSet->keyIndex("zoneid");
Reference<LocalitySet> & localitySet, std::map<AttribValue, int> entries;
Reference<IReplicationPolicy> const& policy, for(int i = 0; i < bestSet.size(); i++) {
unsigned int nMinItems, Optional<AttribValue> value = logServerSet->getRecordViaEntry(bestSet[i])->getValue(indexKey);
unsigned int nSelectTests, entries[value.get()]++;
unsigned int nPolicyTests) }
{ int maxEntries = 0;
for(auto it : entries) {
maxEntries = std::max(maxEntries, it.second);
}
return maxEntries;
}
bool findBestPolicySetSimple(int targetUniqueValueCount, Reference<LocalitySet>& logServerSet, std::vector<LocalityEntry>& bestSet,
int desired) {
auto& mutableEntries = logServerSet->getMutableEntries();
deterministicRandom()->randomShuffle(mutableEntries);
// First make sure the current localitySet is able to fulfuill the policy
AttribKey indexKey = logServerSet->keyIndex("zoneid");
int uniqueValueCount = logServerSet->getKeyValueArray()[indexKey._id].size();
if (uniqueValueCount < targetUniqueValueCount) {
// logServerSet won't be able to fulfill the policy
return false;
}
std::map<AttribValue, std::vector<int>> entries;
for(int i = 0; i < mutableEntries.size(); i++) {
Optional<AttribValue> value = logServerSet->getRecord(mutableEntries[i]._id)->getValue(indexKey);
if (value.present()) {
entries[value.get()].push_back(i);
}
}
ASSERT_WE_THINK(uniqueValueCount == entries.size());
desired = std::max(desired, targetUniqueValueCount);
auto it = entries.begin();
while (bestSet.size() < desired) {
if(it->second.size()) {
bestSet.push_back(mutableEntries[it->second.back()]);
it->second.pop_back();
}
++it;
if(it == entries.end()) {
it = entries.begin();
}
}
return true;
}
bool findBestPolicySetExpensive(std::vector<LocalityEntry>& bestResults, Reference<LocalitySet>& localitySet,
Reference<IReplicationPolicy> const& policy, unsigned int nMinItems,
unsigned int nSelectTests, unsigned int nPolicyTests) {
bool bSucceeded = true; bool bSucceeded = true;
Reference<LocalitySet> bestLocalitySet, testLocalitySet; Reference<LocalitySet> bestLocalitySet, testLocalitySet;
std::vector<LocalityEntry> results; std::vector<LocalityEntry> results;
@ -113,9 +162,7 @@ bool findBestPolicySet(
} }
// Get some additional random items, if needed // Get some additional random items, if needed
if ((nMinItems > results.size()) && if ((nMinItems > results.size()) && (!localitySet->random(results, results, nMinItems - results.size()))) {
(!localitySet->random(results, results, nMinItems-results.size())))
{
bSucceeded = false; bSucceeded = false;
break; break;
} }
@ -158,6 +205,53 @@ bool findBestPolicySet(
return bSucceeded; return bSucceeded;
} }
bool findBestPolicySet(std::vector<LocalityEntry>& bestResults, Reference<LocalitySet>& localitySet,
Reference<IReplicationPolicy> const& policy, unsigned int nMinItems, unsigned int nSelectTests,
unsigned int nPolicyTests) {
bool bestFound = false;
// Specialization for policies of shape:
// - PolicyOne()
// - PolicyAcross(,"zoneId",PolicyOne())
// - TODO: More specializations for common policies
if (policy->name() == "One") {
bestFound = true;
int count = 0;
auto& mutableEntries = localitySet->getMutableEntries();
deterministicRandom()->randomShuffle(mutableEntries);
for (auto const& entry : mutableEntries) {
bestResults.push_back(entry);
if (++count == nMinItems) break;
}
} else if (policy->name() == "Across") {
PolicyAcross* pa = (PolicyAcross*)policy.getPtr();
std::set<std::string> attributeKeys;
pa->attributeKeys(&attributeKeys);
if (pa->embeddedPolicyName() == "One" && attributeKeys.size() == 1 &&
*attributeKeys.begin() == "zoneid" // This algorithm can actually apply to any field
) {
bestFound = findBestPolicySetSimple(pa->getCount(), localitySet, bestResults, nMinItems);
if (bestFound && g_network->isSimulated()) {
std::vector<LocalityEntry> oldBest;
auto oldBestFound =
findBestPolicySetExpensive(oldBest, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
if (!oldBestFound) {
TraceEvent(SevError, "FBPSMissmatch").detail("Policy", policy->info());
} else {
ASSERT(mostUsedZoneCount(localitySet, bestResults) <= mostUsedZoneCount(localitySet, oldBest));
}
}
} else {
bestFound =
findBestPolicySetExpensive(bestResults, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
}
} else {
bestFound = findBestPolicySetExpensive(bestResults, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
}
return bestFound;
}
bool findBestUniquePolicySet( bool findBestUniquePolicySet(
std::vector<LocalityEntry>& bestResults, std::vector<LocalityEntry>& bestResults,
Reference<LocalitySet> & localitySet, Reference<LocalitySet> & localitySet,

View File

@ -1,545 +0,0 @@
/*
* TLSConnection.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include "flow/flow.h"
#include "flow/network.h"
#include "flow/Knobs.h"
#include "fdbrpc/TLSConnection.h"
#include "fdbrpc/ITLSPlugin.h"
#include "fdbrpc/LoadPlugin.h"
#include "fdbrpc/Platform.h"
#include "fdbrpc/IAsyncFile.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// Name of specialized TLS Plugin
const char* tlsPluginName = "fdb-libressl-plugin";
// Must not throw an exception from this function!
static int send_func(void* ctx, const uint8_t* buf, int len) {
TLSConnection* conn = (TLSConnection*)ctx;
try {
SendBuffer sb;
sb.bytes_sent = 0;
sb.bytes_written = len;
sb.data = buf;
sb.next = 0;
int w = conn->conn->write( &sb );
return w;
} catch ( Error& e ) {
TraceEvent("TLSConnectionSendError", conn->getDebugID()).suppressFor(1.0).detail("Peer", conn->getPeerAddress().toString()).error(e);
return -1;
} catch ( ... ) {
TraceEvent("TLSConnectionSendError", conn->getDebugID()).suppressFor(1.0).detail("Peer", conn->getPeerAddress()).error( unknown_error() );
return -1;
}
}
// Must not throw an exception from this function!
static int recv_func(void* ctx, uint8_t* buf, int len) {
TLSConnection* conn = (TLSConnection*)ctx;
try {
int r = conn->conn->read( buf, buf + len );
return r;
} catch ( Error& e ) {
TraceEvent("TLSConnectionRecvError", conn->getDebugID()).suppressFor(1.0).detail("Peer", conn->getPeerAddress()).error(e);
return -1;
} catch ( ... ) {
TraceEvent("TLSConnectionRecvError", conn->getDebugID()).suppressFor(1.0).detail("Peer", conn->getPeerAddress()).error( unknown_error() );
return -1;
}
}
// Drives the TLS handshake on an already-established raw connection.
// Server-side, a per-peer-IP throttle map is consulted first: a peer that has
// accumulated too many recent handshake failures is stalled for
// CONNECTION_MONITOR_TIMEOUT and then failed with connection_failed().
// Every handshake failure creates or increments the peer's throttle entry.
ACTOR static Future<Void> handshake( TLSConnection* self ) {
	// Throttle key: clients key by (ip, port); servers collapse to (ip, 0).
	state std::pair<IPAddress,uint16_t> peerIP = std::make_pair(self->conn->getPeerAddress().ip, self->is_client ? self->conn->getPeerAddress().port : static_cast<uint16_t>(0));
	if(!self->is_client) {
		auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
		if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
			if (now() < iter->second.second) {
				// Throttle window still open: refuse if the attempt count is exhausted.
				if(iter->second.first >= FLOW_KNOBS->TLS_SERVER_CONNECTION_THROTTLE_ATTEMPTS) {
					TraceEvent("TLSIncomingConnectionThrottlingWarning", self->getDebugID()).suppressFor(1.0).detail("PeerIP", peerIP.first.toString());
					wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
					throw connection_failed();
				}
			} else {
				// Throttle window expired: forget this peer's past failures.
				g_network->networkInfo.serverTLSConnectionThrottler.erase(peerIP);
			}
		}
	}
	loop {
		int r = self->session->handshake();
		if(BUGGIFY_WITH_PROB(0.001)) {
			// Simulation-only fault injection of rare handshake failures.
			r = ITLSSession::FAILED;
		}
		if ( r == ITLSSession::SUCCESS ) break;
		if ( r == ITLSSession::FAILED ) {
			TraceEvent("TLSConnectionHandshakeError", self->getDebugID()).suppressFor(1.0).detail("Peer", self->getPeerAddress());
			// Record the failure for future throttling decisions; a new entry
			// starts at count 0 with a fresh timeout window.
			auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
			if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
				iter->second.first++;
			} else {
				g_network->networkInfo.serverTLSConnectionThrottler[peerIP] = std::make_pair(0,now() + (self->is_client ? FLOW_KNOBS->TLS_CLIENT_CONNECTION_THROTTLE_TIMEOUT : FLOW_KNOBS->TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT));
			}
			throw connection_failed();
		}
		// Handshake is mid-flight: wait for the transport direction it needs.
		ASSERT( r == ITLSSession::WANT_WRITE || r == ITLSSession::WANT_READ );
		wait( r == ITLSSession::WANT_WRITE ? self->conn->onWritable() : self->conn->onReadable() );
	}
	TraceEvent("TLSConnectionHandshakeSuccessful", self->getDebugID()).suppressFor(1.0).detail("Peer", self->getPeerAddress());
	return Void();
}
// Wraps an established transport connection in a TLS session and kicks off
// the handshake immediately. `host`, when non-empty, is passed to the plugin
// as the server name for the session.
TLSConnection::TLSConnection( Reference<IConnection> const& conn, Reference<ITLSPolicy> const& policy, bool is_client, std::string host) : conn(conn), write_wants(0), read_wants(0), uid(conn->getDebugID()), is_client(is_client) {
	const char * serverName = host.empty() ? NULL : host.c_str();
	session = Reference<ITLSSession>( policy->create_session(is_client, serverName, send_func, this, recv_func, this, (void*)&uid) );
	if ( !session ) {
		// If session is NULL, we're trusting policy->create_session
		// to have used its provided logging function to have logged
		// the error
		throw tls_error();
	}
	// Start the handshake now; read()/write() return 0 until it completes.
	handshook = handshake(this);
}
// Future that fires when another write() attempt could make progress.
Future<Void> TLSConnection::onWritable() {
	// Until the handshake finishes (or fails), callers wait on it instead.
	if ( !handshook.isReady() ) {
		return handshook;
	}
	// A stalled write may be blocked on either transport direction.
	if ( write_wants == ITLSSession::WANT_READ ) {
		return conn->onReadable();
	}
	if ( write_wants == ITLSSession::WANT_WRITE ) {
		return conn->onWritable();
	}
	return Void();
}
// Future that fires when another read() attempt could make progress.
Future<Void> TLSConnection::onReadable() {
	// Until the handshake finishes (or fails), callers wait on it instead.
	if ( !handshook.isReady() ) {
		return handshook;
	}
	// A stalled read may be blocked on either transport direction.
	if ( read_wants == ITLSSession::WANT_READ ) {
		return conn->onReadable();
	}
	if ( read_wants == ITLSSession::WANT_WRITE ) {
		return conn->onWritable();
	}
	return Void();
}
// Reads decrypted application data into [begin, end). Returns 0 both before
// the handshake completes and when the session is blocked on the transport.
int TLSConnection::read( uint8_t* begin, uint8_t* end ) {
	if ( !handshook.isReady() ) return 0;
	handshook.get(); // re-throws any handshake failure

	read_wants = 0;
	const int got = session->read( begin, end - begin );
	if ( got > 0 ) {
		return got;
	}
	if ( got == ITLSSession::FAILED ) throw connection_failed();
	// The session is blocked; remember which direction so that onReadable()
	// waits for the right transport event.
	ASSERT( got == ITLSSession::WANT_WRITE || got == ITLSSession::WANT_READ );
	read_wants = got;
	return 0;
}
// Encrypts and sends up to `limit` unsent bytes from `buffer`. Returns 0 both
// before the handshake completes and when the session is blocked.
int TLSConnection::write( SendBuffer const* buffer, int limit ) {
	ASSERT(limit > 0);
	if ( !handshook.isReady() ) return 0;
	handshook.get(); // re-throws any handshake failure

	write_wants = 0;
	const int toSend = std::min(limit, buffer->bytes_written - buffer->bytes_sent);
	ASSERT(toSend);
	const int sent = session->write( buffer->data + buffer->bytes_sent, toSend );
	if ( sent > 0 ) {
		return sent;
	}
	if ( sent == ITLSSession::FAILED ) throw connection_failed();
	// The session is blocked; remember which direction so that onWritable()
	// waits for the right transport event.
	ASSERT( sent == ITLSSession::WANT_WRITE || sent == ITLSSession::WANT_READ );
	write_wants = sent;
	return 0;
}
// Waits for the underlying connect/accept to finish, then layers TLS on top.
// Client-side connections additionally wait for the handshake here, so the
// returned connection is immediately usable. The raw connection is closed on
// any error.
ACTOR Future<Reference<IConnection>> wrap( Reference<ITLSPolicy> policy, bool is_client, Future<Reference<IConnection>> c, std::string host) {
	state Reference<IConnection> conn = wait(c);
	try {
		state Reference<TLSConnection> tlsConn(new TLSConnection( conn, policy, is_client, host ));
		if(is_client) {
			wait(tlsConn->handshook);
		}
		return tlsConn;
	} catch( Error &e ) {
		conn->close();
		throw e;
	}
}
// Accepts the next raw connection and wraps it as a TLS server-side
// connection. Incoming connections always verify the peer.
Future<Reference<IConnection>> TLSListener::accept() {
	Reference<ITLSPolicy> policy = options->get_policy(TLSOptions::POLICY_VERIFY_PEERS);
	return wrap( policy, false, listener->accept(), std::string("") );
}
// Captures the current process-global INetworkConnections and installs this
// object in its place, so TLS addresses are handled transparently.
TLSNetworkConnections::TLSNetworkConnections( Reference<TLSOptions> options ) : options(options) {
	network = INetworkConnections::net();
	g_network->setGlobal(INetwork::enumGlobal::enNetworkConnections, (flowGlobalType) this);
}
// Used when an outgoing connection is throttled: waits out the connection
// monitor timeout and then fails, mimicking an unreachable peer.
ACTOR Future<Reference<IConnection>> waitAndFailConnection() {
	wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
	throw connection_failed();
}
// Connects to `toAddr`. For TLS addresses: honors the outgoing-connection
// throttle, dials the equivalent clear-text address, and wraps the result in
// a client-side TLS connection; non-TLS addresses pass straight through.
Future<Reference<IConnection>> TLSNetworkConnections::connect( NetworkAddress toAddr, std::string host) {
	if ( toAddr.isTLS() ) {
		NetworkAddress clearAddr( toAddr.ip, toAddr.port, toAddr.isPublic(), false );
		// Check the per-(ip,port) throttle before attempting the connection.
		std::pair<IPAddress,uint16_t> peerIP = std::make_pair(toAddr.ip, toAddr.port);
		auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
		if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
			if (now() < iter->second.second) {
				if(iter->second.first >= FLOW_KNOBS->TLS_CLIENT_CONNECTION_THROTTLE_ATTEMPTS) {
					// Too many recent failures: simulate an unreachable peer.
					TraceEvent("TLSOutgoingConnectionThrottlingWarning").suppressFor(1.0).detail("PeerIP", toAddr);
					return waitAndFailConnection();
				}
			} else {
				// Throttle window expired; forget this peer's failures.
				g_network->networkInfo.serverTLSConnectionThrottler.erase(peerIP);
			}
		}
		TraceEvent("TLSConnectionConnecting").suppressFor(1.0).detail("ToAddr", toAddr);
		// For FDB<->FDB connections, we don't have hostnames and can't verify IP
		// addresses against certificates, so we have our own peer verifying logic
		// to use. For FDB<->external system connections, we can use the standard
		// hostname-based certificate verification logic.
		if (host.empty() || host == toAddr.ip.toString())
			return wrap(options->get_policy(TLSOptions::POLICY_VERIFY_PEERS), true, network->connect(clearAddr), std::string(""));
		else
			return wrap( options->get_policy(TLSOptions::POLICY_NO_VERIFY_PEERS), true, network->connect( clearAddr ), host );
	}
	return network->connect( toAddr );
}
// DNS resolution is independent of TLS; delegate to the wrapped network.
Future<std::vector<NetworkAddress>> TLSNetworkConnections::resolveTCPEndpoint( std::string host, std::string service) {
	return network->resolveTCPEndpoint( host, service );
}
// Listens on `localAddr`. TLS addresses listen on the equivalent clear-text
// address and wrap each accepted connection; others pass straight through.
Reference<IListener> TLSNetworkConnections::listen( NetworkAddress localAddr ) {
	if ( !localAddr.isTLS() ) {
		return network->listen( localAddr );
	}
	NetworkAddress clearAddr( localAddr.ip, localAddr.port, localAddr.isPublic(), false );
	TraceEvent("TLSConnectionListening").detail("OnAddr", localAddr);
	return Reference<IListener>( new TLSListener( options, network->listen( clearAddr ) ) );
}
// 5MB for loading files into memory
#define CERT_FILE_MAX_SIZE (5 * 1024 * 1024)

// Loads the certificate chain from `cert_file` and installs it on both
// policies via set_cert_data(). Logs and rethrows on failure.
void TLSOptions::set_cert_file( std::string const& cert_file ) {
	try {
		TraceEvent("TLSConnectionSettingCertFile").detail("CertFilePath", cert_file);
		policyInfo.cert_path = cert_file;
		std::string contents = readFileBytes( cert_file, CERT_FILE_MAX_SIZE );
		set_cert_data( contents );
	} catch ( Error& e) {
		TraceEvent(SevError, "TLSOptionsSetCertFileError").detail("Filename", cert_file).error(e).GetLastError();
		throw;
	}
}
void TLSOptions::set_ca_file(std::string const& ca_file) {
try {
TraceEvent("TLSConnectionSettingCAFile").detail("CAPath", ca_file);
policyInfo.ca_path = ca_file;
set_ca_data(readFileBytes(ca_file, CERT_FILE_MAX_SIZE));
}
catch (Error& e) {
TraceEvent(SevError, "TLSOptionsSetCertAError").detail("Filename", ca_file).error(e).GetLastError();
throw;
}
}
// Installs CA bytes on both the verify-peers and no-verify-peers policies.
// Lazily initializes the plugin/policies on first use.
void TLSOptions::set_ca_data(std::string const& ca_data) {
	if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
		init_plugin();

	TraceEvent("TLSConnectionSettingCAData").detail("CADataSize", ca_data.size());
	policyInfo.ca_contents = Standalone<StringRef>(ca_data);

	const uint8_t* bytes = (const uint8_t*)&ca_data[0];
	if (!policyVerifyPeersSet.get()->set_ca_data(bytes, ca_data.size()))
		throw tls_error();
	if (!policyVerifyPeersNotSet.get()->set_ca_data(bytes, ca_data.size()))
		throw tls_error();
	ca_set = true;
}
// Installs certificate bytes on both policies. Lazily initializes the
// plugin/policies on first use.
void TLSOptions::set_cert_data( std::string const& cert_data ) {
	if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
		init_plugin();

	TraceEvent("TLSConnectionSettingCertData").detail("CertDataSize", cert_data.size());
	policyInfo.cert_contents = Standalone<StringRef>(cert_data);

	const uint8_t* bytes = (const uint8_t*)&cert_data[0];
	if ( !policyVerifyPeersSet.get()->set_cert_data( bytes, cert_data.size() ) )
		throw tls_error();
	if (!policyVerifyPeersNotSet.get()->set_cert_data(bytes, cert_data.size()))
		throw tls_error();
	certs_set = true;
}
// Records the private-key passphrase; it is consumed later by set_key_data().
void TLSOptions::set_key_password(std::string const& password) {
	TraceEvent("TLSConnectionSettingPassword");
	policyInfo.keyPassword = password;
}
// Loads the private key from `key_file` and installs it on both policies via
// set_key_data(). Logs and rethrows on failure.
void TLSOptions::set_key_file( std::string const& key_file ) {
	try {
		TraceEvent("TLSConnectionSettingKeyFile").detail("KeyFilePath", key_file);
		policyInfo.key_path = key_file;
		std::string contents = readFileBytes( key_file, CERT_FILE_MAX_SIZE );
		set_key_data( contents );
	} catch ( Error& e) {
		TraceEvent(SevError, "TLSOptionsSetKeyFileError").detail("Filename", key_file).error(e).GetLastError();
		throw;
	}
}
// Installs private-key bytes on both policies, using the previously recorded
// passphrase (if any). Lazily initializes the plugin/policies on first use.
void TLSOptions::set_key_data( std::string const& key_data ) {
	if (!policyVerifyPeersSet.get() || !policyVerifyPeersNotSet.get())
		init_plugin();

	// An empty recorded password means the key is assumed unencrypted.
	const char *passphrase = policyInfo.keyPassword.empty() ? NULL : policyInfo.keyPassword.c_str();
	TraceEvent("TLSConnectionSettingKeyData").detail("KeyDataSize", key_data.size());
	policyInfo.key_contents = Standalone<StringRef>(key_data);

	const uint8_t* bytes = (const uint8_t*)&key_data[0];
	if ( !policyVerifyPeersSet.get()->set_key_data( bytes, key_data.size(), passphrase) )
		throw tls_error();
	if (!policyVerifyPeersNotSet.get()->set_key_data(bytes, key_data.size(), passphrase))
		throw tls_error();
	key_set = true;
}
// Installs the peer-verification constraint strings. Only the verify-peers
// policy receives them (the no-verify-peers policy takes none by definition).
void TLSOptions::set_verify_peers( std::vector<std::string> const& verify_peers ) {
	if (!policyVerifyPeersSet.get())
		init_plugin();
	{
		// One trace event listing every constraint as Value0, Value1, ...
		TraceEvent e("TLSConnectionSettingVerifyPeers");
		for (int i = 0; i < verify_peers.size(); i++)
			e.detail(std::string("Value" + std::to_string(i)).c_str(), verify_peers[i].c_str());
	}
	// The plugin API takes parallel arrays of byte pointers and lengths.
	std::unique_ptr<const uint8_t *[]> verify_peers_arr(new const uint8_t*[verify_peers.size()]);
	std::unique_ptr<int[]> verify_peers_len(new int[verify_peers.size()]);
	for (int i = 0; i < verify_peers.size(); i++) {
		verify_peers_arr[i] = (const uint8_t *)&verify_peers[i][0];
		verify_peers_len[i] = verify_peers[i].size();
	}
	if (!policyVerifyPeersSet.get()->set_verify_peers(verify_peers.size(), verify_peers_arr.get(), verify_peers_len.get()))
		throw tls_error();
	// Keep a copy so the background reloader can re-apply the constraints.
	policyInfo.verify_peers = verify_peers;
	verify_peers_set = true;
}
// Installs a TLSNetworkConnections wrapper over the current global network.
// The object is intentionally leaked: it lives for the process lifetime.
void TLSOptions::register_network() {
	// Simulation relies upon being able to call this multiple times, and have it override g_network
	// each time it's called.
	new TLSNetworkConnections( Reference<TLSOptions>::addRef( this ) );
}
// Reads the whole file into memory. Returns tls_error() as a value (not a
// throw) when the read returns fewer bytes than the file's reported size,
// which indicates the file changed during the read.
ACTOR static Future<ErrorOr<Standalone<StringRef>>> readEntireFile( std::string filename ) {
	state Reference<IAsyncFile> file = wait(IAsyncFileSystem::filesystem()->open(filename, IAsyncFile::OPEN_READONLY | IAsyncFile::OPEN_UNCACHED, 0));
	state int64_t filesize = wait(file->size());
	state Standalone<StringRef> buf = makeString(filesize);
	int rc = wait(file->read(mutateString(buf), filesize, 0));
	if (rc != filesize) {
		// File modified during read, probably. The mtime should change, and thus we'll be called again.
		return tls_error();
	}
	return buf;
}
// Polls the file's mtime every TLS_CERT_REFRESH_DELAY_SECONDS; on change,
// re-reads the file and publishes the new contents through `contents_var`.
// A failed re-read (contents not present) keeps the previous value. Never
// returns; callers hold the Future to keep the watch alive.
ACTOR static Future<Void> watchFileForChanges( std::string filename, AsyncVar<Standalone<StringRef>> *contents_var ) {
	state std::time_t lastModTime = wait(IAsyncFileSystem::filesystem()->lastWriteTime(filename));
	loop {
		wait(delay(FLOW_KNOBS->TLS_CERT_REFRESH_DELAY_SECONDS));
		std::time_t modtime = wait(IAsyncFileSystem::filesystem()->lastWriteTime(filename));
		if (lastModTime != modtime) {
			lastModTime = modtime;
			ErrorOr<Standalone<StringRef>> contents = wait(readEntireFile(filename));
			if (contents.present()) {
				contents_var->set(contents.get());
			}
		}
	}
}
// Background actor: watches the configured CA/key/cert files and, whenever
// any of them changes, rebuilds both policies from the latest contents.
// Fresh policies are published only when every plugin set_* call succeeds;
// otherwise the files are presumed mid-update and we retry on the next
// change. Disabled when TLS_CERT_REFRESH_DELAY_SECONDS <= 0.
ACTOR static Future<Void> reloadConfigurationOnChange( TLSOptions::PolicyInfo *pci, Reference<ITLSPlugin> plugin, AsyncVar<Reference<ITLSPolicy>> *realVerifyPeersPolicy, AsyncVar<Reference<ITLSPolicy>> *realNoVerifyPeersPolicy ) {
	if (FLOW_KNOBS->TLS_CERT_REFRESH_DELAY_SECONDS <= 0) {
		return Void();
	}
	loop {
		// Early in bootup, the filesystem might not be initialized yet. Wait until it is.
		if (IAsyncFileSystem::filesystem() != nullptr) {
			break;
		}
		wait(delay(1.0));
	}
	state int mismatches = 0;
	state AsyncVar<Standalone<StringRef>> ca_var;
	state AsyncVar<Standalone<StringRef>> key_var;
	state AsyncVar<Standalone<StringRef>> cert_var;
	// Hold the watcher futures so the file watches stay alive for the
	// lifetime of this actor. Only configured paths are watched.
	state std::vector<Future<Void>> lifetimes;
	if (!pci->ca_path.empty()) lifetimes.push_back(watchFileForChanges(pci->ca_path, &ca_var));
	if (!pci->key_path.empty()) lifetimes.push_back(watchFileForChanges(pci->key_path, &key_var));
	if (!pci->cert_path.empty()) lifetimes.push_back(watchFileForChanges(pci->cert_path, &cert_var));
	loop {
		state Future<Void> ca_changed = ca_var.onChange();
		state Future<Void> key_changed = key_var.onChange();
		state Future<Void> cert_changed = cert_var.onChange();
		wait( ca_changed || key_changed || cert_changed );
		// Copy whichever contents changed into the shared PolicyInfo.
		if (ca_changed.isReady()) {
			TraceEvent(SevInfo, "TLSRefreshCAChanged").detail("path", pci->ca_path).detail("length", ca_var.get().size());
			pci->ca_contents = ca_var.get();
		}
		if (key_changed.isReady()) {
			TraceEvent(SevInfo, "TLSRefreshKeyChanged").detail("path", pci->key_path).detail("length", key_var.get().size());
			pci->key_contents = key_var.get();
		}
		if (cert_changed.isReady()) {
			TraceEvent(SevInfo, "TLSRefreshCertChanged").detail("path", pci->cert_path).detail("length", cert_var.get().size());
			pci->cert_contents = cert_var.get();
		}
		bool rc = true;
		// Build candidate policies from scratch from the current contents.
		Reference<ITLSPolicy> verifypeers = Reference<ITLSPolicy>(plugin->create_policy());
		Reference<ITLSPolicy> noverifypeers = Reference<ITLSPolicy>(plugin->create_policy());
		loop {
			// Don't actually loop. We're just using loop/break as a `goto err`.
			// This loop always ends with an unconditional break.
			rc = verifypeers->set_ca_data(pci->ca_contents.begin(), pci->ca_contents.size());
			if (!rc) break;
			rc = verifypeers->set_key_data(pci->key_contents.begin(), pci->key_contents.size(), pci->keyPassword.c_str());
			if (!rc) break;
			rc = verifypeers->set_cert_data(pci->cert_contents.begin(), pci->cert_contents.size());
			if (!rc) break;
			{
				// Re-apply the verify-peers constraints (verify-peers policy only).
				std::unique_ptr<const uint8_t *[]> verify_peers_arr(new const uint8_t*[pci->verify_peers.size()]);
				std::unique_ptr<int[]> verify_peers_len(new int[pci->verify_peers.size()]);
				for (int i = 0; i < pci->verify_peers.size(); i++) {
					verify_peers_arr[i] = (const uint8_t *)&pci->verify_peers[i][0];
					verify_peers_len[i] = pci->verify_peers[i].size();
				}
				rc = verifypeers->set_verify_peers(pci->verify_peers.size(), verify_peers_arr.get(), verify_peers_len.get());
				if (!rc) break;
			}
			rc = noverifypeers->set_ca_data(pci->ca_contents.begin(), pci->ca_contents.size());
			if (!rc) break;
			rc = noverifypeers->set_key_data(pci->key_contents.begin(), pci->key_contents.size(), pci->keyPassword.c_str());
			if (!rc) break;
			rc = noverifypeers->set_cert_data(pci->cert_contents.begin(), pci->cert_contents.size());
			if (!rc) break;
			break;
		}
		if (rc) {
			// All set_* calls succeeded: atomically publish the new policies.
			TraceEvent(SevInfo, "TLSCertificateRefreshSucceeded");
			realVerifyPeersPolicy->set(verifypeers);
			realNoVerifyPeersPolicy->set(noverifypeers);
			mismatches = 0;
		} else {
			// Some files didn't match up, they should in the future, and we'll retry then.
			mismatches++;
			TraceEvent(SevWarn, "TLSCertificateRefreshMismatch").detail("mismatches", mismatches);
		}
	}
}
// Default certificate/key file name, looked for in the working directory and
// then in the platform default config path.
const char *defaultCertFileName = "fdb.pem";

// Returns the requested policy, lazily filling in any configuration not set
// explicitly: certificate, key, verify-peers constraints, and CA fall back to
// the FDB_TLS_* environment variables and fdb.pem. Also starts the background
// configuration reloader on first use.
Reference<ITLSPolicy> TLSOptions::get_policy(PolicyType type) {
	if ( !certs_set ) {
		if ( !platform::getEnvironmentVar( "FDB_TLS_CERTIFICATE_FILE", policyInfo.cert_path ) )
			policyInfo.cert_path = fileExists(defaultCertFileName) ? defaultCertFileName : joinPath(platform::getDefaultConfigPath(), defaultCertFileName);
		set_cert_file( policyInfo.cert_path );
	}
	if ( !key_set ) {
		// Pick up a key passphrase from the environment before loading the key.
		if ( policyInfo.keyPassword.empty() )
			platform::getEnvironmentVar( "FDB_TLS_PASSWORD", policyInfo.keyPassword );
		if ( !platform::getEnvironmentVar( "FDB_TLS_KEY_FILE", policyInfo.key_path ) )
			policyInfo.key_path = fileExists(defaultCertFileName) ? defaultCertFileName : joinPath(platform::getDefaultConfigPath(), defaultCertFileName);
		set_key_file( policyInfo.key_path );
	}
	if( !verify_peers_set ) {
		std::string verify_peers;
		if (platform::getEnvironmentVar("FDB_TLS_VERIFY_PEERS", verify_peers))
			set_verify_peers({ verify_peers });
		else
			set_verify_peers({ std::string("Check.Valid=1")});
	}
	if (!ca_set) {
		// CA is optional: only loaded when the environment names a file.
		if (platform::getEnvironmentVar("FDB_TLS_CA_FILE", policyInfo.ca_path))
			set_ca_file(policyInfo.ca_path);
	}
	if (!configurationReloader.present()) {
		configurationReloader = reloadConfigurationOnChange(&policyInfo, plugin, &policyVerifyPeersSet, &policyVerifyPeersNotSet);
	}
	Reference<ITLSPolicy> policy;
	switch (type) {
	case POLICY_VERIFY_PEERS:
		policy = policyVerifyPeersSet.get();
		break;
	case POLICY_NO_VERIFY_PEERS:
		policy = policyVerifyPeersNotSet.get();
		break;
	default:
		ASSERT_ABORT(0);
	}
	return policy;
}
// Loads the TLS plugin shared library and creates the two base policies
// (verify-peers and no-verify-peers). Throws tls_error() if the plugin fails
// to load or either policy cannot be created.
void TLSOptions::init_plugin() {
	TraceEvent("TLSConnectionLoadingPlugin").detail("Plugin", tlsPluginName);
	plugin = loadPlugin<ITLSPlugin>( tlsPluginName );
	if ( !plugin ) {
		TraceEvent(SevError, "TLSConnectionPluginInitError").detail("Plugin", tlsPluginName).GetLastError();
		throw tls_error();
	}
	policyVerifyPeersSet = AsyncVar<Reference<ITLSPolicy>>(Reference<ITLSPolicy>(plugin->create_policy()));
	if ( !policyVerifyPeersSet.get()) {
		// Hopefully create_policy logged something with the log func
		TraceEvent(SevError, "TLSConnectionCreatePolicyVerifyPeersSetError");
		throw tls_error();
	}
	policyVerifyPeersNotSet = AsyncVar<Reference<ITLSPolicy>>(Reference<ITLSPolicy>(plugin->create_policy()));
	if (!policyVerifyPeersNotSet.get()) {
		// Hopefully create_policy logged something with the log func
		TraceEvent(SevError, "TLSConnectionCreatePolicyVerifyPeersNotSetError");
		throw tls_error();
	}
}
// TLS is usable only once both base policies have been created successfully.
bool TLSOptions::enabled() {
	bool const verifySet = policyVerifyPeersSet.get().isValid();
	bool const noVerifySet = policyVerifyPeersNotSet.get().isValid();
	return verifySet && noVerifySet;
}

View File

@ -1,174 +0,0 @@
/*
* TLSConnection.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FLOW_TLSCONNECTION_H
#define FLOW_TLSCONNECTION_H
#pragma once
#include "flow/Platform.h"
#include "fdbrpc/ITLSPlugin.h"
// An IConnection that layers a plugin-provided TLS session over an underlying
// raw connection. read()/write() return 0 until the handshake completes.
struct TLSConnection : IConnection, ReferenceCounted<TLSConnection> {
	Reference<IConnection> conn;    // underlying transport connection
	Reference<ITLSSession> session; // plugin-provided TLS session
	Future<Void> handshook;         // completes when the handshake finishes
	int write_wants, read_wants;    // WANT_READ/WANT_WRITE from the last stalled op, else 0
	UID uid;                        // debug id copied from conn
	bool is_client;                 // true for outgoing (connect-side) connections
	virtual void addref() { ReferenceCounted<TLSConnection>::addref(); }
	virtual void delref() { ReferenceCounted<TLSConnection>::delref(); }
	TLSConnection( Reference<IConnection> const& conn, Reference<ITLSPolicy> const& policy, bool is_client, std::string host);
	~TLSConnection() {
		// Here for ordering to make sure we delref the ITLSSession
		// which has a pointer to this object
		session.clear();
	}
	virtual void close() { conn->close(); }
	virtual Future<Void> onWritable();
	virtual Future<Void> onReadable();
	virtual int read( uint8_t* begin, uint8_t* end );
	virtual int write( SendBuffer const* buffer, int limit);
	// The underlying peer address, re-tagged as TLS.
	virtual NetworkAddress getPeerAddress() {
		NetworkAddress a = conn->getPeerAddress();
		return NetworkAddress(a.ip, a.port, a.isPublic(), true);
	}
	virtual UID getDebugID() { return uid; }
};
// Holds TLS configuration (certificate, key, CA, verify-peers constraints)
// and produces ITLSPolicy objects from the loaded plugin. Configuration can
// come from explicit set_* calls, or lazily from FDB_TLS_* environment
// variables when get_policy() is first called.
struct TLSOptions : ReferenceCounted<TLSOptions> {
	enum { OPT_TLS = 100000, OPT_TLS_PLUGIN, OPT_TLS_CERTIFICATES, OPT_TLS_KEY, OPT_TLS_VERIFY_PEERS, OPT_TLS_CA_FILE, OPT_TLS_PASSWORD };
	enum PolicyType { POLICY_VERIFY_PEERS = 1, POLICY_NO_VERIFY_PEERS };
	TLSOptions() : certs_set(false), key_set(false), verify_peers_set(false), ca_set(false) {
#ifndef TLS_DISABLED
		init_plugin( );
#endif
	}
	void set_cert_file( std::string const& cert_file );
	void set_cert_data( std::string const& cert_data );
	void set_ca_file(std::string const& ca_file);
	void set_ca_data(std::string const& ca_data);
	// If there is a passphrase, this api should be called prior to setting key for the passphrase to be used
	void set_key_password( std::string const& password );
	void set_key_file( std::string const& key_file );
	void set_key_data( std::string const& key_data );
	void set_verify_peers( std::vector<std::string> const& verify_peers );
	void register_network();
	Reference<ITLSPolicy> get_policy(PolicyType type);
	// True once both base policies exist.
	bool enabled();
	// Paths and cached contents of the configured files; shared with the
	// background configuration reloader.
	struct PolicyInfo {
		std::string ca_path;
		Standalone<StringRef> ca_contents;
		std::string key_path;
		std::string keyPassword;
		Standalone<StringRef> key_contents;
		std::string cert_path;
		Standalone<StringRef> cert_contents;
		std::vector<std::string> verify_peers;
	};
private:
	void init_plugin();
	Reference<ITLSPlugin> plugin;
	PolicyInfo policyInfo;
	// Current policies; AsyncVar so the reloader can publish replacements.
	AsyncVar<Reference<ITLSPolicy>> policyVerifyPeersSet;
	AsyncVar<Reference<ITLSPolicy>> policyVerifyPeersNotSet;
	// Lifetime of the background reloader actor, started by get_policy().
	Optional<Future<Void>> configurationReloader;
	// Which pieces of configuration have been applied.
	bool certs_set, key_set, verify_peers_set, ca_set;
};
// An IListener that wraps each connection accepted from an underlying raw
// listener in a server-side TLSConnection.
struct TLSListener : IListener, ReferenceCounted<TLSListener> {
	Reference<IListener> listener; // underlying clear-text listener
	Reference<TLSOptions> options; // source of the TLS policy for accepts
	TLSListener( Reference<TLSOptions> options, Reference<IListener> listener ) : options(options), listener(listener) {}
	virtual void addref() { ReferenceCounted<TLSListener>::addref(); }
	virtual void delref() { ReferenceCounted<TLSListener>::delref(); }
	virtual Future<Reference<IConnection>> accept();
	virtual NetworkAddress getListenAddress() { return listener->getListenAddress(); }
};
// An INetworkConnections decorator: TLS addresses are connected/listened via
// TLS-wrapped connections; everything else is delegated to the wrapped
// implementation unchanged.
struct TLSNetworkConnections : INetworkConnections {
	INetworkConnections *network; // the wrapped (previous global) implementation
	explicit TLSNetworkConnections( Reference<TLSOptions> options );
	virtual Future<Reference<IConnection>> connect( NetworkAddress toAddr, std::string host );
	virtual Future<std::vector<NetworkAddress>> resolveTCPEndpoint( std::string host, std::string service);
	virtual Reference<IListener> listen( NetworkAddress localAddr );
private:
	Reference<TLSOptions> options;
};
#define TLS_PLUGIN_FLAG "--tls_plugin"
#define TLS_CERTIFICATE_FILE_FLAG "--tls_certificate_file"
#define TLS_KEY_FILE_FLAG "--tls_key_file"
#define TLS_VERIFY_PEERS_FLAG "--tls_verify_peers"
#define TLS_CA_FILE_FLAG "--tls_ca_file"
#define TLS_PASSWORD_FLAG "--tls_password"
#define TLS_OPTION_FLAGS \
{ TLSOptions::OPT_TLS_PLUGIN, TLS_PLUGIN_FLAG, SO_REQ_SEP }, \
{ TLSOptions::OPT_TLS_CERTIFICATES, TLS_CERTIFICATE_FILE_FLAG, SO_REQ_SEP }, \
{ TLSOptions::OPT_TLS_KEY, TLS_KEY_FILE_FLAG, SO_REQ_SEP }, \
{ TLSOptions::OPT_TLS_VERIFY_PEERS, TLS_VERIFY_PEERS_FLAG, SO_REQ_SEP }, \
{ TLSOptions::OPT_TLS_PASSWORD, TLS_PASSWORD_FLAG, SO_REQ_SEP }, \
{ TLSOptions::OPT_TLS_CA_FILE, TLS_CA_FILE_FLAG, SO_REQ_SEP },
#define TLS_HELP \
" " TLS_CERTIFICATE_FILE_FLAG " CERTFILE\n" \
" The path of a file containing the TLS certificate and CA\n" \
" chain.\n" \
" " TLS_CA_FILE_FLAG " CERTAUTHFILE\n" \
" The path of a file containing the CA certificates chain.\n" \
" " TLS_KEY_FILE_FLAG " KEYFILE\n" \
" The path of a file containing the private key corresponding\n" \
" to the TLS certificate.\n" \
" " TLS_PASSWORD_FLAG " PASSCODE\n" \
" The passphrase of encrypted private key\n" \
" " TLS_VERIFY_PEERS_FLAG " CONSTRAINTS\n" \
" The constraints by which to validate TLS peers. The contents\n" \
" and format of CONSTRAINTS are plugin-specific.\n"
#endif /* FLOW_TLSCONNECTION_H */

View File

@ -33,7 +33,6 @@
<ClCompile Include="ReplicationTypes.cpp" /> <ClCompile Include="ReplicationTypes.cpp" />
<ClCompile Include="ReplicationPolicy.cpp" /> <ClCompile Include="ReplicationPolicy.cpp" />
<ClCompile Include="sim_validation.cpp" /> <ClCompile Include="sim_validation.cpp" />
<ActorCompiler Include="TLSConnection.actor.cpp" />
<ClCompile Include="TraceFileIO.cpp" /> <ClCompile Include="TraceFileIO.cpp" />
<ClCompile Include="zlib\gzwrite.c" /> <ClCompile Include="zlib\gzwrite.c" />
<ClCompile Include="zlib\gzclose.c" /> <ClCompile Include="zlib\gzclose.c" />
@ -88,7 +87,6 @@
<ClInclude Include="Platform.h" /> <ClInclude Include="Platform.h" />
<ClInclude Include="fdbrpc.h" /> <ClInclude Include="fdbrpc.h" />
<ClInclude Include="FlowTransport.h" /> <ClInclude Include="FlowTransport.h" />
<ClInclude Include="ITLSPlugin.h" />
<ClInclude Include="libcoroutine\Base.h" /> <ClInclude Include="libcoroutine\Base.h" />
<ClInclude Include="libcoroutine\Common.h" /> <ClInclude Include="libcoroutine\Common.h" />
<ClInclude Include="libcoroutine\Coro.h" /> <ClInclude Include="libcoroutine\Coro.h" />
@ -107,7 +105,6 @@
<ClInclude Include="sim_validation.h" /> <ClInclude Include="sim_validation.h" />
<ClInclude Include="Smoother.h" /> <ClInclude Include="Smoother.h" />
<ClInclude Include="TimedRequest.h" /> <ClInclude Include="TimedRequest.h" />
<ClInclude Include="TLSConnection.h" />
<ClInclude Include="TraceFileIO.h" /> <ClInclude Include="TraceFileIO.h" />
<ClInclude Include="zlib\zlib.h" /> <ClInclude Include="zlib\zlib.h" />
<ClInclude Include="zlib\deflate.h" /> <ClInclude Include="zlib\deflate.h" />

View File

@ -10,7 +10,6 @@
<ActorCompiler Include="AsyncFileCached.actor.cpp" /> <ActorCompiler Include="AsyncFileCached.actor.cpp" />
<ActorCompiler Include="AsyncFileNonDurable.actor.h" /> <ActorCompiler Include="AsyncFileNonDurable.actor.h" />
<ActorCompiler Include="AsyncFileNonDurable.actor.cpp" /> <ActorCompiler Include="AsyncFileNonDurable.actor.cpp" />
<ActorCompiler Include="TLSConnection.actor.cpp" />
<ActorCompiler Include="dsltest.actor.cpp" /> <ActorCompiler Include="dsltest.actor.cpp" />
<ActorCompiler Include="FlowTests.actor.cpp" /> <ActorCompiler Include="FlowTests.actor.cpp" />
<ActorCompiler Include="genericactors.actor.cpp" /> <ActorCompiler Include="genericactors.actor.cpp" />
@ -128,7 +127,6 @@
<ClInclude Include="zlib\inftrees.h"> <ClInclude Include="zlib\inftrees.h">
<Filter>zlib</Filter> <Filter>zlib</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="ITLSPlugin.h" />
<ClInclude Include="FailureMonitor.h" /> <ClInclude Include="FailureMonitor.h" />
<ClInclude Include="FlowTransport.h" /> <ClInclude Include="FlowTransport.h" />
<ClInclude Include="IAsyncFile.h" /> <ClInclude Include="IAsyncFile.h" />
@ -143,7 +141,6 @@
<ClInclude Include="RangeMap.h" /> <ClInclude Include="RangeMap.h" />
<ClInclude Include="Smoother.h" /> <ClInclude Include="Smoother.h" />
<ClInclude Include="TraceFileIO.h" /> <ClInclude Include="TraceFileIO.h" />
<ClInclude Include="TLSConnection.h" />
<ClInclude Include="IRateControl.h" /> <ClInclude Include="IRateControl.h" />
<ClInclude Include="Replication.h" /> <ClInclude Include="Replication.h" />
<ClInclude Include="ReplicationTypes.h" /> <ClInclude Include="ReplicationTypes.h" />

View File

@ -200,6 +200,9 @@ struct Sim2Conn : IConnection, ReferenceCounted<Sim2Conn> {
virtual void delref() { ReferenceCounted<Sim2Conn>::delref(); } virtual void delref() { ReferenceCounted<Sim2Conn>::delref(); }
virtual void close() { closedByCaller = true; closeInternal(); } virtual void close() { closedByCaller = true; closeInternal(); }
virtual Future<Void> acceptHandshake() { return delay(0.01*deterministicRandom()->random01()); }
virtual Future<Void> connectHandshake() { return delay(0.01*deterministicRandom()->random01()); }
virtual Future<Void> onWritable() { return whenWritable(this); } virtual Future<Void> onWritable() { return whenWritable(this); }
virtual Future<Void> onReadable() { return whenReadable(this); } virtual Future<Void> onReadable() { return whenReadable(this); }
@ -756,6 +759,12 @@ public:
// Everything actually network related is delegated to the Sim2Net class; Sim2 is only concerned with simulating machines and time // Everything actually network related is delegated to the Sim2Net class; Sim2 is only concerned with simulating machines and time
virtual double now() { return time; } virtual double now() { return time; }
// timer() can be up to one second ahead of now()
virtual double timer() {
timerTime += deterministicRandom()->random01()*(time+1.0-timerTime)/2.0;
return timerTime;
}
virtual Future<class Void> delay( double seconds, TaskPriority taskID ) { virtual Future<class Void> delay( double seconds, TaskPriority taskID ) {
ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max); ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
return delay( seconds, taskID, currentProcess ); return delay( seconds, taskID, currentProcess );
@ -806,7 +815,7 @@ public:
} }
// Sets the taskID/priority of the current task, without yielding // Sets the taskID/priority of the current task, without yielding
virtual Future<Reference<IConnection>> connect( NetworkAddress toAddr, std::string host ) { virtual Future<Reference<IConnection>> connect( NetworkAddress toAddr, std::string host ) {
ASSERT( !toAddr.isTLS() && host.empty()); ASSERT( host.empty());
if (!addressMap.count( toAddr )) { if (!addressMap.count( toAddr )) {
return waitForProcessAndConnect( toAddr, this ); return waitForProcessAndConnect( toAddr, this );
} }
@ -824,7 +833,7 @@ public:
} else { } else {
localIp = IPAddress(getCurrentProcess()->address.ip.toV4() + deterministicRandom()->randomInt(0, 256)); localIp = IPAddress(getCurrentProcess()->address.ip.toV4() + deterministicRandom()->randomInt(0, 256));
} }
peerc->connect(myc, NetworkAddress(localIp, deterministicRandom()->randomInt(40000, 60000))); peerc->connect(myc, NetworkAddress(localIp, deterministicRandom()->randomInt(40000, 60000), false, toAddr.isTLS()));
((Sim2Listener*)peerp->getListener(toAddr).getPtr())->incomingConnection( 0.5*deterministicRandom()->random01(), Reference<IConnection>(peerc) ); ((Sim2Listener*)peerp->getListener(toAddr).getPtr())->incomingConnection( 0.5*deterministicRandom()->random01(), Reference<IConnection>(peerc) );
return onConnect( ::delay(0.5*deterministicRandom()->random01()), myc ); return onConnect( ::delay(0.5*deterministicRandom()->random01()), myc );
@ -845,7 +854,6 @@ public:
return conn; return conn;
} }
virtual Reference<IListener> listen( NetworkAddress localAddr ) { virtual Reference<IListener> listen( NetworkAddress localAddr ) {
ASSERT( !localAddr.isTLS() );
Reference<IListener> listener( getCurrentProcess()->getListener(localAddr) ); Reference<IListener> listener( getCurrentProcess()->getListener(localAddr) );
ASSERT(listener); ASSERT(listener);
return listener; return listener;
@ -994,7 +1002,7 @@ public:
Future<Void> loopFuture = runLoop(this); Future<Void> loopFuture = runLoop(this);
net2->run(); net2->run();
} }
virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, uint16_t listenPerProcess, virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, bool sslEnabled, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder, LocalityData locality, ProcessClass startingClass, const char* dataFolder,
const char* coordinationFolder) { const char* coordinationFolder) {
ASSERT( locality.machineId().present() ); ASSERT( locality.machineId().present() );
@ -1023,14 +1031,14 @@ public:
} }
NetworkAddressList addresses; NetworkAddressList addresses;
addresses.address = NetworkAddress(ip, port, true, false); addresses.address = NetworkAddress(ip, port, true, sslEnabled);
if(listenPerProcess == 2) { if(listenPerProcess == 2) {
addresses.secondaryAddress = NetworkAddress(ip, port+1, true, false); addresses.secondaryAddress = NetworkAddress(ip, port+1, true, false);
} }
ProcessInfo* m = new ProcessInfo(name, locality, startingClass, addresses, this, dataFolder, coordinationFolder); ProcessInfo* m = new ProcessInfo(name, locality, startingClass, addresses, this, dataFolder, coordinationFolder);
for (int processPort = port; processPort < port + listenPerProcess; ++processPort) { for (int processPort = port; processPort < port + listenPerProcess; ++processPort) {
NetworkAddress address(ip, processPort, true, false); // SOMEDAY see above about becoming SSL! NetworkAddress address(ip, processPort, true, sslEnabled && processPort == port);
m->listenerMap[address] = Reference<IListener>( new Sim2Listener(m, address) ); m->listenerMap[address] = Reference<IListener>( new Sim2Listener(m, address) );
addressMap[address] = m; addressMap[address] = m;
} }
@ -1563,7 +1571,7 @@ public:
return processes; return processes;
} }
virtual ProcessInfo* getProcessByAddress( NetworkAddress const& address ) { virtual ProcessInfo* getProcessByAddress( NetworkAddress const& address ) {
NetworkAddress normalizedAddress(address.ip, address.port, true, false); NetworkAddress normalizedAddress(address.ip, address.port, true, address.isTLS());
ASSERT( addressMap.count( normalizedAddress ) ); ASSERT( addressMap.count( normalizedAddress ) );
return addressMap[ normalizedAddress ]; return addressMap[ normalizedAddress ];
} }
@ -1587,7 +1595,7 @@ public:
machines.erase(machineId); machines.erase(machineId);
} }
Sim2() : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) { Sim2() : time(0.0), timerTime(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
// Not letting currentProcess be NULL eliminates some annoying special cases // Not letting currentProcess be NULL eliminates some annoying special cases
currentProcess = new ProcessInfo("NoMachine", LocalityData(Optional<Standalone<StringRef>>(), StringRef(), StringRef(), StringRef()), ProcessClass(), {NetworkAddress()}, this, "", ""); currentProcess = new ProcessInfo("NoMachine", LocalityData(Optional<Standalone<StringRef>>(), StringRef(), StringRef(), StringRef()), ProcessClass(), {NetworkAddress()}, this, "", "");
g_network = net2 = newNet2(false, true); g_network = net2 = newNet2(false, true);
@ -1623,6 +1631,7 @@ public:
else { else {
mutex.enter(); mutex.enter();
this->time = t.time; this->time = t.time;
this->timerTime = std::max(this->timerTime, this->time);
mutex.leave(); mutex.leave();
this->currentProcess = t.machine; this->currentProcess = t.machine;
@ -1675,6 +1684,7 @@ public:
//time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because //time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because
//time should only be modified from the main thread. //time should only be modified from the main thread.
double time; double time;
double timerTime;
TaskPriority currentTaskID; TaskPriority currentTaskID;
//taskCount is guarded by ISimulator::mutex //taskCount is guarded by ISimulator::mutex
@ -1717,7 +1727,7 @@ ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
TEST( kt == ISimulator::RebootAndDelete ); // Simulated machine rebooted with data and coordination state deletion TEST( kt == ISimulator::RebootAndDelete ); // Simulated machine rebooted with data and coordination state deletion
TEST( kt == ISimulator::RebootProcessAndDelete ); // Simulated process rebooted with data and coordination state deletion TEST( kt == ISimulator::RebootProcessAndDelete ); // Simulated process rebooted with data and coordination state deletion
if( p->rebooting ) if( p->rebooting || !p->isReliable() )
return; return;
TraceEvent("RebootingProcess").detail("KillType", kt).detail("Address", p->address).detail("ZoneId", p->locality.zoneId()).detail("DataHall", p->locality.dataHallId()).detail("Locality", p->locality.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).backtrace(); TraceEvent("RebootingProcess").detail("KillType", kt).detail("Address", p->address).detail("ZoneId", p->locality.zoneId()).detail("DataHall", p->locality.dataHallId()).detail("Locality", p->locality.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).backtrace();
p->rebooting = true; p->rebooting = true;

View File

@ -142,7 +142,7 @@ public:
virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0; virtual Future<Void> onProcess( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;
virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0; virtual Future<Void> onMachine( ISimulator::ProcessInfo *process, TaskPriority taskID = TaskPriority::Zero ) = 0;
virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, uint16_t listenPerProcess, virtual ProcessInfo* newProcess(const char* name, IPAddress ip, uint16_t port, bool sslEnabled, uint16_t listenPerProcess,
LocalityData locality, ProcessClass startingClass, const char* dataFolder, LocalityData locality, ProcessClass startingClass, const char* dataFolder,
const char* coordinationFolder) = 0; const char* coordinationFolder) = 0;
virtual void killProcess( ProcessInfo* machine, KillType ) = 0; virtual void killProcess( ProcessInfo* machine, KillType ) = 0;

View File

@ -57,7 +57,6 @@ struct WorkerInfo : NonCopyable {
ReplyPromise<RegisterWorkerReply> reply; ReplyPromise<RegisterWorkerReply> reply;
Generation gen; Generation gen;
int reboots; int reboots;
double lastAvailableTime;
ProcessClass initialClass; ProcessClass initialClass;
ClusterControllerPriorityInfo priorityInfo; ClusterControllerPriorityInfo priorityInfo;
WorkerDetails details; WorkerDetails details;
@ -65,19 +64,18 @@ struct WorkerInfo : NonCopyable {
Future<Void> haltDistributor; Future<Void> haltDistributor;
Optional<uint16_t> storageCacheInfo; Optional<uint16_t> storageCacheInfo;
WorkerInfo() : gen(-1), reboots(0), lastAvailableTime(now()), priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {} WorkerInfo() : gen(-1), reboots(0), priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {}
WorkerInfo( Future<Void> watcher, ReplyPromise<RegisterWorkerReply> reply, Generation gen, WorkerInterface interf, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, bool degraded ) : WorkerInfo( Future<Void> watcher, ReplyPromise<RegisterWorkerReply> reply, Generation gen, WorkerInterface interf, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, bool degraded ) :
watcher(watcher), reply(reply), gen(gen), reboots(0), lastAvailableTime(now()), initialClass(initialClass), priorityInfo(priorityInfo), details(interf, processClass, degraded) {} watcher(watcher), reply(reply), gen(gen), reboots(0), initialClass(initialClass), priorityInfo(priorityInfo), details(interf, processClass, degraded) {}
WorkerInfo( WorkerInfo&& r ) BOOST_NOEXCEPT : watcher(std::move(r.watcher)), reply(std::move(r.reply)), gen(r.gen), WorkerInfo( WorkerInfo&& r ) BOOST_NOEXCEPT : watcher(std::move(r.watcher)), reply(std::move(r.reply)), gen(r.gen),
reboots(r.reboots), lastAvailableTime(r.lastAvailableTime), initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)), reboots(r.reboots), initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)),
haltRatekeeper(r.haltRatekeeper), haltDistributor(r.haltDistributor), storageCacheInfo(r.storageCacheInfo) {} haltRatekeeper(r.haltRatekeeper), haltDistributor(r.haltDistributor), storageCacheInfo(r.storageCacheInfo) {}
void operator=( WorkerInfo&& r ) BOOST_NOEXCEPT { void operator=( WorkerInfo&& r ) BOOST_NOEXCEPT {
watcher = std::move(r.watcher); watcher = std::move(r.watcher);
reply = std::move(r.reply); reply = std::move(r.reply);
gen = r.gen; gen = r.gen;
reboots = r.reboots; reboots = r.reboots;
lastAvailableTime = r.lastAvailableTime;
initialClass = r.initialClass; initialClass = r.initialClass;
priorityInfo = r.priorityInfo; priorityInfo = r.priorityInfo;
details = std::move(r.details); details = std::move(r.details);
@ -392,7 +390,8 @@ public:
std::vector<LocalityData> tLocalities; std::vector<LocalityData> tLocalities;
// Try to find the best team of servers to fulfill the policy // Try to find the best team of servers to fulfill the policy
if (findBestPolicySet(bestSet, logServerSet, policy, desired, SERVER_KNOBS->POLICY_RATING_TESTS, SERVER_KNOBS->POLICY_GENERATIONS)) { if (findBestPolicySet(bestSet, logServerSet, policy, desired, SERVER_KNOBS->POLICY_RATING_TESTS,
SERVER_KNOBS->POLICY_GENERATIONS)) {
results.reserve(results.size() + bestSet.size()); results.reserve(results.size() + bestSet.size());
for (auto& entry : bestSet) { for (auto& entry : bestSet) {
auto object = logServerMap->getObject(entry); auto object = logServerMap->getObject(entry);
@ -434,8 +433,6 @@ public:
TraceEvent("GetTLogTeamDone").detail("Completed", bCompleted).detail("Policy", policy->info()).detail("Results", results.size()).detail("Processes", logServerSet->size()).detail("Workers", id_worker.size()) TraceEvent("GetTLogTeamDone").detail("Completed", bCompleted).detail("Policy", policy->info()).detail("Results", results.size()).detail("Processes", logServerSet->size()).detail("Workers", id_worker.size())
.detail("Required", required).detail("Desired", desired).detail("RatingTests",SERVER_KNOBS->POLICY_RATING_TESTS).detail("PolicyGenerations",SERVER_KNOBS->POLICY_GENERATIONS); .detail("Required", required).detail("Desired", desired).detail("RatingTests",SERVER_KNOBS->POLICY_RATING_TESTS).detail("PolicyGenerations",SERVER_KNOBS->POLICY_GENERATIONS);
logServerSet->clear();
logServerSet.clear();
return results; return results;
} }
@ -448,7 +445,7 @@ public:
if(satelliteFallback || region.satelliteTLogUsableDcsFallback == 0) { if(satelliteFallback || region.satelliteTLogUsableDcsFallback == 0) {
throw no_more_servers(); throw no_more_servers();
} else { } else {
if(now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) { if(!goodRecruitmentTime.isReady()) {
throw operation_failed(); throw operation_failed();
} }
satelliteFallback = true; satelliteFallback = true;
@ -692,18 +689,8 @@ public:
result.logRouters.push_back(logRouters[i].interf); result.logRouters.push_back(logRouters[i].interf);
} }
if(!remoteStartTime.present()) {
double maxAvailableTime = 0;
for(auto& it : result.remoteTLogs) {
maxAvailableTime = std::max(maxAvailableTime, id_worker[it.locality.processId()].lastAvailableTime);
}
for(auto& it : result.logRouters) {
maxAvailableTime = std::max(maxAvailableTime, id_worker[it.locality.processId()].lastAvailableTime);
}
remoteStartTime = maxAvailableTime;
}
if( now() - remoteStartTime.get() < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && if( !goodRemoteRecruitmentTime.isReady() &&
( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs(), ProcessClass::TLog).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) || ( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs(), ProcessClass::TLog).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) ||
( RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount, ProcessClass::LogRouter).betterCount(RoleFitness(logRouters, ProcessClass::LogRouter)) ) ) ) { ( RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount, ProcessClass::LogRouter).betterCount(RoleFitness(logRouters, ProcessClass::LogRouter)) ) ) ) {
throw operation_failed(); throw operation_failed();
@ -790,7 +777,7 @@ public:
[](const WorkerDetails& w) { return w.interf; }); [](const WorkerDetails& w) { return w.interf; });
} }
if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY && if( !goodRecruitmentTime.isReady() &&
( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
( region.satelliteTLogReplicationFactor > 0 && RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId), ProcessClass::TLog).betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog)) ) || ( region.satelliteTLogReplicationFactor > 0 && RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId), ProcessClass::TLog).betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog)) ) ||
RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(RoleFitness(proxies, ProcessClass::Proxy)) || RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(RoleFitness(proxies, ProcessClass::Proxy)) ||
@ -827,7 +814,7 @@ public:
} }
throw no_more_servers(); throw no_more_servers();
} catch( Error& e ) { } catch( Error& e ) {
if (now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && regions[1].dcId != clusterControllerDcId.get()) { if (!goodRemoteRecruitmentTime.isReady() && regions[1].dcId != clusterControllerDcId.get()) {
throw operation_failed(); throw operation_failed();
} }
@ -955,7 +942,7 @@ public:
.detail("DesiredProxies", req.configuration.getDesiredProxies()).detail("ActualProxies", result.proxies.size()) .detail("DesiredProxies", req.configuration.getDesiredProxies()).detail("ActualProxies", result.proxies.size())
.detail("DesiredResolvers", req.configuration.getDesiredResolvers()).detail("ActualResolvers", result.resolvers.size()); .detail("DesiredResolvers", req.configuration.getDesiredResolvers()).detail("ActualResolvers", result.resolvers.size());
if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY && if( !goodRecruitmentTime.isReady() &&
( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) || ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(bestFitness.proxy) || RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(bestFitness.proxy) ||
RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers(), ProcessClass::Resolver).betterCount(bestFitness.resolver) ) ) { RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers(), ProcessClass::Resolver).betterCount(bestFitness.resolver) ) ) {
@ -1347,11 +1334,13 @@ public:
ActorCollection ac; ActorCollection ac;
UpdateWorkerList updateWorkerList; UpdateWorkerList updateWorkerList;
Future<Void> outstandingRequestChecker; Future<Void> outstandingRequestChecker;
Future<Void> outstandingRemoteRequestChecker;
DBInfo db; DBInfo db;
Database cx; Database cx;
double startTime; double startTime;
Optional<double> remoteStartTime; Future<Void> goodRecruitmentTime;
Future<Void> goodRemoteRecruitmentTime;
Version datacenterVersionDifference; Version datacenterVersionDifference;
PromiseStream<Future<Void>> addActor; PromiseStream<Future<Void>> addActor;
bool versionDifferenceUpdated; bool versionDifferenceUpdated;
@ -1375,8 +1364,9 @@ public:
ClusterControllerData( ClusterControllerFullInterface const& ccInterface, LocalityData const& locality ) ClusterControllerData( ClusterControllerFullInterface const& ccInterface, LocalityData const& locality )
: clusterControllerProcessId(locality.processId()), clusterControllerDcId(locality.dcId()), : clusterControllerProcessId(locality.processId()), clusterControllerDcId(locality.dcId()),
id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false), id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), outstandingRemoteRequestChecker(Void()), gotProcessClasses(false),
gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0), gotFullyRecoveredConfig(false), startTime(now()), goodRecruitmentTime(Never()),
goodRemoteRecruitmentTime(Never()), datacenterVersionDifference(0),
versionDifferenceUpdated(false), recruitingDistributor(false), recruitRatekeeper(false), versionDifferenceUpdated(false), recruitingDistributor(false), recruitRatekeeper(false),
clusterControllerMetrics("ClusterController", id.toString()), clusterControllerMetrics("ClusterController", id.toString()),
openDatabaseRequests("OpenDatabaseRequests", clusterControllerMetrics), openDatabaseRequests("OpenDatabaseRequests", clusterControllerMetrics),
@ -1424,7 +1414,7 @@ ACTOR Future<Void> clusterWatchDatabase( ClusterControllerData* cluster, Cluster
id_used[cluster->clusterControllerProcessId]++; id_used[cluster->clusterControllerProcessId]++;
state WorkerFitnessInfo masterWorker = cluster->getWorkerForRoleInDatacenter(cluster->clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db->config, id_used); state WorkerFitnessInfo masterWorker = cluster->getWorkerForRoleInDatacenter(cluster->clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db->config, id_used);
if( ( masterWorker.worker.processClass.machineClassFitness( ProcessClass::Master ) > SERVER_KNOBS->EXPECTED_MASTER_FITNESS || masterWorker.worker.interf.locality.processId() == cluster->clusterControllerProcessId ) if( ( masterWorker.worker.processClass.machineClassFitness( ProcessClass::Master ) > SERVER_KNOBS->EXPECTED_MASTER_FITNESS || masterWorker.worker.interf.locality.processId() == cluster->clusterControllerProcessId )
&& now() - cluster->startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY ) { && !cluster->goodRecruitmentTime.isReady() ) {
TraceEvent("CCWDB", cluster->id).detail("Fitness", masterWorker.worker.processClass.machineClassFitness( ProcessClass::Master )); TraceEvent("CCWDB", cluster->id).detail("Fitness", masterWorker.worker.processClass.machineClassFitness( ProcessClass::Master ));
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) ); wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
continue; continue;
@ -1703,9 +1693,11 @@ void checkBetterDDOrRK(ClusterControllerData* self) {
ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) { ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
try { try {
wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) ); wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) );
while( !self->goodRecruitmentTime.isReady() ) {
wait(self->goodRecruitmentTime);
}
checkOutstandingRecruitmentRequests( self ); checkOutstandingRecruitmentRequests( self );
checkOutstandingRemoteRecruitmentRequests( self );
checkOutstandingStorageRequests( self ); checkOutstandingStorageRequests( self );
checkBetterDDOrRK(self); checkBetterDDOrRK(self);
@ -1715,7 +1707,23 @@ ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().read().master.id()); TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().read().master.id());
} }
} catch( Error &e ) { } catch( Error &e ) {
if(e.code() != error_code_operation_failed && e.code() != error_code_no_more_servers) { if(e.code() != error_code_no_more_servers) {
TraceEvent(SevError, "CheckOutstandingError").error(e);
}
}
return Void();
}
ACTOR Future<Void> doCheckOutstandingRemoteRequests( ClusterControllerData* self ) {
try {
wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) );
while( !self->goodRemoteRecruitmentTime.isReady() ) {
wait(self->goodRemoteRecruitmentTime);
}
checkOutstandingRemoteRecruitmentRequests( self );
} catch( Error &e ) {
if(e.code() != error_code_no_more_servers) {
TraceEvent(SevError, "CheckOutstandingError").error(e); TraceEvent(SevError, "CheckOutstandingError").error(e);
} }
} }
@ -1723,10 +1731,13 @@ ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
} }
void checkOutstandingRequests( ClusterControllerData* self ) { void checkOutstandingRequests( ClusterControllerData* self ) {
if( !self->outstandingRequestChecker.isReady() ) if( self->outstandingRemoteRequestChecker.isReady() ) {
return; self->outstandingRemoteRequestChecker = doCheckOutstandingRemoteRequests(self);
}
self->outstandingRequestChecker = doCheckOutstandingRequests(self); if( self->outstandingRequestChecker.isReady() ) {
self->outstandingRequestChecker = doCheckOutstandingRequests(self);
}
} }
ACTOR Future<Void> rebootAndCheck( ClusterControllerData* cluster, Optional<Standalone<StringRef>> processID ) { ACTOR Future<Void> rebootAndCheck( ClusterControllerData* cluster, Optional<Standalone<StringRef>> processID ) {
@ -1734,7 +1745,6 @@ ACTOR Future<Void> rebootAndCheck( ClusterControllerData* cluster, Optional<Stan
auto watcher = cluster->id_worker.find(processID); auto watcher = cluster->id_worker.find(processID);
ASSERT(watcher != cluster->id_worker.end()); ASSERT(watcher != cluster->id_worker.end());
watcher->second.lastAvailableTime = now();
watcher->second.reboots++; watcher->second.reboots++;
wait( delay( g_network->isSimulated() ? SERVER_KNOBS->SIM_SHUTDOWN_TIMEOUT : SERVER_KNOBS->SHUTDOWN_TIMEOUT ) ); wait( delay( g_network->isSimulated() ? SERVER_KNOBS->SIM_SHUTDOWN_TIMEOUT : SERVER_KNOBS->SHUTDOWN_TIMEOUT ) );
} }
@ -1998,7 +2008,7 @@ ACTOR Future<Void> clusterRecruitFromConfiguration( ClusterControllerData* self,
req.reply.send( rep ); req.reply.send( rep );
return Void(); return Void();
} catch (Error& e) { } catch (Error& e) {
if (e.code() == error_code_no_more_servers && now() - self->startTime >= SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) { if (e.code() == error_code_no_more_servers && self->goodRecruitmentTime.isReady()) {
self->outstandingRecruitmentRequests.push_back( req ); self->outstandingRecruitmentRequests.push_back( req );
TraceEvent(SevWarn, "RecruitFromConfigurationNotAvailable", self->id).error(e); TraceEvent(SevWarn, "RecruitFromConfigurationNotAvailable", self->id).error(e);
return Void(); return Void();
@ -2010,7 +2020,7 @@ ACTOR Future<Void> clusterRecruitFromConfiguration( ClusterControllerData* self,
throw; // goodbye, cluster controller throw; // goodbye, cluster controller
} }
} }
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) ); wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
} }
} }
@ -2026,7 +2036,7 @@ ACTOR Future<Void> clusterRecruitRemoteFromConfiguration( ClusterControllerData*
req.reply.send( rep ); req.reply.send( rep );
return Void(); return Void();
} catch (Error& e) { } catch (Error& e) {
if (e.code() == error_code_no_more_servers && self->remoteStartTime.present() && now() - self->remoteStartTime.get() >= SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) { if (e.code() == error_code_no_more_servers && self->goodRemoteRecruitmentTime.isReady()) {
self->outstandingRemoteRecruitmentRequests.push_back( req ); self->outstandingRemoteRecruitmentRequests.push_back( req );
TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e); TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e);
return Void(); return Void();
@ -2038,7 +2048,7 @@ ACTOR Future<Void> clusterRecruitRemoteFromConfiguration( ClusterControllerData*
throw; // goodbye, cluster controller throw; // goodbye, cluster controller
} }
} }
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) ); wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
} }
} }
@ -2141,6 +2151,8 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
if(info == self->id_worker.end()) { if(info == self->id_worker.end()) {
TraceEvent("ClusterControllerActualWorkers", self->id).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size()); TraceEvent("ClusterControllerActualWorkers", self->id).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size());
self->goodRecruitmentTime = lowPriorityDelay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY);
self->goodRemoteRecruitmentTime = lowPriorityDelay(SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY);
} else { } else {
TraceEvent("ClusterControllerWorkerAlreadyRegistered", self->id).suppressFor(1.0).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size()); TraceEvent("ClusterControllerWorkerAlreadyRegistered", self->id).suppressFor(1.0).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size());
} }
@ -2928,7 +2940,7 @@ ACTOR Future<DataDistributorInterface> startDataDistributor( ClusterControllerDa
throw; throw;
} }
} }
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) ); wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
} }
} }
@ -3002,7 +3014,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
throw; throw;
} }
} }
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) ); wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
} }
} }

View File

@ -401,7 +401,7 @@ struct LeaderRegisterCollection {
if( !self->pStore->exists() ) if( !self->pStore->exists() )
return Void(); return Void();
OnDemandStore &store = *self->pStore; OnDemandStore &store = *self->pStore;
Standalone<VectorRef<KeyValueRef>> forwardingInfo = wait( store->readRange( fwdKeys ) ); Standalone<RangeResultRef> forwardingInfo = wait( store->readRange( fwdKeys ) );
for( int i = 0; i < forwardingInfo.size(); i++ ) { for( int i = 0; i < forwardingInfo.size(); i++ ) {
LeaderInfo forwardInfo; LeaderInfo forwardInfo;
forwardInfo.forward = true; forwardInfo.forward = true;

View File

@ -251,63 +251,68 @@ public:
virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) { virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) {
int64_t physicalBytes = getLoadAverage(); int64_t physicalBytes = getLoadAverage();
double minFreeSpaceRatio = getMinFreeSpaceRatio(includeInFlight); double minAvailableSpaceRatio = getMinAvailableSpaceRatio(includeInFlight);
int64_t inFlightBytes = includeInFlight ? getDataInFlightToTeam() / servers.size() : 0; int64_t inFlightBytes = includeInFlight ? getDataInFlightToTeam() / servers.size() : 0;
double freeSpaceMultiplier = SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF / ( std::max( std::min( SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF, minFreeSpaceRatio ), 0.000001 ) ); double availableSpaceMultiplier = SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF / ( std::max( std::min( SERVER_KNOBS->FREE_SPACE_RATIO_CUTOFF, minAvailableSpaceRatio ), 0.000001 ) );
if(servers.size()>2) {
//make sure in triple replication the penalty is high enough that you will always avoid a team with a member at 20% free space
availableSpaceMultiplier = availableSpaceMultiplier * availableSpaceMultiplier;
}
if(freeSpaceMultiplier > 1 && deterministicRandom()->random01() < 0.001) if(minAvailableSpaceRatio < SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO) {
TraceEvent(SevWarn, "DiskNearCapacity").detail("FreeSpaceRatio", minFreeSpaceRatio); TraceEvent(SevWarn, "DiskNearCapacity").suppressFor(1.0).detail("AvailableSpaceRatio", minAvailableSpaceRatio);
}
return (physicalBytes + (inflightPenalty*inFlightBytes)) * freeSpaceMultiplier; return (physicalBytes + (inflightPenalty*inFlightBytes)) * availableSpaceMultiplier;
} }
virtual int64_t getMinFreeSpace( bool includeInFlight = true ) { virtual int64_t getMinAvailableSpace( bool includeInFlight = true ) {
int64_t minFreeSpace = std::numeric_limits<int64_t>::max(); int64_t minAvailableSpace = std::numeric_limits<int64_t>::max();
for(int i=0; i<servers.size(); i++) { for(int i=0; i<servers.size(); i++) {
if( servers[i]->serverMetrics.present() ) { if( servers[i]->serverMetrics.present() ) {
auto& replyValue = servers[i]->serverMetrics.get(); auto& replyValue = servers[i]->serverMetrics.get();
ASSERT(replyValue.free.bytes >= 0); ASSERT(replyValue.available.bytes >= 0);
ASSERT(replyValue.capacity.bytes >= 0); ASSERT(replyValue.capacity.bytes >= 0);
int64_t bytesFree = replyValue.free.bytes; int64_t bytesAvailable = replyValue.available.bytes;
if(includeInFlight) { if(includeInFlight) {
bytesFree -= servers[i]->dataInFlightToServer; bytesAvailable -= servers[i]->dataInFlightToServer;
} }
minFreeSpace = std::min(bytesFree, minFreeSpace); minAvailableSpace = std::min(bytesAvailable, minAvailableSpace);
} }
} }
return minFreeSpace; // Could be negative return minAvailableSpace; // Could be negative
} }
virtual double getMinFreeSpaceRatio( bool includeInFlight = true ) { virtual double getMinAvailableSpaceRatio( bool includeInFlight = true ) {
double minRatio = 1.0; double minRatio = 1.0;
for(int i=0; i<servers.size(); i++) { for(int i=0; i<servers.size(); i++) {
if( servers[i]->serverMetrics.present() ) { if( servers[i]->serverMetrics.present() ) {
auto& replyValue = servers[i]->serverMetrics.get(); auto& replyValue = servers[i]->serverMetrics.get();
ASSERT(replyValue.free.bytes >= 0); ASSERT(replyValue.available.bytes >= 0);
ASSERT(replyValue.capacity.bytes >= 0); ASSERT(replyValue.capacity.bytes >= 0);
int64_t bytesFree = replyValue.free.bytes; int64_t bytesAvailable = replyValue.available.bytes;
if(includeInFlight) { if(includeInFlight) {
bytesFree = std::max((int64_t)0, bytesFree - servers[i]->dataInFlightToServer); bytesAvailable = std::max((int64_t)0, bytesAvailable - servers[i]->dataInFlightToServer);
} }
if(replyValue.capacity.bytes == 0) if(replyValue.capacity.bytes == 0)
minRatio = 0; minRatio = 0;
else else
minRatio = std::min( minRatio, ((double)bytesFree) / replyValue.capacity.bytes ); minRatio = std::min( minRatio, ((double)bytesAvailable) / replyValue.capacity.bytes );
} }
} }
return minRatio; return minRatio;
} }
virtual bool hasHealthyFreeSpace() { virtual bool hasHealthyAvailableSpace(double minRatio) {
return getMinFreeSpaceRatio() > SERVER_KNOBS->MIN_FREE_SPACE_RATIO && getMinFreeSpace() > SERVER_KNOBS->MIN_FREE_SPACE; return getMinAvailableSpaceRatio() >= minRatio && getMinAvailableSpace() > SERVER_KNOBS->MIN_AVAILABLE_SPACE;
} }
virtual Future<Void> updateStorageMetrics() { virtual Future<Void> updateStorageMetrics() {
@ -638,6 +643,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
std::vector<DDTeamCollection*> teamCollections; std::vector<DDTeamCollection*> teamCollections;
AsyncVar<Optional<Key>> healthyZone; AsyncVar<Optional<Key>> healthyZone;
Future<bool> clearHealthyZoneFuture; Future<bool> clearHealthyZoneFuture;
double medianAvailableSpace;
double lastMedianAvailableSpaceUpdate;
// clang-format on // clang-format on
void resetLocalitySet() { void resetLocalitySet() {
@ -682,8 +689,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)), initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY), optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),
unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs), unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs),
zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO),
processingUnhealthy(processingUnhealthy) { lastMedianAvailableSpaceUpdate(0), processingUnhealthy(processingUnhealthy) {
if(!primary || configuration.usableRegions == 1) { if(!primary || configuration.usableRegions == 1) {
TraceEvent("DDTrackerStarting", distributorId) TraceEvent("DDTrackerStarting", distributorId)
.detail( "State", "Inactive" ) .detail( "State", "Inactive" )
@ -757,6 +764,24 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
ACTOR static Future<Void> getTeam( DDTeamCollection* self, GetTeamRequest req ) { ACTOR static Future<Void> getTeam( DDTeamCollection* self, GetTeamRequest req ) {
try { try {
wait( self->checkBuildTeams( self ) ); wait( self->checkBuildTeams( self ) );
if(now() - self->lastMedianAvailableSpaceUpdate > SERVER_KNOBS->AVAILABLE_SPACE_UPDATE_DELAY) {
self->lastMedianAvailableSpaceUpdate = now();
std::vector<double> teamAvailableSpace;
teamAvailableSpace.reserve(self->teams.size());
for( int i = 0; i < self->teams.size(); i++ ) {
if (self->teams[i]->isHealthy()) {
teamAvailableSpace.push_back(self->teams[i]->getMinAvailableSpaceRatio());
}
}
size_t pivot = teamAvailableSpace.size()/2;
if (teamAvailableSpace.size() > 1) {
std::nth_element(teamAvailableSpace.begin(), teamAvailableSpace.begin()+pivot, teamAvailableSpace.end());
self->medianAvailableSpace = std::max(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO, std::min(SERVER_KNOBS->TARGET_AVAILABLE_SPACE_RATIO, teamAvailableSpace[pivot]));
} else {
self->medianAvailableSpace = SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO;
}
}
// Select the best team // Select the best team
// Currently the metric is minimum used disk space (adjusted for data in flight) // Currently the metric is minimum used disk space (adjusted for data in flight)
@ -777,6 +802,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
std::vector<Reference<IDataDistributionTeam>> randomTeams; std::vector<Reference<IDataDistributionTeam>> randomTeams;
const std::set<UID> completeSources(req.completeSources.begin(), req.completeSources.end()); const std::set<UID> completeSources(req.completeSources.begin(), req.completeSources.end());
// Note: this block does not apply any filters from the request
if( !req.wantsNewServers ) { if( !req.wantsNewServers ) {
for( int i = 0; i < req.completeSources.size(); i++ ) { for( int i = 0; i < req.completeSources.size(); i++ ) {
if( !self->server_info.count( req.completeSources[i] ) ) { if( !self->server_info.count( req.completeSources[i] ) ) {
@ -803,7 +829,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
if( req.wantsTrueBest ) { if( req.wantsTrueBest ) {
ASSERT( !bestOption.present() ); ASSERT( !bestOption.present() );
for( int i = 0; i < self->teams.size(); i++ ) { for( int i = 0; i < self->teams.size(); i++ ) {
if( self->teams[i]->isHealthy() && (!req.preferLowerUtilization || self->teams[i]->hasHealthyFreeSpace()) ) { if (self->teams[i]->isHealthy() &&
(!req.preferLowerUtilization || self->teams[i]->hasHealthyAvailableSpace(self->medianAvailableSpace)) &&
(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(self->teams[i]->getServerIDs(), self->primary)).size() > 0))
{
int64_t loadBytes = self->teams[i]->getLoadBytes(true, req.inflightPenalty); int64_t loadBytes = self->teams[i]->getLoadBytes(true, req.inflightPenalty);
if( !bestOption.present() || ( req.preferLowerUtilization && loadBytes < bestLoadBytes ) || ( !req.preferLowerUtilization && loadBytes > bestLoadBytes ) ) { if( !bestOption.present() || ( req.preferLowerUtilization && loadBytes < bestLoadBytes ) || ( !req.preferLowerUtilization && loadBytes > bestLoadBytes ) ) {
bestLoadBytes = loadBytes; bestLoadBytes = loadBytes;
@ -818,7 +847,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// If unhealthy team is majority, we may not find an ok dest in this while loop // If unhealthy team is majority, we may not find an ok dest in this while loop
Reference<IDataDistributionTeam> dest = deterministicRandom()->randomChoice(self->teams); Reference<IDataDistributionTeam> dest = deterministicRandom()->randomChoice(self->teams);
bool ok = dest->isHealthy() && (!req.preferLowerUtilization || dest->hasHealthyFreeSpace()); bool ok = dest->isHealthy() &&
(!req.preferLowerUtilization || dest->hasHealthyAvailableSpace(self->medianAvailableSpace)) &&
(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(dest->getServerIDs(), self->primary)).size() > 0);
for(int i=0; ok && i<randomTeams.size(); i++) { for(int i=0; ok && i<randomTeams.size(); i++) {
if (randomTeams[i]->getServerIDs() == dest->getServerIDs()) { if (randomTeams[i]->getServerIDs() == dest->getServerIDs()) {
ok = false; ok = false;
@ -848,6 +880,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
// Note: req.completeSources can be empty and all servers (and server teams) can be unhealthy. // Note: req.completeSources can be empty and all servers (and server teams) can be unhealthy.
// We will get stuck at this! This only happens when a DC fails. No need to consider it right now. // We will get stuck at this! This only happens when a DC fails. No need to consider it right now.
// Note: this block does not apply any filters from the request
if(!bestOption.present() && self->zeroHealthyTeams->get()) { if(!bestOption.present() && self->zeroHealthyTeams->get()) {
//Attempt to find the unhealthy source server team and return it //Attempt to find the unhealthy source server team and return it
for( int i = 0; i < req.completeSources.size(); i++ ) { for( int i = 0; i < req.completeSources.size(); i++ ) {
@ -1317,7 +1350,6 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
TraceEvent("ServerTeamInfo", distributorId) TraceEvent("ServerTeamInfo", distributorId)
.detail("TeamIndex", i++) .detail("TeamIndex", i++)
.detail("Healthy", team->isHealthy()) .detail("Healthy", team->isHealthy())
.detail("HasHealthyFreeSpace", team->hasHealthyFreeSpace())
.detail("TeamSize", team->size()) .detail("TeamSize", team->size())
.detail("MemberIDs", team->getServerIDsStr()); .detail("MemberIDs", team->getServerIDsStr());
} }

View File

@ -45,9 +45,9 @@ struct IDataDistributionTeam {
virtual void addDataInFlightToTeam( int64_t delta ) = 0; virtual void addDataInFlightToTeam( int64_t delta ) = 0;
virtual int64_t getDataInFlightToTeam() = 0; virtual int64_t getDataInFlightToTeam() = 0;
virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) = 0; virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) = 0;
virtual int64_t getMinFreeSpace( bool includeInFlight = true ) = 0; virtual int64_t getMinAvailableSpace( bool includeInFlight = true ) = 0;
virtual double getMinFreeSpaceRatio( bool includeInFlight = true ) = 0; virtual double getMinAvailableSpaceRatio( bool includeInFlight = true ) = 0;
virtual bool hasHealthyFreeSpace() = 0; virtual bool hasHealthyAvailableSpace( double minRatio ) = 0;
virtual Future<Void> updateStorageMetrics() = 0; virtual Future<Void> updateStorageMetrics() = 0;
virtual void addref() = 0; virtual void addref() = 0;
virtual void delref() = 0; virtual void delref() = 0;
@ -75,18 +75,22 @@ struct GetTeamRequest {
bool wantsNewServers; bool wantsNewServers;
bool wantsTrueBest; bool wantsTrueBest;
bool preferLowerUtilization; bool preferLowerUtilization;
bool teamMustHaveShards;
double inflightPenalty; double inflightPenalty;
std::vector<UID> completeSources; std::vector<UID> completeSources;
Promise< Optional< Reference<IDataDistributionTeam> > > reply; Promise< Optional< Reference<IDataDistributionTeam> > > reply;
GetTeamRequest() {} GetTeamRequest() {}
GetTeamRequest( bool wantsNewServers, bool wantsTrueBest, bool preferLowerUtilization, double inflightPenalty = 1.0 ) : wantsNewServers( wantsNewServers ), wantsTrueBest( wantsTrueBest ), preferLowerUtilization( preferLowerUtilization ), inflightPenalty( inflightPenalty ) {} GetTeamRequest( bool wantsNewServers, bool wantsTrueBest, bool preferLowerUtilization, bool teamMustHaveShards, double inflightPenalty = 1.0 )
: wantsNewServers( wantsNewServers ), wantsTrueBest( wantsTrueBest ), preferLowerUtilization( preferLowerUtilization ), teamMustHaveShards( teamMustHaveShards ), inflightPenalty( inflightPenalty ) {}
std::string getDesc() { std::string getDesc() {
std::stringstream ss; std::stringstream ss;
ss << "WantsNewServers:" << wantsNewServers << " WantsTrueBest:" << wantsTrueBest ss << "WantsNewServers:" << wantsNewServers << " WantsTrueBest:" << wantsTrueBest
<< " PreferLowerUtilization:" << preferLowerUtilization << " inflightPenalty:" << inflightPenalty << ";"; << " PreferLowerUtilization:" << preferLowerUtilization
<< " teamMustHaveShards:" << teamMustHaveShards
<< " inflightPenalty:" << inflightPenalty << ";";
ss << "CompleteSources:"; ss << "CompleteSources:";
for (auto& cs : completeSources) { for (auto& cs : completeSources) {
ss << cs.toString() << ","; ss << cs.toString() << ",";

View File

@ -170,25 +170,25 @@ public:
}); });
} }
virtual int64_t getMinFreeSpace(bool includeInFlight = true) { virtual int64_t getMinAvailableSpace(bool includeInFlight = true) {
int64_t result = std::numeric_limits<int64_t>::max(); int64_t result = std::numeric_limits<int64_t>::max();
for (auto it = teams.begin(); it != teams.end(); it++) { for (auto it = teams.begin(); it != teams.end(); it++) {
result = std::min(result, (*it)->getMinFreeSpace(includeInFlight)); result = std::min(result, (*it)->getMinAvailableSpace(includeInFlight));
} }
return result; return result;
} }
virtual double getMinFreeSpaceRatio(bool includeInFlight = true) { virtual double getMinAvailableSpaceRatio(bool includeInFlight = true) {
double result = std::numeric_limits<double>::max(); double result = std::numeric_limits<double>::max();
for (auto it = teams.begin(); it != teams.end(); it++) { for (auto it = teams.begin(); it != teams.end(); it++) {
result = std::min(result, (*it)->getMinFreeSpaceRatio(includeInFlight)); result = std::min(result, (*it)->getMinAvailableSpaceRatio(includeInFlight));
} }
return result; return result;
} }
virtual bool hasHealthyFreeSpace() { virtual bool hasHealthyAvailableSpace(double minRatio) {
return all([](Reference<IDataDistributionTeam> team) { return all([minRatio](Reference<IDataDistributionTeam> team) {
return team->hasHealthyFreeSpace(); return team->hasHealthyAvailableSpace(minRatio);
}); });
} }
@ -938,7 +938,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY; if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT; if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;
auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, inflightPenalty); auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, false, inflightPenalty);
req.completeSources = rd.completeSources; req.completeSources = rd.completeSources;
Optional<Reference<IDataDistributionTeam>> bestTeam = wait(brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req))); Optional<Reference<IDataDistributionTeam>> bestTeam = wait(brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req)));
// If a DC has no healthy team, we stop checking the other DCs until // If a DC has no healthy team, we stop checking the other DCs until
@ -1136,8 +1136,10 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
} }
// Move a random shard of sourceTeam's to destTeam if sourceTeam has much more data than destTeam // Move a random shard of sourceTeam's to destTeam if sourceTeam has much more data than destTeam
ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<IDataDistributionTeam> sourceTeam, Reference<IDataDistributionTeam> destTeam, bool primary ) { ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<IDataDistributionTeam> sourceTeam,
Reference<IDataDistributionTeam> destTeam, bool primary, TraceEvent *traceEvent ) {
if(g_network->isSimulated() && g_simulator.speedUpSimulation) { if(g_network->isSimulated() && g_simulator.speedUpSimulation) {
traceEvent->detail("CancelingDueToSimulationSpeedup", true);
return false; return false;
} }
@ -1147,6 +1149,9 @@ ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<ID
state int64_t averageShardBytes = wait(req.getFuture()); state int64_t averageShardBytes = wait(req.getFuture());
state std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) ); state std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) );
traceEvent->detail("AverageShardBytes", averageShardBytes)
.detail("ShardsInSource", shards.size());
if( !shards.size() ) if( !shards.size() )
return false; return false;
@ -1168,28 +1173,28 @@ ACTOR Future<bool> rebalanceTeams( DDQueueData* self, int priority, Reference<ID
int64_t sourceBytes = sourceTeam->getLoadBytes(false); int64_t sourceBytes = sourceTeam->getLoadBytes(false);
int64_t destBytes = destTeam->getLoadBytes(); int64_t destBytes = destTeam->getLoadBytes();
if( sourceBytes - destBytes <= 3 * std::max<int64_t>( SERVER_KNOBS->MIN_SHARD_BYTES, metrics.bytes ) || metrics.bytes == 0 )
bool sourceAndDestTooSimilar = sourceBytes - destBytes <= 3 * std::max<int64_t>(SERVER_KNOBS->MIN_SHARD_BYTES, metrics.bytes);
traceEvent->detail("SourceBytes", sourceBytes)
.detail("DestBytes", destBytes)
.detail("ShardBytes", metrics.bytes)
.detail("SourceAndDestTooSimilar", sourceAndDestTooSimilar);
if( sourceAndDestTooSimilar || metrics.bytes == 0 ) {
return false; return false;
}
{ //verify the shard is still in sabtf
//verify the shard is still in sabtf shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) );
std::vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor( ShardsAffectedByTeamFailure::Team( sourceTeam->getServerIDs(), primary ) ); for( int i = 0; i < shards.size(); i++ ) {
for( int i = 0; i < shards.size(); i++ ) { if( moveShard == shards[i] ) {
if( moveShard == shards[i] ) { traceEvent->detail("ShardStillPresent", true);
TraceEvent(priority == SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM ? "BgDDMountainChopper" : "BgDDValleyFiller", self->distributorId) self->output.send( RelocateShard( moveShard, priority ) );
.detail("SourceBytes", sourceBytes) return true;
.detail("DestBytes", destBytes)
.detail("ShardBytes", metrics.bytes)
.detail("AverageShardBytes", averageShardBytes)
.detail("SourceTeam", sourceTeam->getDesc())
.detail("DestTeam", destTeam->getDesc());
self->output.send( RelocateShard( moveShard, priority ) );
return true;
}
} }
} }
traceEvent->detail("ShardStillPresent", false);
return false; return false;
} }
@ -1200,6 +1205,15 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
state double lastRead = 0; state double lastRead = 0;
state bool skipCurrentLoop = false; state bool skipCurrentLoop = false;
loop { loop {
state bool moved = false;
state TraceEvent traceEvent("BgDDMountainChopper", self->distributorId);
traceEvent.suppressFor(5.0)
.detail("PollingInterval", rebalancePollingInterval);
if(*self->lastLimited > 0) {
traceEvent.detail("SecondsSinceLastLimited", now() - *self->lastLimited);
}
try { try {
state Future<Void> delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch); state Future<Void> delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch);
if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) { if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) {
@ -1212,6 +1226,9 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
} }
skipCurrentLoop = val.present(); skipCurrentLoop = val.present();
} }
traceEvent.detail("Enabled", !skipCurrentLoop);
wait(delayF); wait(delayF);
if (skipCurrentLoop) { if (skipCurrentLoop) {
// set loop interval to avoid busy wait here. // set loop interval to avoid busy wait here.
@ -1219,26 +1236,35 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL); std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL);
continue; continue;
} }
traceEvent.detail("QueuedRelocations", self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM]);
if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM] < if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM] <
SERVER_KNOBS->DD_REBALANCE_PARALLELISM) { SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever( state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever(
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, true)))); self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, true, false))));
traceEvent.detail("DestTeam", printable(randomTeam.map<std::string>([](const Reference<IDataDistributionTeam>& team){
return team->getDesc();
})));
if (randomTeam.present()) { if (randomTeam.present()) {
// Destination team must be healthy and have healthyFreeSpace, otherwise, BestTeamStuck may occur state Optional<Reference<IDataDistributionTeam>> loadedTeam =
if (randomTeam.get()->getMinFreeSpaceRatio() > SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF && wait(brokenPromiseToNever(self->teamCollections[teamCollectionIndex].getTeam.getReply(
randomTeam.get()->hasHealthyFreeSpace()) { GetTeamRequest(true, true, false, true))));
state Optional<Reference<IDataDistributionTeam>> loadedTeam =
wait(brokenPromiseToNever(self->teamCollections[teamCollectionIndex].getTeam.getReply( traceEvent.detail("SourceTeam", printable(loadedTeam.map<std::string>([](const Reference<IDataDistributionTeam>& team){
GetTeamRequest(true, true, false)))); return team->getDesc();
if (loadedTeam.present()) { })));
bool moved =
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM, loadedTeam.get(), if (loadedTeam.present()) {
randomTeam.get(), teamCollectionIndex == 0)); bool _moved =
if (moved) { wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_OVERUTILIZED_TEAM, loadedTeam.get(),
resetCount = 0; randomTeam.get(), teamCollectionIndex == 0, &traceEvent));
} else { moved = _moved;
resetCount++; if (moved) {
} resetCount = 0;
} else {
resetCount++;
} }
} }
} }
@ -1257,10 +1283,16 @@ ACTOR Future<Void> BgDDMountainChopper( DDQueueData* self, int teamCollectionInd
rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL; rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT; resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
} }
traceEvent.detail("ResetCount", resetCount);
tr.reset(); tr.reset();
} catch (Error& e) { } catch (Error& e) {
traceEvent.error(e, true); // Log actor_cancelled because it's not legal to suppress an event that's initialized
wait(tr.onError(e)); wait(tr.onError(e));
} }
traceEvent.detail("Moved", moved);
traceEvent.log();
} }
} }
@ -1271,6 +1303,15 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
state double lastRead = 0; state double lastRead = 0;
state bool skipCurrentLoop = false; state bool skipCurrentLoop = false;
loop { loop {
state bool moved = false;
state TraceEvent traceEvent("BgDDValleyFiller", self->distributorId);
traceEvent.suppressFor(5.0)
.detail("PollingInterval", rebalancePollingInterval);
if(*self->lastLimited > 0) {
traceEvent.detail("SecondsSinceLastLimited", now() - *self->lastLimited);
}
try { try {
state Future<Void> delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch); state Future<Void> delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch);
if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) { if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) {
@ -1283,6 +1324,9 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
} }
skipCurrentLoop = val.present(); skipCurrentLoop = val.present();
} }
traceEvent.detail("Enabled", !skipCurrentLoop);
wait(delayF); wait(delayF);
if (skipCurrentLoop) { if (skipCurrentLoop) {
// set loop interval to avoid busy wait here. // set loop interval to avoid busy wait here.
@ -1290,25 +1334,34 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL); std::max(rebalancePollingInterval, SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL);
continue; continue;
} }
traceEvent.detail("QueuedRelocations", self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM]);
if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] < if (self->priority_relocations[SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM] <
SERVER_KNOBS->DD_REBALANCE_PARALLELISM) { SERVER_KNOBS->DD_REBALANCE_PARALLELISM) {
state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever( state Optional<Reference<IDataDistributionTeam>> randomTeam = wait(brokenPromiseToNever(
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, false)))); self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, false, false, true))));
traceEvent.detail("SourceTeam", printable(randomTeam.map<std::string>([](const Reference<IDataDistributionTeam>& team){
return team->getDesc();
})));
if (randomTeam.present()) { if (randomTeam.present()) {
state Optional<Reference<IDataDistributionTeam>> unloadedTeam = wait(brokenPromiseToNever( state Optional<Reference<IDataDistributionTeam>> unloadedTeam = wait(brokenPromiseToNever(
self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, true, true)))); self->teamCollections[teamCollectionIndex].getTeam.getReply(GetTeamRequest(true, true, true, false))));
traceEvent.detail("DestTeam", printable(unloadedTeam.map<std::string>([](const Reference<IDataDistributionTeam>& team){
return team->getDesc();
})));
if (unloadedTeam.present()) { if (unloadedTeam.present()) {
// Destination team must be healthy and healthyFreeSpace, otherwise, BestTeamStuck may occur bool _moved =
if (unloadedTeam.get()->getMinFreeSpaceRatio() > SERVER_KNOBS->FREE_SPACE_RATIO_DD_CUTOFF && wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, randomTeam.get(),
unloadedTeam.get()->hasHealthyFreeSpace()) { unloadedTeam.get(), teamCollectionIndex == 0, &traceEvent));
bool moved = moved = _moved;
wait(rebalanceTeams(self, SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, randomTeam.get(), if (moved) {
unloadedTeam.get(), teamCollectionIndex == 0)); resetCount = 0;
if (moved) { } else {
resetCount = 0; resetCount++;
} else {
resetCount++;
}
} }
} }
} }
@ -1327,10 +1380,16 @@ ACTOR Future<Void> BgDDValleyFiller( DDQueueData* self, int teamCollectionIndex)
rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL; rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT; resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
} }
traceEvent.detail("ResetCount", resetCount);
tr.reset(); tr.reset();
} catch (Error& e) { } catch (Error& e) {
traceEvent.error(e, true); // Log actor_cancelled because it's not legal to suppress an event that's initialized
wait(tr.onError(e)); wait(tr.onError(e));
} }
traceEvent.detail("Moved", moved);
traceEvent.log();
} }
} }

View File

@ -49,7 +49,7 @@ public:
// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending // If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
// The total size of the returned value (less the last entry) will be less than byteLimit // The total size of the returned value (less the last entry) will be less than byteLimit
virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) = 0; virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) = 0;
// To debug MEMORY_RADIXTREE type ONLY // To debug MEMORY_RADIXTREE type ONLY
// Returns (1) how many key & value pairs have been inserted (2) how many nodes have been created (3) how many // Returns (1) how many key & value pairs have been inserted (2) how many nodes have been created (3) how many

View File

@ -77,12 +77,12 @@ struct KeyValueStoreCompressTestData : IKeyValueStore {
// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending // If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
// The total size of the returned value (less the last entry) will be less than byteLimit // The total size of the returned value (less the last entry) will be less than byteLimit
virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) { virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
return doReadRange(store, keys, rowLimit, byteLimit); return doReadRange(store, keys, rowLimit, byteLimit);
} }
ACTOR Future<Standalone<VectorRef<KeyValueRef>>> doReadRange( IKeyValueStore* store, KeyRangeRef keys, int rowLimit, int byteLimit ) { ACTOR Future<Standalone<RangeResultRef>> doReadRange( IKeyValueStore* store, KeyRangeRef keys, int rowLimit, int byteLimit ) {
Standalone<VectorRef<KeyValueRef>> _vs = wait( store->readRange(keys, rowLimit, byteLimit) ); Standalone<RangeResultRef> _vs = wait( store->readRange(keys, rowLimit, byteLimit) );
Standalone<VectorRef<KeyValueRef>> vs = _vs; // Get rid of implicit const& from wait statement Standalone<RangeResultRef> vs = _vs; // Get rid of implicit const& from wait statement
Arena& a = vs.arena(); Arena& a = vs.arena();
for(int i=0; i<vs.size(); i++) for(int i=0; i<vs.size(); i++)
vs[i].value = ValueRef( a, (ValueRef const&)unpack(vs[i].value) ); vs[i].value = ValueRef( a, (ValueRef const&)unpack(vs[i].value) );

View File

@ -209,15 +209,18 @@ public:
// If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending // If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
// The total size of the returned value (less the last entry) will be less than byteLimit // The total size of the returned value (less the last entry) will be less than byteLimit
virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange(KeyRangeRef keys, int rowLimit = 1 << 30, virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
int byteLimit = 1 << 30) { if(recovering.isError()) throw recovering.getError();
if (recovering.isError()) throw recovering.getError();
if (!recovering.isReady()) return waitAndReadRange(this, keys, rowLimit, byteLimit); if (!recovering.isReady()) return waitAndReadRange(this, keys, rowLimit, byteLimit);
Standalone<VectorRef<KeyValueRef>> result; Standalone<RangeResultRef> result;
if (rowLimit >= 0) { if (rowLimit == 0) {
return result;
}
if (rowLimit > 0) {
auto it = data.lower_bound(keys.begin); auto it = data.lower_bound(keys.begin);
while (it != data.end() && rowLimit && byteLimit >= 0) { while (it != data.end() && rowLimit && byteLimit > 0) {
StringRef tempKey = it.getKey(reserved_buffer); StringRef tempKey = it.getKey(reserved_buffer);
if (tempKey >= keys.end) break; if (tempKey >= keys.end) break;
@ -229,7 +232,7 @@ public:
} else { } else {
rowLimit = -rowLimit; rowLimit = -rowLimit;
auto it = data.previous(data.lower_bound(keys.end)); auto it = data.previous(data.lower_bound(keys.end));
while (it != data.end() && rowLimit && byteLimit >= 0) { while (it != data.end() && rowLimit && byteLimit > 0) {
StringRef tempKey = it.getKey(reserved_buffer); StringRef tempKey = it.getKey(reserved_buffer);
if (tempKey < keys.begin) break; if (tempKey < keys.begin) break;
@ -239,6 +242,12 @@ public:
--rowLimit; --rowLimit;
} }
} }
result.more = rowLimit == 0 || byteLimit <= 0;
if(result.more) {
ASSERT(result.size() > 0);
result.readThrough = result[result.size()-1].key;
}
return result; return result;
} }
@ -689,7 +698,7 @@ private:
wait( self->recovering ); wait( self->recovering );
return self->readValuePrefix(key, maxLength).get(); return self->readValuePrefix(key, maxLength).get();
} }
ACTOR static Future<Standalone<VectorRef<KeyValueRef>>> waitAndReadRange( KeyValueStoreMemory* self, KeyRange keys, int rowLimit, int byteLimit ) { ACTOR static Future<Standalone<RangeResultRef>> waitAndReadRange( KeyValueStoreMemory* self, KeyRange keys, int rowLimit, int byteLimit ) {
wait( self->recovering ); wait( self->recovering );
return self->readRange(keys, rowLimit, byteLimit).get(); return self->readRange(keys, rowLimit, byteLimit).get();
} }

View File

@ -1076,21 +1076,26 @@ struct RawCursor {
} }
return Optional<Value>(); return Optional<Value>();
} }
Standalone<VectorRef<KeyValueRef>> getRange( KeyRangeRef keys, int rowLimit, int byteLimit ) { Standalone<RangeResultRef> getRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
Standalone<VectorRef<KeyValueRef>> result; Standalone<RangeResultRef> result;
int accumulatedBytes = 0; int accumulatedBytes = 0;
ASSERT( byteLimit > 0 ); ASSERT( byteLimit > 0 );
if(rowLimit == 0) {
return result;
}
if(db.fragment_values) { if(db.fragment_values) {
if(rowLimit >= 0) { if(rowLimit > 0) {
int r = moveTo(keys.begin); int r = moveTo(keys.begin);
if (r < 0) if (r < 0)
moveNext(); moveNext();
DefragmentingReader i(*this, result.arena(), true); DefragmentingReader i(*this, result.arena(), true);
Optional<KeyRef> nextKey = i.peek(); Optional<KeyRef> nextKey = i.peek();
while(nextKey.present() && nextKey.get() < keys.end && rowLimit-- && accumulatedBytes < byteLimit) { while(nextKey.present() && nextKey.get() < keys.end && rowLimit != 0 && accumulatedBytes < byteLimit) {
Optional<KeyValueRef> kv = i.getNext(); Optional<KeyValueRef> kv = i.getNext();
result.push_back(result.arena(), kv.get()); result.push_back(result.arena(), kv.get());
--rowLimit;
accumulatedBytes += sizeof(KeyValueRef) + kv.get().expectedSize(); accumulatedBytes += sizeof(KeyValueRef) + kv.get().expectedSize();
nextKey = i.peek(); nextKey = i.peek();
} }
@ -1101,37 +1106,45 @@ struct RawCursor {
movePrevious(); movePrevious();
DefragmentingReader i(*this, result.arena(), false); DefragmentingReader i(*this, result.arena(), false);
Optional<KeyRef> nextKey = i.peek(); Optional<KeyRef> nextKey = i.peek();
while(nextKey.present() && nextKey.get() >= keys.begin && rowLimit++ && accumulatedBytes < byteLimit) { while(nextKey.present() && nextKey.get() >= keys.begin && rowLimit != 0 && accumulatedBytes < byteLimit) {
Optional<KeyValueRef> kv = i.getNext(); Optional<KeyValueRef> kv = i.getNext();
result.push_back(result.arena(), kv.get()); result.push_back(result.arena(), kv.get());
++rowLimit;
accumulatedBytes += sizeof(KeyValueRef) + kv.get().expectedSize(); accumulatedBytes += sizeof(KeyValueRef) + kv.get().expectedSize();
nextKey = i.peek(); nextKey = i.peek();
} }
} }
} }
else { else {
if (rowLimit >= 0) { if (rowLimit > 0) {
int r = moveTo( keys.begin ); int r = moveTo( keys.begin );
if (r < 0) moveNext(); if (r < 0) moveNext();
while (this->valid && rowLimit-- && accumulatedBytes < byteLimit) { while (this->valid && rowLimit != 0 && accumulatedBytes < byteLimit) {
KeyValueRef kv = decodeKV( getEncodedRow( result.arena() ) ); KeyValueRef kv = decodeKV( getEncodedRow( result.arena() ) );
accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
if (kv.key >= keys.end) break; if (kv.key >= keys.end) break;
--rowLimit;
accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
result.push_back( result.arena(), kv ); result.push_back( result.arena(), kv );
moveNext(); moveNext();
} }
} else { } else {
int r = moveTo( keys.end ); int r = moveTo( keys.end );
if (r >= 0) movePrevious(); if (r >= 0) movePrevious();
while (this->valid && rowLimit++ && accumulatedBytes < byteLimit) { while (this->valid && rowLimit != 0 && accumulatedBytes < byteLimit) {
KeyValueRef kv = decodeKV( getEncodedRow( result.arena() ) ); KeyValueRef kv = decodeKV( getEncodedRow( result.arena() ) );
accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
if (kv.key < keys.begin) break; if (kv.key < keys.begin) break;
++rowLimit;
accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
result.push_back( result.arena(), kv ); result.push_back( result.arena(), kv );
movePrevious(); movePrevious();
} }
} }
} }
result.more = rowLimit == 0 || accumulatedBytes >= byteLimit;
if(result.more) {
ASSERT(result.size() > 0);
result.readThrough = result[result.size()-1].key;
}
return result; return result;
} }
@ -1451,7 +1464,7 @@ public:
virtual Future<Optional<Value>> readValue( KeyRef key, Optional<UID> debugID ); virtual Future<Optional<Value>> readValue( KeyRef key, Optional<UID> debugID );
virtual Future<Optional<Value>> readValuePrefix( KeyRef key, int maxLength, Optional<UID> debugID ); virtual Future<Optional<Value>> readValuePrefix( KeyRef key, int maxLength, Optional<UID> debugID );
virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ); virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 );
KeyValueStoreSQLite(std::string const& filename, UID logID, KeyValueStoreType type, bool checkChecksums, bool checkIntegrity); KeyValueStoreSQLite(std::string const& filename, UID logID, KeyValueStoreType type, bool checkChecksums, bool checkIntegrity);
~KeyValueStoreSQLite(); ~KeyValueStoreSQLite();
@ -1550,7 +1563,7 @@ private:
struct ReadRangeAction : TypedAction<Reader, ReadRangeAction>, FastAllocated<ReadRangeAction> { struct ReadRangeAction : TypedAction<Reader, ReadRangeAction>, FastAllocated<ReadRangeAction> {
KeyRange keys; KeyRange keys;
int rowLimit, byteLimit; int rowLimit, byteLimit;
ThreadReturnPromise<Standalone<VectorRef<KeyValueRef>>> result; ThreadReturnPromise<Standalone<RangeResultRef>> result;
ReadRangeAction(KeyRange keys, int rowLimit, int byteLimit) : keys(keys), rowLimit(rowLimit), byteLimit(byteLimit) {} ReadRangeAction(KeyRange keys, int rowLimit, int byteLimit) : keys(keys), rowLimit(rowLimit), byteLimit(byteLimit) {}
virtual double getTimeEstimate() { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; } virtual double getTimeEstimate() { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; }
}; };
@ -2000,7 +2013,7 @@ Future<Optional<Value>> KeyValueStoreSQLite::readValuePrefix( KeyRef key, int ma
readThreads->post(p); readThreads->post(p);
return f; return f;
} }
Future<Standalone<VectorRef<KeyValueRef>>> KeyValueStoreSQLite::readRange( KeyRangeRef keys, int rowLimit, int byteLimit ) { Future<Standalone<RangeResultRef>> KeyValueStoreSQLite::readRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
++readsRequested; ++readsRequested;
auto p = new Reader::ReadRangeAction(keys, rowLimit, byteLimit); auto p = new Reader::ReadRangeAction(keys, rowLimit, byteLimit);
auto f = p->result.getFuture(); auto f = p->result.getFuture();

View File

@ -79,7 +79,6 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( DISK_QUEUE_FILE_EXTENSION_BYTES, 10<<20 ); // BUGGIFYd per file within the DiskQueue init( DISK_QUEUE_FILE_EXTENSION_BYTES, 10<<20 ); // BUGGIFYd per file within the DiskQueue
init( DISK_QUEUE_FILE_SHRINK_BYTES, 100<<20 ); // BUGGIFYd per file within the DiskQueue init( DISK_QUEUE_FILE_SHRINK_BYTES, 100<<20 ); // BUGGIFYd per file within the DiskQueue
init( DISK_QUEUE_MAX_TRUNCATE_BYTES, 2<<30 ); if ( randomize && BUGGIFY ) DISK_QUEUE_MAX_TRUNCATE_BYTES = 0; init( DISK_QUEUE_MAX_TRUNCATE_BYTES, 2<<30 ); if ( randomize && BUGGIFY ) DISK_QUEUE_MAX_TRUNCATE_BYTES = 0;
init( TLOG_DEGRADED_DELAY_COUNT, 5 );
init( TLOG_DEGRADED_DURATION, 5.0 ); init( TLOG_DEGRADED_DURATION, 5.0 );
init( MAX_CACHE_VERSIONS, 10e6 ); init( MAX_CACHE_VERSIONS, 10e6 );
init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 ); init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 );
@ -91,8 +90,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
// Data distribution queue // Data distribution queue
init( HEALTH_POLL_TIME, 1.0 ); init( HEALTH_POLL_TIME, 1.0 );
init( BEST_TEAM_STUCK_DELAY, 1.0 ); init( BEST_TEAM_STUCK_DELAY, 1.0 );
init(BG_REBALANCE_POLLING_INTERVAL, 10.0); init( BG_REBALANCE_POLLING_INTERVAL, 10.0 );
init(BG_REBALANCE_SWITCH_CHECK_INTERVAL, 5.0); if (randomize && BUGGIFY) BG_REBALANCE_SWITCH_CHECK_INTERVAL = 1.0; init( BG_REBALANCE_SWITCH_CHECK_INTERVAL, 5.0 ); if (randomize && BUGGIFY) BG_REBALANCE_SWITCH_CHECK_INTERVAL = 1.0;
init( DD_QUEUE_LOGGING_INTERVAL, 5.0 ); init( DD_QUEUE_LOGGING_INTERVAL, 5.0 );
init( RELOCATION_PARALLELISM_PER_SOURCE_SERVER, 2 ); if( randomize && BUGGIFY ) RELOCATION_PARALLELISM_PER_SOURCE_SERVER = 1; init( RELOCATION_PARALLELISM_PER_SOURCE_SERVER, 2 ); if( randomize && BUGGIFY ) RELOCATION_PARALLELISM_PER_SOURCE_SERVER = 1;
init( DD_QUEUE_MAX_KEY_SERVERS, 100 ); if( randomize && BUGGIFY ) DD_QUEUE_MAX_KEY_SERVERS = 1; init( DD_QUEUE_MAX_KEY_SERVERS, 100 ); if( randomize && BUGGIFY ) DD_QUEUE_MAX_KEY_SERVERS = 1;
@ -104,7 +103,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( BG_DD_DECREASE_RATE, 1.02 ); init( BG_DD_DECREASE_RATE, 1.02 );
init( BG_DD_SATURATION_DELAY, 1.0 ); init( BG_DD_SATURATION_DELAY, 1.0 );
init( INFLIGHT_PENALTY_HEALTHY, 1.0 ); init( INFLIGHT_PENALTY_HEALTHY, 1.0 );
init( INFLIGHT_PENALTY_UNHEALTHY, 10.0 ); init( INFLIGHT_PENALTY_UNHEALTHY, 500.0 );
init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 ); init( INFLIGHT_PENALTY_ONE_LEFT, 1000.0 );
init( PRIORITY_RECOVER_MOVE, 110 ); init( PRIORITY_RECOVER_MOVE, 110 );
@ -175,7 +174,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
If this value is too small relative to SHARD_MIN_BYTES_PER_KSEC immediate merging work will be generated. If this value is too small relative to SHARD_MIN_BYTES_PER_KSEC immediate merging work will be generated.
*/ */
init( STORAGE_METRIC_TIMEOUT, 600.0 ); if( randomize && BUGGIFY ) STORAGE_METRIC_TIMEOUT = deterministicRandom()->coinflip() ? 10.0 : 60.0; init( STORAGE_METRIC_TIMEOUT, isSimulated ? 60.0 : 600.0 ); if( randomize && BUGGIFY ) STORAGE_METRIC_TIMEOUT = deterministicRandom()->coinflip() ? 10.0 : 30.0;
init( METRIC_DELAY, 0.1 ); if( randomize && BUGGIFY ) METRIC_DELAY = 1.0; init( METRIC_DELAY, 0.1 ); if( randomize && BUGGIFY ) METRIC_DELAY = 1.0;
init( ALL_DATA_REMOVED_DELAY, 1.0 ); init( ALL_DATA_REMOVED_DELAY, 1.0 );
init( INITIAL_FAILURE_REACTION_DELAY, 30.0 ); if( randomize && BUGGIFY ) INITIAL_FAILURE_REACTION_DELAY = 0.0; init( INITIAL_FAILURE_REACTION_DELAY, 30.0 ); if( randomize && BUGGIFY ) INITIAL_FAILURE_REACTION_DELAY = 0.0;
@ -190,12 +189,11 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( DATA_DISTRIBUTION_LOGGING_INTERVAL, 5.0 ); init( DATA_DISTRIBUTION_LOGGING_INTERVAL, 5.0 );
init( DD_ENABLED_CHECK_DELAY, 1.0 ); init( DD_ENABLED_CHECK_DELAY, 1.0 );
init( DD_STALL_CHECK_DELAY, 0.4 ); //Must be larger than 2*MAX_BUGGIFIED_DELAY init( DD_STALL_CHECK_DELAY, 0.4 ); //Must be larger than 2*MAX_BUGGIFIED_DELAY
init( DD_LOW_BANDWIDTH_DELAY, isSimulated ? 90.0 : 240.0 ); if( randomize && BUGGIFY ) DD_LOW_BANDWIDTH_DELAY = 0; //Because of delayJitter, this should be less than 0.9 * DD_MERGE_COALESCE_DELAY init( DD_LOW_BANDWIDTH_DELAY, isSimulated ? 15.0 : 240.0 ); if( randomize && BUGGIFY ) DD_LOW_BANDWIDTH_DELAY = 0; //Because of delayJitter, this should be less than 0.9 * DD_MERGE_COALESCE_DELAY
init( DD_MERGE_COALESCE_DELAY, isSimulated ? 120.0 : 300.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001; init( DD_MERGE_COALESCE_DELAY, isSimulated ? 30.0 : 300.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001;
init( STORAGE_METRICS_POLLING_DELAY, 2.0 ); if( randomize && BUGGIFY ) STORAGE_METRICS_POLLING_DELAY = 15.0; init( STORAGE_METRICS_POLLING_DELAY, 2.0 ); if( randomize && BUGGIFY ) STORAGE_METRICS_POLLING_DELAY = 15.0;
init( STORAGE_METRICS_RANDOM_DELAY, 0.2 ); init( STORAGE_METRICS_RANDOM_DELAY, 0.2 );
init( FREE_SPACE_RATIO_CUTOFF, 0.1 ); init( FREE_SPACE_RATIO_CUTOFF, 0.35 );
init( FREE_SPACE_RATIO_DD_CUTOFF, 0.2 );
init( DESIRED_TEAMS_PER_SERVER, 5 ); if( randomize && BUGGIFY ) DESIRED_TEAMS_PER_SERVER = 1; init( DESIRED_TEAMS_PER_SERVER, 5 ); if( randomize && BUGGIFY ) DESIRED_TEAMS_PER_SERVER = 1;
init( MAX_TEAMS_PER_SERVER, 5*DESIRED_TEAMS_PER_SERVER ); init( MAX_TEAMS_PER_SERVER, 5*DESIRED_TEAMS_PER_SERVER );
init( DD_SHARD_SIZE_GRANULARITY, 5000000 ); init( DD_SHARD_SIZE_GRANULARITY, 5000000 );
@ -215,10 +213,10 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( DD_CHECK_INVALID_LOCALITY_DELAY, 60 ); if( randomize && BUGGIFY ) DD_CHECK_INVALID_LOCALITY_DELAY = 1 + deterministicRandom()->random01() * 600; init( DD_CHECK_INVALID_LOCALITY_DELAY, 60 ); if( randomize && BUGGIFY ) DD_CHECK_INVALID_LOCALITY_DELAY = 1 + deterministicRandom()->random01() * 600;
// TeamRemover // TeamRemover
TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = false; if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true init( TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
init( TR_REMOVE_MACHINE_TEAM_DELAY, 60.0 ); if( randomize && BUGGIFY ) TR_REMOVE_MACHINE_TEAM_DELAY = deterministicRandom()->random01() * 60.0; init( TR_REMOVE_MACHINE_TEAM_DELAY, 60.0 ); if( randomize && BUGGIFY ) TR_REMOVE_MACHINE_TEAM_DELAY = deterministicRandom()->random01() * 60.0;
TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS = true; if( randomize && BUGGIFY ) TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS = deterministicRandom()->random01() < 0.1 ? true : false; init( TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS, true ); if( randomize && BUGGIFY ) TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS = deterministicRandom()->random01() < 0.1 ? true : false;
TR_FLAG_DISABLE_SERVER_TEAM_REMOVER = false; if( randomize && BUGGIFY ) TR_FLAG_DISABLE_SERVER_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true init( TR_FLAG_DISABLE_SERVER_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_SERVER_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true
init( TR_REMOVE_SERVER_TEAM_DELAY, 60.0 ); if( randomize && BUGGIFY ) TR_REMOVE_SERVER_TEAM_DELAY = deterministicRandom()->random01() * 60.0; init( TR_REMOVE_SERVER_TEAM_DELAY, 60.0 ); if( randomize && BUGGIFY ) TR_REMOVE_SERVER_TEAM_DELAY = deterministicRandom()->random01() * 60.0;
init( TR_REMOVE_SERVER_TEAM_EXTRA_DELAY, 5.0 ); if( randomize && BUGGIFY ) TR_REMOVE_SERVER_TEAM_EXTRA_DELAY = deterministicRandom()->random01() * 10.0; init( TR_REMOVE_SERVER_TEAM_EXTRA_DELAY, 5.0 ); if( randomize && BUGGIFY ) TR_REMOVE_SERVER_TEAM_EXTRA_DELAY = deterministicRandom()->random01() * 10.0;
@ -226,7 +224,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
// Redwood Storage Engine // Redwood Storage Engine
init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT, 30 ); init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT, 30 );
init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_MIN, 0 ); init( PREFIX_TREE_IMMEDIATE_KEY_SIZE_MIN, 0 );
// KeyValueStore SQLITE // KeyValueStore SQLITE
init( CLEAR_BUFFER_SIZE, 20000 ); init( CLEAR_BUFFER_SIZE, 20000 );
@ -331,6 +329,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( REQUIRED_MIN_RECOVERY_DURATION, 0.080 ); if( shortRecoveryDuration ) REQUIRED_MIN_RECOVERY_DURATION = 0.01; init( REQUIRED_MIN_RECOVERY_DURATION, 0.080 ); if( shortRecoveryDuration ) REQUIRED_MIN_RECOVERY_DURATION = 0.01;
init( ALWAYS_CAUSAL_READ_RISKY, false ); init( ALWAYS_CAUSAL_READ_RISKY, false );
init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1; init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
init( MIN_PROXY_COMPUTE, 0.001 );
init( PROXY_COMPUTE_BUCKETS, 5000 );
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
// Master Server // Master Server
// masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistibution) // masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistibution)
@ -387,11 +388,11 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( CLIENT_REGISTER_INTERVAL, 600.0 ); init( CLIENT_REGISTER_INTERVAL, 600.0 );
init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0; init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit ); init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_TLOG_FITNESS, ProcessClass::UnsetFit ); init( EXPECTED_TLOG_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::UnsetFit ); init( EXPECTED_LOG_ROUTER_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_PROXY_FITNESS, ProcessClass::UnsetFit ); init( EXPECTED_PROXY_FITNESS, ProcessClass::UnsetFit );
init( EXPECTED_RESOLVER_FITNESS, ProcessClass::UnsetFit ); init( EXPECTED_RESOLVER_FITNESS, ProcessClass::UnsetFit );
init( RECRUITMENT_TIMEOUT, 600 ); if( randomize && BUGGIFY ) RECRUITMENT_TIMEOUT = deterministicRandom()->coinflip() ? 60.0 : 1.0; init( RECRUITMENT_TIMEOUT, 600 ); if( randomize && BUGGIFY ) RECRUITMENT_TIMEOUT = deterministicRandom()->coinflip() ? 60.0 : 1.0;
init( POLICY_RATING_TESTS, 200 ); if( randomize && BUGGIFY ) POLICY_RATING_TESTS = 20; init( POLICY_RATING_TESTS, 200 ); if( randomize && BUGGIFY ) POLICY_RATING_TESTS = 20;
@ -412,7 +413,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( MIN_REBOOT_TIME, 4.0 ); if( longReboots ) MIN_REBOOT_TIME = 10.0; init( MIN_REBOOT_TIME, 4.0 ); if( longReboots ) MIN_REBOOT_TIME = 10.0;
init( MAX_REBOOT_TIME, 5.0 ); if( longReboots ) MAX_REBOOT_TIME = 20.0; init( MAX_REBOOT_TIME, 5.0 ); if( longReboots ) MAX_REBOOT_TIME = 20.0;
init( LOG_DIRECTORY, "."); // Will be set to the command line flag. init( LOG_DIRECTORY, "."); // Will be set to the command line flag.
init(SERVER_MEM_LIMIT, 8LL << 30); init( SERVER_MEM_LIMIT, 8LL << 30 );
//Ratekeeper //Ratekeeper
bool slowRatekeeper = randomize && BUGGIFY; bool slowRatekeeper = randomize && BUGGIFY;
@ -443,8 +444,10 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( MAX_TRANSACTIONS_PER_BYTE, 1000 ); init( MAX_TRANSACTIONS_PER_BYTE, 1000 );
init( MIN_FREE_SPACE, 1e8 ); init( MIN_AVAILABLE_SPACE, 1e8 );
init( MIN_FREE_SPACE_RATIO, 0.05 ); init( MIN_AVAILABLE_SPACE_RATIO, 0.05 );
init( TARGET_AVAILABLE_SPACE_RATIO, 0.30 );
init( AVAILABLE_SPACE_UPDATE_DELAY, 5.0 );
init( MAX_TL_SS_VERSION_DIFFERENCE, 1e99 ); // if( randomize && BUGGIFY ) MAX_TL_SS_VERSION_DIFFERENCE = std::max(1.0, 0.25 * VERSIONS_PER_SECOND); // spring starts at half this value //FIXME: this knob causes ratekeeper to clamp on idle cluster in simulation that have a large number of logs init( MAX_TL_SS_VERSION_DIFFERENCE, 1e99 ); // if( randomize && BUGGIFY ) MAX_TL_SS_VERSION_DIFFERENCE = std::max(1.0, 0.25 * VERSIONS_PER_SECOND); // spring starts at half this value //FIXME: this knob causes ratekeeper to clamp on idle cluster in simulation that have a large number of logs
init( MAX_TL_SS_VERSION_DIFFERENCE_BATCH, 1e99 ); init( MAX_TL_SS_VERSION_DIFFERENCE_BATCH, 1e99 );
@ -497,7 +500,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( BEHIND_CHECK_DELAY, 2.0 ); init( BEHIND_CHECK_DELAY, 2.0 );
init( BEHIND_CHECK_COUNT, 2 ); init( BEHIND_CHECK_COUNT, 2 );
init( BEHIND_CHECK_VERSIONS, 5 * VERSIONS_PER_SECOND ); init( BEHIND_CHECK_VERSIONS, 5 * VERSIONS_PER_SECOND );
init( WAIT_METRICS_WRONG_SHARD_CHANCE, 0.1 ); init( WAIT_METRICS_WRONG_SHARD_CHANCE, isSimulated ? 1.0 : 0.1 );
//Wait Failure //Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2; init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
@ -519,7 +522,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
// Buggification // Buggification
init( BUGGIFIED_EVENTUAL_CONSISTENCY, 1.0 ); init( BUGGIFIED_EVENTUAL_CONSISTENCY, 1.0 );
BUGGIFY_ALL_COORDINATION = false; if( randomize && BUGGIFY ) BUGGIFY_ALL_COORDINATION = true; init( BUGGIFY_ALL_COORDINATION, false ); if( randomize && BUGGIFY ) BUGGIFY_ALL_COORDINATION = true;
// Status // Status
init( STATUS_MIN_TIME_BETWEEN_REQUESTS, 0.0 ); init( STATUS_MIN_TIME_BETWEEN_REQUESTS, 0.0 );
@ -537,7 +540,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
// Timekeeper // Timekeeper
init( TIME_KEEPER_DELAY, 10 ); init( TIME_KEEPER_DELAY, 10 );
init( TIME_KEEPER_MAX_ENTRIES, 3600 * 24 * 30 * 6); if( randomize && BUGGIFY ) { TIME_KEEPER_MAX_ENTRIES = 2; } init( TIME_KEEPER_MAX_ENTRIES, 3600 * 24 * 30 * 6 ); if( randomize && BUGGIFY ) { TIME_KEEPER_MAX_ENTRIES = 2; }
// Fast Restore // Fast Restore
init( FASTRESTORE_FAILURE_TIMEOUT, 3600 ); init( FASTRESTORE_FAILURE_TIMEOUT, 3600 );

View File

@ -82,7 +82,6 @@ public:
int64_t DISK_QUEUE_FILE_EXTENSION_BYTES; // When we grow the disk queue, by how many bytes should it grow? int64_t DISK_QUEUE_FILE_EXTENSION_BYTES; // When we grow the disk queue, by how many bytes should it grow?
int64_t DISK_QUEUE_FILE_SHRINK_BYTES; // When we shrink the disk queue, by how many bytes should it shrink? int64_t DISK_QUEUE_FILE_SHRINK_BYTES; // When we shrink the disk queue, by how many bytes should it shrink?
int DISK_QUEUE_MAX_TRUNCATE_BYTES; // A truncate larger than this will cause the file to be replaced instead. int DISK_QUEUE_MAX_TRUNCATE_BYTES; // A truncate larger than this will cause the file to be replaced instead.
int TLOG_DEGRADED_DELAY_COUNT;
double TLOG_DEGRADED_DURATION; double TLOG_DEGRADED_DURATION;
int64_t MAX_CACHE_VERSIONS; int64_t MAX_CACHE_VERSIONS;
double TXS_POPPED_MAX_DELAY; double TXS_POPPED_MAX_DELAY;
@ -155,7 +154,7 @@ public:
double STORAGE_METRICS_POLLING_DELAY; double STORAGE_METRICS_POLLING_DELAY;
double STORAGE_METRICS_RANDOM_DELAY; double STORAGE_METRICS_RANDOM_DELAY;
double FREE_SPACE_RATIO_CUTOFF; double FREE_SPACE_RATIO_CUTOFF;
double FREE_SPACE_RATIO_DD_CUTOFF; double FREE_SPACE_CUTOFF_PENALTY;
int DESIRED_TEAMS_PER_SERVER; int DESIRED_TEAMS_PER_SERVER;
int MAX_TEAMS_PER_SERVER; int MAX_TEAMS_PER_SERVER;
int64_t DD_SHARD_SIZE_GRANULARITY; int64_t DD_SHARD_SIZE_GRANULARITY;
@ -272,6 +271,9 @@ public:
double REQUIRED_MIN_RECOVERY_DURATION; double REQUIRED_MIN_RECOVERY_DURATION;
bool ALWAYS_CAUSAL_READ_RISKY; bool ALWAYS_CAUSAL_READ_RISKY;
int MAX_COMMIT_UPDATES; int MAX_COMMIT_UPDATES;
double MIN_PROXY_COMPUTE;
int PROXY_COMPUTE_BUCKETS;
double PROXY_COMPUTE_GROWTH_RATE;
// Master Server // Master Server
double COMMIT_SLEEP_TIME; double COMMIT_SLEEP_TIME;
@ -378,8 +380,10 @@ public:
double MAX_TRANSACTIONS_PER_BYTE; double MAX_TRANSACTIONS_PER_BYTE;
int64_t MIN_FREE_SPACE; int64_t MIN_AVAILABLE_SPACE;
double MIN_FREE_SPACE_RATIO; double MIN_AVAILABLE_SPACE_RATIO;
double TARGET_AVAILABLE_SPACE_RATIO;
double AVAILABLE_SPACE_UPDATE_DELAY;
double MAX_TL_SS_VERSION_DIFFERENCE; // spring starts at half this value double MAX_TL_SS_VERSION_DIFFERENCE; // spring starts at half this value
double MAX_TL_SS_VERSION_DIFFERENCE_BATCH; double MAX_TL_SS_VERSION_DIFFERENCE_BATCH;

View File

@ -1055,7 +1055,12 @@ ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, TaskPriori
loop { loop {
wait( allLoaders || delay(SERVER_KNOBS->DESIRED_GET_MORE_DELAY, taskID) ); wait( allLoaders || delay(SERVER_KNOBS->DESIRED_GET_MORE_DELAY, taskID) );
minVersion = self->end; minVersion = self->end;
for(auto cursor : self->cursors) { for(int i = 0; i < self->cursors.size(); i++) {
auto cursor = self->cursors[i];
while(cursor->hasMessage()) {
self->cursorMessages[i].push_back(ILogSystem::BufferedCursor::BufferedMessage(cursor->arena(), (!self->withTags || self->collectTags) ? cursor->getMessage() : cursor->getMessageWithTags(), !self->withTags ? VectorRef<Tag>() : cursor->getTags(), cursor->version()));
cursor->nextMessage();
}
minVersion = std::min(minVersion, cursor->version().version); minVersion = std::min(minVersion, cursor->version().version);
} }
if(minVersion > self->messageVersion.version) { if(minVersion > self->messageVersion.version) {

View File

@ -279,6 +279,8 @@ struct ProxyCommitData {
int updateCommitRequests = 0; int updateCommitRequests = 0;
NotifiedDouble lastCommitTime; NotifiedDouble lastCommitTime;
vector<double> commitComputePerOperation;
//The tag related to a storage server rarely change, so we keep a vector of tags for each key range to be slightly more CPU efficient. //The tag related to a storage server rarely change, so we keep a vector of tags for each key range to be slightly more CPU efficient.
//When a tag related to a storage server does change, we empty out all of these vectors to signify they must be repopulated. //When a tag related to a storage server does change, we empty out all of these vectors to signify they must be repopulated.
//We do not repopulate them immediately to avoid a slow task. //We do not repopulate them immediately to avoid a slow task.
@ -345,7 +347,9 @@ struct ProxyCommitData {
localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN), localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN),
firstProxy(firstProxy), cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true)), db(db), firstProxy(firstProxy), cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true)), db(db),
singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0), lastStartCommit(0), lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), lastCommitTime(0) singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0), lastStartCommit(0), lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), lastCommitTime(0)
{} {
commitComputePerOperation.resize(SERVER_KNOBS->PROXY_COMPUTE_BUCKETS,0.0);
}
}; };
struct ResolutionRequestBuilder { struct ResolutionRequestBuilder {
@ -528,7 +532,7 @@ bool isWhitelisted(const vector<Standalone<StringRef>>& binPathVec, StringRef bi
} }
ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, MutationListRef>* logRangeMutations, ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, MutationListRef>* logRangeMutations,
LogPushData* toCommit, Version commitVersion) { LogPushData* toCommit, Version commitVersion, double* computeDuration, double* computeStart) {
state std::map<Key, MutationListRef>::iterator logRangeMutation = logRangeMutations->begin(); state std::map<Key, MutationListRef>::iterator logRangeMutation = logRangeMutations->begin();
state int32_t version = commitVersion / CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE; state int32_t version = commitVersion / CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE;
state int yieldBytes = 0; state int yieldBytes = 0;
@ -545,7 +549,11 @@ ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, Mutat
while(blobIter) { while(blobIter) {
if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) { if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
yieldBytes = 0; yieldBytes = 0;
wait(yield(TaskPriority::ProxyCommitYield2)); if(g_network->check_yield(TaskPriority::ProxyCommitYield1)) {
*computeDuration += g_network->timer() - *computeStart;
wait(delay(0, TaskPriority::ProxyCommitYield1));
*computeStart = g_network->timer();
}
} }
valueWriter.serializeBytes(blobIter->data); valueWriter.serializeBytes(blobIter->data);
yieldBytes += blobIter->data.size(); yieldBytes += blobIter->data.size();
@ -603,6 +611,13 @@ ACTOR Future<Void> addBackupMutations(ProxyCommitData* self, std::map<Key, Mutat
return Void(); return Void();
} }
ACTOR Future<Void> releaseResolvingAfter(ProxyCommitData* self, Future<Void> releaseDelay, int64_t localBatchNumber) {
wait(releaseDelay);
ASSERT(self->latestLocalCommitBatchResolving.get() == localBatchNumber-1);
self->latestLocalCommitBatchResolving.set(localBatchNumber);
return Void();
}
ACTOR Future<Void> commitBatch( ACTOR Future<Void> commitBatch(
ProxyCommitData* self, ProxyCommitData* self,
vector<CommitTransactionRequest> trs, vector<CommitTransactionRequest> trs,
@ -613,6 +628,13 @@ ACTOR Future<Void> commitBatch(
state double t1 = now(); state double t1 = now();
state Optional<UID> debugID; state Optional<UID> debugID;
state bool forceRecovery = false; state bool forceRecovery = false;
state int batchOperations = 0;
int64_t batchBytes = 0;
for (int t = 0; t<trs.size(); t++) {
batchOperations += trs[t].transaction.mutations.size();
batchBytes += trs[t].transaction.mutations.expectedSize();
}
state int latencyBucket = batchOperations == 0 ? 0 : std::min<int>(SERVER_KNOBS->PROXY_COMPUTE_BUCKETS-1,SERVER_KNOBS->PROXY_COMPUTE_BUCKETS*batchBytes/(batchOperations*(CLIENT_KNOBS->VALUE_SIZE_LIMIT+CLIENT_KNOBS->KEY_SIZE_LIMIT)));
ASSERT(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS <= SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT); // since we are using just the former to limit the number of versions actually in flight! ASSERT(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS <= SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT); // since we are using just the former to limit the number of versions actually in flight!
@ -644,7 +666,7 @@ ACTOR Future<Void> commitBatch(
// Queuing pre-resolution commit processing // Queuing pre-resolution commit processing
TEST(self->latestLocalCommitBatchResolving.get() < localBatchNumber - 1); TEST(self->latestLocalCommitBatchResolving.get() < localBatchNumber - 1);
wait(self->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber-1)); wait(self->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber-1));
wait(yield(TaskPriority::ProxyCommitYield1)); state Future<Void> releaseDelay = delay(batchOperations*self->commitComputePerOperation[latencyBucket], TaskPriority::ProxyMasterVersionReply);
if (debugID.present()) if (debugID.present())
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.GettingCommitVersion"); g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.GettingCommitVersion");
@ -695,9 +717,7 @@ ACTOR Future<Void> commitBatch(
} }
state vector<vector<int>> transactionResolverMap = std::move( requests.transactionResolverMap ); state vector<vector<int>> transactionResolverMap = std::move( requests.transactionResolverMap );
state Future<Void> releaseFuture = releaseResolvingAfter(self, releaseDelay, localBatchNumber);
ASSERT(self->latestLocalCommitBatchResolving.get() == localBatchNumber-1);
self->latestLocalCommitBatchResolving.set(localBatchNumber);
/////// Phase 2: Resolution (waiting on the network; pipelined) /////// Phase 2: Resolution (waiting on the network; pipelined)
state vector<ResolveTransactionBatchReply> resolution = wait( getAll(replies) ); state vector<ResolveTransactionBatchReply> resolution = wait( getAll(replies) );
@ -708,8 +728,10 @@ ACTOR Future<Void> commitBatch(
////// Phase 3: Post-resolution processing (CPU bound except for very rare situations; ordered; currently atomic but doesn't need to be) ////// Phase 3: Post-resolution processing (CPU bound except for very rare situations; ordered; currently atomic but doesn't need to be)
TEST(self->latestLocalCommitBatchLogging.get() < localBatchNumber - 1); // Queuing post-resolution commit processing TEST(self->latestLocalCommitBatchLogging.get() < localBatchNumber - 1); // Queuing post-resolution commit processing
wait(self->latestLocalCommitBatchLogging.whenAtLeast(localBatchNumber-1)); wait(self->latestLocalCommitBatchLogging.whenAtLeast(localBatchNumber-1));
wait(yield(TaskPriority::ProxyCommitYield2)); wait(yield(TaskPriority::ProxyCommitYield1));
state double computeStart = g_network->timer();
state double computeDuration = 0;
self->stats.txnCommitResolved += trs.size(); self->stats.txnCommitResolved += trs.size();
if (debugID.present()) if (debugID.present())
@ -866,7 +888,11 @@ ACTOR Future<Void> commitBatch(
for (; mutationNum < pMutations->size(); mutationNum++) { for (; mutationNum < pMutations->size(); mutationNum++) {
if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) { if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
yieldBytes = 0; yieldBytes = 0;
wait(yield(TaskPriority::ProxyCommitYield2)); if(g_network->check_yield(TaskPriority::ProxyCommitYield1)) {
computeDuration += g_network->timer() - computeStart;
wait(delay(0, TaskPriority::ProxyCommitYield1));
computeStart = g_network->timer();
}
} }
auto& m = (*pMutations)[mutationNum]; auto& m = (*pMutations)[mutationNum];
@ -968,7 +994,7 @@ ACTOR Future<Void> commitBatch(
// Serialize and backup the mutations as a single mutation // Serialize and backup the mutations as a single mutation
if ((self->vecBackupKeys.size() > 1) && logRangeMutations.size()) { if ((self->vecBackupKeys.size() > 1) && logRangeMutations.size()) {
wait( addBackupMutations(self, &logRangeMutations, &toCommit, commitVersion) ); wait( addBackupMutations(self, &logRangeMutations, &toCommit, commitVersion, &computeDuration, &computeStart) );
} }
self->stats.mutations += mutationCount; self->stats.mutations += mutationCount;
@ -976,29 +1002,33 @@ ACTOR Future<Void> commitBatch(
// Storage servers mustn't make durable versions which are not fully committed (because then they are impossible to roll back) // Storage servers mustn't make durable versions which are not fully committed (because then they are impossible to roll back)
// We prevent this by limiting the number of versions which are semi-committed but not fully committed to be less than the MVCC window // We prevent this by limiting the number of versions which are semi-committed but not fully committed to be less than the MVCC window
while (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) { if(self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
// This should be *extremely* rare in the real world, but knob buggification should make it happen in simulation computeDuration += g_network->timer() - computeStart;
TEST(true); // Semi-committed pipeline limited by MVCC window while (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
//TraceEvent("ProxyWaitingForCommitted", self->dbgid).detail("CommittedVersion", self->committedVersion.get()).detail("NeedToCommit", commitVersion); // This should be *extremely* rare in the real world, but knob buggification should make it happen in simulation
choose{ TEST(true); // Semi-committed pipeline limited by MVCC window
when(wait(self->committedVersion.whenAtLeast(commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS))) { //TraceEvent("ProxyWaitingForCommitted", self->dbgid).detail("CommittedVersion", self->committedVersion.get()).detail("NeedToCommit", commitVersion);
wait(yield()); choose{
break; when(wait(self->committedVersion.whenAtLeast(commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS))) {
} wait(yield());
when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE | GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) { break;
if(v.version > self->committedVersion.get()) { }
self->locked = v.locked; when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE | GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
self->metadataVersion = v.metadataVersion; if(v.version > self->committedVersion.get()) {
self->committedVersion.set(v.version); self->locked = v.locked;
self->metadataVersion = v.metadataVersion;
self->committedVersion.set(v.version);
}
if (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)
wait(delay(SERVER_KNOBS->PROXY_SPIN_DELAY));
} }
if (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)
wait(delay(SERVER_KNOBS->PROXY_SPIN_DELAY));
} }
} }
computeStart = g_network->timer();
} }
state LogSystemDiskQueueAdapter::CommitMessage msg = wait(storeCommits.back().first); // Should just be doing yields state LogSystemDiskQueueAdapter::CommitMessage msg = storeCommits.back().first.get();
if (debugID.present()) if (debugID.present())
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.AfterStoreCommits"); g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.AfterStoreCommits");
@ -1031,6 +1061,16 @@ ACTOR Future<Void> commitBatch(
self->latestLocalCommitBatchLogging.set(localBatchNumber); self->latestLocalCommitBatchLogging.set(localBatchNumber);
} }
computeDuration += g_network->timer() - computeStart;
if(computeDuration > SERVER_KNOBS->MIN_PROXY_COMPUTE && batchOperations > 0) {
double computePerOperation = computeDuration/batchOperations;
if(computePerOperation <= self->commitComputePerOperation[latencyBucket] || self->commitComputePerOperation[latencyBucket] == 0.0) {
self->commitComputePerOperation[latencyBucket] = computePerOperation;
} else {
self->commitComputePerOperation[latencyBucket] = SERVER_KNOBS->PROXY_COMPUTE_GROWTH_RATE*computePerOperation + ((1.0-SERVER_KNOBS->PROXY_COMPUTE_GROWTH_RATE)*self->commitComputePerOperation[latencyBucket]);
}
}
/////// Phase 4: Logging (network bound; pipelined up to MAX_READ_TRANSACTION_LIFE_VERSIONS (limited by loop above)) /////// Phase 4: Logging (network bound; pipelined up to MAX_READ_TRANSACTION_LIFE_VERSIONS (limited by loop above))
try { try {
@ -1048,7 +1088,7 @@ ACTOR Future<Void> commitBatch(
} }
self->lastCommitLatency = now()-commitStartTime; self->lastCommitLatency = now()-commitStartTime;
self->lastCommitTime = std::max(self->lastCommitTime.get(), commitStartTime); self->lastCommitTime = std::max(self->lastCommitTime.get(), commitStartTime);
wait(yield(TaskPriority::ProxyCommitYield3)); wait(yield(TaskPriority::ProxyCommitYield2));
if( self->popRemoteTxs && msg.popTo > ( self->txsPopVersions.size() ? self->txsPopVersions.back().second : self->lastTxsPop ) ) { if( self->popRemoteTxs && msg.popTo > ( self->txsPopVersions.size() ? self->txsPopVersions.back().second : self->lastTxsPop ) ) {
if(self->txsPopVersions.size() >= SERVER_KNOBS->MAX_TXS_POP_VERSION_HISTORY) { if(self->txsPopVersions.size() >= SERVER_KNOBS->MAX_TXS_POP_VERSION_HISTORY) {
@ -1087,7 +1127,7 @@ ACTOR Future<Void> commitBatch(
} }
// Send replies to clients // Send replies to clients
double endTime = timer(); double endTime = g_network->timer();
for (int t = 0; t < trs.size(); t++) { for (int t = 0; t < trs.size(); t++) {
if (committed[t] == ConflictBatch::TransactionCommitted && (!locked || trs[t].isLockAware())) { if (committed[t] == ConflictBatch::TransactionCommitted && (!locked || trs[t].isLockAware())) {
ASSERT_WE_THINK(commitVersion != invalidVersion); ASSERT_WE_THINK(commitVersion != invalidVersion);
@ -1138,6 +1178,7 @@ ACTOR Future<Void> commitBatch(
self->commitBatchesMemBytesCount -= currentBatchMemBytesCount; self->commitBatchesMemBytesCount -= currentBatchMemBytesCount;
ASSERT_ABORT(self->commitBatchesMemBytesCount >= 0); ASSERT_ABORT(self->commitBatchesMemBytesCount >= 0);
wait(releaseFuture);
return Void(); return Void();
} }
@ -1201,8 +1242,7 @@ ACTOR Future<GetReadVersionReply> getLiveCommittedVersion(ProxyCommitData* commi
ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture, std::vector<GetReadVersionRequest> requests, ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture, std::vector<GetReadVersionRequest> requests,
ProxyStats* stats, Version minKnownCommittedVersion) { ProxyStats* stats, Version minKnownCommittedVersion) {
GetReadVersionReply reply = wait(replyFuture); GetReadVersionReply reply = wait(replyFuture);
double end = g_network->timer();
double end = timer();
for(GetReadVersionRequest const& request : requests) { for(GetReadVersionRequest const& request : requests) {
if(request.priority() >= GetReadVersionRequest::PRIORITY_DEFAULT) { if(request.priority() >= GetReadVersionRequest::PRIORITY_DEFAULT) {
stats->grvLatencyBands.addMeasurement(end - request.requestTime()); stats->grvLatencyBands.addMeasurement(end - request.requestTime());
@ -1410,7 +1450,7 @@ ACTOR static Future<Void> rejoinServer( MasterProxyInterface proxy, ProxyCommitD
GetStorageServerRejoinInfoReply rep; GetStorageServerRejoinInfoReply rep;
rep.version = commitData->version; rep.version = commitData->version;
rep.tag = decodeServerTagValue( commitData->txnStateStore->readValue(serverTagKeyFor(req.id)).get().get() ); rep.tag = decodeServerTagValue( commitData->txnStateStore->readValue(serverTagKeyFor(req.id)).get().get() );
Standalone<VectorRef<KeyValueRef>> history = commitData->txnStateStore->readRange(serverTagHistoryRangeFor(req.id)).get(); Standalone<RangeResultRef> history = commitData->txnStateStore->readRange(serverTagHistoryRangeFor(req.id)).get();
for(int i = history.size()-1; i >= 0; i-- ) { for(int i = history.size()-1; i >= 0; i-- ) {
rep.history.push_back(std::make_pair(decodeServerTagHistoryKey(history[i].key), decodeServerTagValue(history[i].value))); rep.history.push_back(std::make_pair(decodeServerTagHistoryKey(history[i].key), decodeServerTagValue(history[i].value)));
} }
@ -1794,7 +1834,7 @@ ACTOR Future<Void> masterProxyServerCore(
state KeyRange txnKeys = allKeys; state KeyRange txnKeys = allKeys;
loop { loop {
wait(yield()); wait(yield());
Standalone<VectorRef<KeyValueRef>> data = commitData.txnStateStore->readRange(txnKeys, SERVER_KNOBS->BUGGIFIED_ROW_LIMIT, SERVER_KNOBS->APPLY_MUTATION_BYTES).get(); Standalone<RangeResultRef> data = commitData.txnStateStore->readRange(txnKeys, SERVER_KNOBS->BUGGIFIED_ROW_LIMIT, SERVER_KNOBS->APPLY_MUTATION_BYTES).get();
if(!data.size()) break; if(!data.size()) break;
((KeyRangeRef&)txnKeys) = KeyRangeRef( keyAfter(data.back().key, txnKeys.arena()), txnKeys.end ); ((KeyRangeRef&)txnKeys) = KeyRangeRef( keyAfter(data.back().key, txnKeys.arena()), txnKeys.end );

View File

@ -270,6 +270,7 @@ namespace oldTLog_4_6 {
std::map<UID, Reference<struct LogData>> id_data; std::map<UID, Reference<struct LogData>> id_data;
UID dbgid; UID dbgid;
UID workerID;
IKeyValueStore* persistentData; IKeyValueStore* persistentData;
IDiskQueue* rawPersistentQueue; IDiskQueue* rawPersistentQueue;
@ -303,8 +304,8 @@ namespace oldTLog_4_6 {
PromiseStream<Future<Void>> sharedActors; PromiseStream<Future<Void>> sharedActors;
bool terminated; bool terminated;
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> const& dbInfo) TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> const& dbInfo)
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), : dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
dbInfo(dbInfo), queueCommitBegin(0), queueCommitEnd(0), prevVersion(0), dbInfo(dbInfo), queueCommitBegin(0), queueCommitEnd(0), prevVersion(0),
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false),
@ -412,7 +413,7 @@ namespace oldTLog_4_6 {
// These are initialized differently on init() or recovery // These are initialized differently on init() or recovery
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), recovery(Void()) recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), recovery(Void())
{ {
startRole(Role::TRANSACTION_LOG,interf.id(), UID()); startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, "Restored");
persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id);
persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id);
@ -954,7 +955,7 @@ namespace oldTLog_4_6 {
peekMessagesFromMemory( logData, req, messages2, endVersion ); peekMessagesFromMemory( logData, req, messages2, endVersion );
Standalone<VectorRef<KeyValueRef>> kvs = wait( Standalone<RangeResultRef> kvs = wait(
self->persistentData->readRange(KeyRangeRef( self->persistentData->readRange(KeyRangeRef(
persistTagMessagesKey(logData->logId, oldTag, req.begin), persistTagMessagesKey(logData->logId, oldTag, req.begin),
persistTagMessagesKey(logData->logId, oldTag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES)); persistTagMessagesKey(logData->logId, oldTag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));
@ -1101,7 +1102,7 @@ namespace oldTLog_4_6 {
// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface // The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
TLogRejoinRequest req; TLogRejoinRequest req;
req.myInterface = tli; req.myInterface = tli;
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id()); TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
choose { choose {
when(TLogRejoinReply rep = when(TLogRejoinReply rep =
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) { wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
@ -1249,8 +1250,8 @@ namespace oldTLog_4_6 {
IKeyValueStore *storage = self->persistentData; IKeyValueStore *storage = self->persistentData;
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key); state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys); state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys); state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
// FIXME: metadata in queue? // FIXME: metadata in queue?
@ -1263,7 +1264,7 @@ namespace oldTLog_4_6 {
} }
if (!fFormat.get().present()) { if (!fFormat.get().present()) {
Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) ); Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
if (!v.size()) { if (!v.size()) {
TEST(true); // The DB is completely empty, so it was never initialized. Delete it. TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
throw worker_removed(); throw worker_removed();
@ -1316,7 +1317,7 @@ namespace oldTLog_4_6 {
tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) ); tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) );
loop { loop {
if(logData->removed.isReady()) break; if(logData->removed.isReady()) break;
Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) ); Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
if (!data.size()) break; if (!data.size()) break;
((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end ); ((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end );
@ -1402,9 +1403,9 @@ namespace oldTLog_4_6 {
return Void(); return Void();
} }
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId ) ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId, UID workerID )
{ {
state TLogData self( tlogId, persistentData, persistentQueue, db ); state TLogData self( tlogId, workerID, persistentData, persistentQueue, db );
state Future<Void> error = actorCollection( self.sharedActors.getFuture() ); state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
TraceEvent("SharedTlog", tlogId); TraceEvent("SharedTlog", tlogId);

View File

@ -245,6 +245,7 @@ struct TLogData : NonCopyable {
std::map<UID, Reference<struct LogData>> id_data; std::map<UID, Reference<struct LogData>> id_data;
UID dbgid; UID dbgid;
UID workerID;
IKeyValueStore* persistentData; IKeyValueStore* persistentData;
IDiskQueue* rawPersistentQueue; IDiskQueue* rawPersistentQueue;
@ -286,8 +287,8 @@ struct TLogData : NonCopyable {
Reference<AsyncVar<bool>> degraded; Reference<AsyncVar<bool>> degraded;
std::vector<TagsAndMessage> tempTagMessages; std::vector<TagsAndMessage> tempTagMessages;
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder) TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), : dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
@ -439,14 +440,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
bool execOpCommitInProgress; bool execOpCommitInProgress;
int txsTags; int txsTags;
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector<Tag> tags, std::string context)
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID),
// These are initialized differently on init() or recovery logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), // These are initialized differently on init() or recovery
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
{ {
startRole(Role::TRANSACTION_LOG, interf.id(), UID()); startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context);
persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id);
persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id);
@ -1172,7 +1174,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
peekMessagesFromMemory( logData, req, messages2, endVersion ); peekMessagesFromMemory( logData, req, messages2, endVersion );
} }
Standalone<VectorRef<KeyValueRef>> kvs = wait( Standalone<RangeResultRef> kvs = wait(
self->persistentData->readRange(KeyRangeRef( self->persistentData->readRange(KeyRangeRef(
persistTagMessagesKey(logData->logId, req.tag, req.begin), persistTagMessagesKey(logData->logId, req.tag, req.begin),
persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES)); persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));
@ -1236,12 +1238,8 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
return Void(); return Void();
} }
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
state int loopCount = 0;
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
loopCount++;
}
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid); TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
TEST(true); //6.0 TLog degraded TEST(true); //6.0 TLog degraded
self->degraded->set(true); self->degraded->set(true);
@ -1482,7 +1480,7 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
if ( self->dbInfo->get().master.id() != lastMasterID) { if ( self->dbInfo->get().master.id() != lastMasterID) {
// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface // The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
TLogRejoinRequest req(tli); TLogRejoinRequest req(tli);
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id()); TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
choose { choose {
when(TLogRejoinReply rep = when(TLogRejoinReply rep =
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) { wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
@ -1930,12 +1928,12 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
state IKeyValueStore *storage = self->persistentData; state IKeyValueStore *storage = self->persistentData;
wait(storage->init()); wait(storage->init());
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key); state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys); state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys); state Future<Standalone<RangeResultRef>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys); state Future<Standalone<RangeResultRef>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys); state Future<Standalone<RangeResultRef>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys); state Future<Standalone<RangeResultRef>> fTxsTags = storage->readRange(persistTxsTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys); state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
// FIXME: metadata in queue? // FIXME: metadata in queue?
@ -1954,7 +1952,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
} }
if (!fFormat.get().present()) { if (!fFormat.get().present()) {
Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) ); Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
if (!v.size()) { if (!v.size()) {
TEST(true); // The DB is completely empty, so it was never initialized. Delete it. TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
throw worker_removed(); throw worker_removed();
@ -1976,7 +1974,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
tlogRequests.getFuture().pop().reply.sendError(recruitment_failed()); tlogRequests.getFuture().pop().reply.sendError(recruitment_failed());
} }
wait( oldTLog_4_6::tLog(self->persistentData, self->rawPersistentQueue, self->dbInfo, locality, self->dbgid) ); wait( oldTLog_4_6::tLog(self->persistentData, self->rawPersistentQueue, self->dbInfo, locality, self->dbgid, self->workerID) );
throw internal_error(); throw internal_error();
} }
@ -2022,7 +2020,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
DUMPTOKEN( recruited.confirmRunning ); DUMPTOKEN( recruited.confirmRunning );
//We do not need the remoteTag, because we will not be loading any additional data //We do not need the remoteTag, because we will not be loading any additional data
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector<Tag>()) ); logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector<Tag>(), "Restored") );
logData->locality = id_locality[id1]; logData->locality = id_locality[id1];
logData->stopped = true; logData->stopped = true;
self->id_data[id1] = logData; self->id_data[id1] = logData;
@ -2044,7 +2042,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) ); tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) );
loop { loop {
if(logData->removed.isReady()) break; if(logData->removed.isReady()) break;
Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) ); Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
if (!data.size()) break; if (!data.size()) break;
((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end ); ((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end );
@ -2205,7 +2203,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
it.second->stopCommit.trigger(); it.second->stopCommit.trigger();
} }
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags) ); bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags, recovering ? "Recovered" : "Recruited") );
self->id_data[recruited.id()] = logData; self->id_data[recruited.id()] = logData;
logData->locality = req.locality; logData->locality = req.locality;
logData->recoveryCount = req.epoch; logData->recoveryCount = req.epoch;
@ -2220,7 +2219,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
throw logData->removed.getError(); throw logData->removed.getError();
} }
if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) { if (recovering) {
logData->unrecoveredBefore = req.startVersion; logData->unrecoveredBefore = req.startVersion;
logData->recoveredAt = req.recoverAt; logData->recoveredAt = req.recoverAt;
logData->knownCommittedVersion = req.startVersion - 1; logData->knownCommittedVersion = req.startVersion - 1;
@ -2326,13 +2325,11 @@ ACTOR Future<Void> startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen
} }
// New tLog (if !recoverFrom.size()) or restore from network // New tLog (if !recoverFrom.size()) or restore from network
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog) { ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog) {
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder ); state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder );
state Future<Void> error = actorCollection( self.sharedActors.getFuture() ); state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
TraceEvent("SharedTlog", tlogId); TraceEvent("SharedTlog", tlogId);
// FIXME: Pass the worker id instead of stubbing it
startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID());
try { try {
if(restoreFromDisk) { if(restoreFromDisk) {
wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) ); wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) );
@ -2373,7 +2370,6 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
} catch (Error& e) { } catch (Error& e) {
self.terminated.send(Void()); self.terminated.send(Void());
TraceEvent("TLogError", tlogId).error(e, true); TraceEvent("TLogError", tlogId).error(e, true);
endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true);
if(recovered.canBeSet()) recovered.send(Void()); if(recovered.canBeSet()) recovered.send(Void());
while(!tlogRequests.isEmpty()) { while(!tlogRequests.isEmpty()) {

View File

@ -306,6 +306,7 @@ struct TLogData : NonCopyable {
std::map<UID, Reference<struct LogData>> id_data; std::map<UID, Reference<struct LogData>> id_data;
UID dbgid; UID dbgid;
UID workerID;
IKeyValueStore* persistentData; // Durable data on disk that were spilled. IKeyValueStore* persistentData; // Durable data on disk that were spilled.
IDiskQueue* rawPersistentQueue; // The physical queue the persistentQueue below stores its data. Ideally, log interface should work without directly accessing rawPersistentQueue IDiskQueue* rawPersistentQueue; // The physical queue the persistentQueue below stores its data. Ideally, log interface should work without directly accessing rawPersistentQueue
@ -347,8 +348,8 @@ struct TLogData : NonCopyable {
// that came when ignorePopRequest was set // that came when ignorePopRequest was set
Reference<AsyncVar<bool>> degraded; Reference<AsyncVar<bool>> degraded;
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder) TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), : dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
@ -511,7 +512,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
bool execOpCommitInProgress; bool execOpCommitInProgress;
int txsTags; int txsTags;
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector<Tag> tags, std::string context) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion), cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion),
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
minPoppedTagVersion(0), minPoppedTag(invalidTag), minPoppedTagVersion(0), minPoppedTag(invalidTag),
@ -519,7 +520,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
{ {
startRole(Role::TRANSACTION_LOG, interf.id(), UID()); startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context);
persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id);
persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id);
@ -697,7 +698,7 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
// us to remove data that still is pointed to by SpilledData in the btree. // us to remove data that still is pointed to by SpilledData in the btree.
if (data->persistentPopped <= logData->persistentDataVersion) { if (data->persistentPopped <= logData->persistentDataVersion) {
// Recover the next needed location in the Disk Queue from the index. // Recover the next needed location in the Disk Queue from the index.
Standalone<VectorRef<KeyValueRef>> kvrefs = wait( Standalone<RangeResultRef> kvrefs = wait(
self->persistentData->readRange(KeyRangeRef( self->persistentData->readRange(KeyRangeRef(
persistTagMessageRefsKey(logData->logId, data->tag, data->persistentPopped), persistTagMessageRefsKey(logData->logId, data->tag, data->persistentPopped),
persistTagMessageRefsKey(logData->logId, data->tag, logData->persistentDataVersion + 1)), 1)); persistTagMessageRefsKey(logData->logId, data->tag, logData->persistentDataVersion + 1)), 1));
@ -1479,7 +1480,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
} }
if (req.tag.locality == tagLocalityTxs || req.tag == txsTag) { if (req.tag.locality == tagLocalityTxs || req.tag == txsTag) {
Standalone<VectorRef<KeyValueRef>> kvs = wait( Standalone<RangeResultRef> kvs = wait(
self->persistentData->readRange(KeyRangeRef( self->persistentData->readRange(KeyRangeRef(
persistTagMessagesKey(logData->logId, req.tag, req.begin), persistTagMessagesKey(logData->logId, req.tag, req.begin),
persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES)); persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));
@ -1498,7 +1499,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
} }
} else { } else {
// FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow. // FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
Standalone<VectorRef<KeyValueRef>> kvrefs = wait( Standalone<RangeResultRef> kvrefs = wait(
self->persistentData->readRange(KeyRangeRef( self->persistentData->readRange(KeyRangeRef(
persistTagMessageRefsKey(logData->logId, req.tag, req.begin), persistTagMessageRefsKey(logData->logId, req.tag, req.begin),
persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
@ -1630,12 +1631,8 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
return Void(); return Void();
} }
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
state int loopCount = 0;
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
loopCount++;
}
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid); TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
TEST(true); //TLog degraded TEST(true); //TLog degraded
self->degraded->set(true); self->degraded->set(true);
@ -1876,7 +1873,7 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
if ( self->dbInfo->get().master.id() != lastMasterID) { if ( self->dbInfo->get().master.id() != lastMasterID) {
// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface // The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
TLogRejoinRequest req(tli); TLogRejoinRequest req(tli);
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id()); TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
choose { choose {
when(TLogRejoinReply rep = when(TLogRejoinReply rep =
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) { wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
@ -2340,13 +2337,13 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
wait(storage->init()); wait(storage->init());
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key); state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
state Future<Optional<Value>> fRecoveryLocation = storage->readValue(persistRecoveryLocationKey); state Future<Optional<Value>> fRecoveryLocation = storage->readValue(persistRecoveryLocationKey);
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys); state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys); state Future<Standalone<RangeResultRef>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys); state Future<Standalone<RangeResultRef>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys); state Future<Standalone<RangeResultRef>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys); state Future<Standalone<RangeResultRef>> fTxsTags = storage->readRange(persistTxsTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys); state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys); state Future<Standalone<RangeResultRef>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);
// FIXME: metadata in queue? // FIXME: metadata in queue?
@ -2365,7 +2362,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
} }
if (!fFormat.get().present()) { if (!fFormat.get().present()) {
Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) ); Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
if (!v.size()) { if (!v.size()) {
TEST(true); // The DB is completely empty, so it was never initialized. Delete it. TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
throw worker_removed(); throw worker_removed();
@ -2431,7 +2428,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
ProtocolVersion protocolVersion = BinaryReader::fromStringRef<ProtocolVersion>( fProtocolVersions.get()[idx].value, Unversioned() ); ProtocolVersion protocolVersion = BinaryReader::fromStringRef<ProtocolVersion>( fProtocolVersions.get()[idx].value, Unversioned() );
//We do not need the remoteTag, because we will not be loading any additional data //We do not need the remoteTag, because we will not be loading any additional data
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector<Tag>()) ); logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector<Tag>(), "Restored") );
logData->locality = id_locality[id1]; logData->locality = id_locality[id1];
logData->stopped = true; logData->stopped = true;
self->id_data[id1] = logData; self->id_data[id1] = logData;
@ -2453,7 +2450,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) ); tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) );
loop { loop {
if(logData->removed.isReady()) break; if(logData->removed.isReady()) break;
Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) ); Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
if (!data.size()) break; if (!data.size()) break;
((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end ); ((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end );
@ -2635,7 +2632,9 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
it.second->stopCommit.trigger(); it.second->stopCommit.trigger();
} }
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags) ); bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags, recovering ? "Recovered" : "Recruited") );
self->id_data[recruited.id()] = logData; self->id_data[recruited.id()] = logData;
logData->locality = req.locality; logData->locality = req.locality;
logData->recoveryCount = req.epoch; logData->recoveryCount = req.epoch;
@ -2652,7 +2651,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
throw logData->removed.getError(); throw logData->removed.getError();
} }
if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) { if (recovering) {
logData->unrecoveredBefore = req.startVersion; logData->unrecoveredBefore = req.startVersion;
logData->recoveredAt = req.recoverAt; logData->recoveredAt = req.recoverAt;
logData->knownCommittedVersion = req.startVersion - 1; logData->knownCommittedVersion = req.startVersion - 1;
@ -2760,13 +2759,11 @@ ACTOR Future<Void> startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen
} }
// New tLog (if !recoverFrom.size()) or restore from network // New tLog (if !recoverFrom.size()) or restore from network
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) { ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) {
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder ); state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder );
state Future<Void> error = actorCollection( self.sharedActors.getFuture() ); state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
TraceEvent("SharedTlog", tlogId); TraceEvent("SharedTlog", tlogId);
// FIXME: Pass the worker id instead of stubbing it
startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID());
try { try {
if(restoreFromDisk) { if(restoreFromDisk) {
wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) ); wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) );
@ -2805,7 +2802,6 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
} catch (Error& e) { } catch (Error& e) {
self.terminated.send(Void()); self.terminated.send(Void());
TraceEvent("TLogError", tlogId).error(e, true); TraceEvent("TLogError", tlogId).error(e, true);
endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true);
if(recovered.canBeSet()) recovered.send(Void()); if(recovered.canBeSet()) recovered.send(Void());
while(!tlogRequests.isEmpty()) { while(!tlogRequests.isEmpty()) {

View File

@ -390,14 +390,14 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
limitReason_t ssLimitReason = limitReason_t::unlimited; limitReason_t ssLimitReason = limitReason_t::unlimited;
int64_t minFreeSpace = std::max(SERVER_KNOBS->MIN_FREE_SPACE, (int64_t)(SERVER_KNOBS->MIN_FREE_SPACE_RATIO * ss.smoothTotalSpace.smoothTotal())); int64_t minFreeSpace = std::max(SERVER_KNOBS->MIN_AVAILABLE_SPACE, (int64_t)(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO * ss.smoothTotalSpace.smoothTotal()));
worstFreeSpaceStorageServer = std::min(worstFreeSpaceStorageServer, (int64_t)ss.smoothFreeSpace.smoothTotal() - minFreeSpace); worstFreeSpaceStorageServer = std::min(worstFreeSpaceStorageServer, (int64_t)ss.smoothFreeSpace.smoothTotal() - minFreeSpace);
int64_t springBytes = std::max<int64_t>(1, std::min<int64_t>(limits->storageSpringBytes, (ss.smoothFreeSpace.smoothTotal() - minFreeSpace) * 0.2)); int64_t springBytes = std::max<int64_t>(1, std::min<int64_t>(limits->storageSpringBytes, (ss.smoothFreeSpace.smoothTotal() - minFreeSpace) * 0.2));
int64_t targetBytes = std::max<int64_t>(1, std::min(limits->storageTargetBytes, (int64_t)ss.smoothFreeSpace.smoothTotal() - minFreeSpace)); int64_t targetBytes = std::max<int64_t>(1, std::min(limits->storageTargetBytes, (int64_t)ss.smoothFreeSpace.smoothTotal() - minFreeSpace));
if (targetBytes != limits->storageTargetBytes) { if (targetBytes != limits->storageTargetBytes) {
if (minFreeSpace == SERVER_KNOBS->MIN_FREE_SPACE) { if (minFreeSpace == SERVER_KNOBS->MIN_AVAILABLE_SPACE) {
ssLimitReason = limitReason_t::storage_server_min_free_space; ssLimitReason = limitReason_t::storage_server_min_free_space;
} else { } else {
ssLimitReason = limitReason_t::storage_server_min_free_space_ratio; ssLimitReason = limitReason_t::storage_server_min_free_space_ratio;
@ -574,14 +574,14 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
limitReason_t tlogLimitReason = limitReason_t::log_server_write_queue; limitReason_t tlogLimitReason = limitReason_t::log_server_write_queue;
int64_t minFreeSpace = std::max( SERVER_KNOBS->MIN_FREE_SPACE, (int64_t)(SERVER_KNOBS->MIN_FREE_SPACE_RATIO * tl.smoothTotalSpace.smoothTotal())); int64_t minFreeSpace = std::max( SERVER_KNOBS->MIN_AVAILABLE_SPACE, (int64_t)(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO * tl.smoothTotalSpace.smoothTotal()));
worstFreeSpaceTLog = std::min(worstFreeSpaceTLog, (int64_t)tl.smoothFreeSpace.smoothTotal() - minFreeSpace); worstFreeSpaceTLog = std::min(worstFreeSpaceTLog, (int64_t)tl.smoothFreeSpace.smoothTotal() - minFreeSpace);
int64_t springBytes = std::max<int64_t>(1, std::min<int64_t>(limits->logSpringBytes, (tl.smoothFreeSpace.smoothTotal() - minFreeSpace) * 0.2)); int64_t springBytes = std::max<int64_t>(1, std::min<int64_t>(limits->logSpringBytes, (tl.smoothFreeSpace.smoothTotal() - minFreeSpace) * 0.2));
int64_t targetBytes = std::max<int64_t>(1, std::min(limits->logTargetBytes, (int64_t)tl.smoothFreeSpace.smoothTotal() - minFreeSpace)); int64_t targetBytes = std::max<int64_t>(1, std::min(limits->logTargetBytes, (int64_t)tl.smoothFreeSpace.smoothTotal() - minFreeSpace));
if (targetBytes != limits->logTargetBytes) { if (targetBytes != limits->logTargetBytes) {
if (minFreeSpace == SERVER_KNOBS->MIN_FREE_SPACE) { if (minFreeSpace == SERVER_KNOBS->MIN_AVAILABLE_SPACE) {
tlogLimitReason = limitReason_t::log_server_min_free_space; tlogLimitReason = limitReason_t::log_server_min_free_space;
} else { } else {
tlogLimitReason = limitReason_t::log_server_min_free_space_ratio; tlogLimitReason = limitReason_t::log_server_min_free_space_ratio;

View File

@ -29,7 +29,6 @@
#include "fdbserver/CoordinationInterface.h" #include "fdbserver/CoordinationInterface.h"
#include "fdbmonitor/SimpleIni.h" #include "fdbmonitor/SimpleIni.h"
#include "fdbrpc/AsyncFileNonDurable.actor.h" #include "fdbrpc/AsyncFileNonDurable.actor.h"
#include "fdbrpc/TLSConnection.h"
#include "fdbclient/ManagementAPI.actor.h" #include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/NativeAPI.actor.h" #include "fdbclient/NativeAPI.actor.h"
#include "fdbclient/BackupAgent.actor.h" #include "fdbclient/BackupAgent.actor.h"
@ -48,60 +47,6 @@ const int MACHINE_REBOOT_TIME = 10;
bool destructed = false; bool destructed = false;
static const char* certBytes =
"-----BEGIN CERTIFICATE-----\n"
"MIIEGzCCAwOgAwIBAgIJANUQj1rRA2XMMA0GCSqGSIb3DQEBBQUAMIGjMQswCQYD\n"
"VQQGEwJVUzELMAkGA1UECAwCVkExDzANBgNVBAcMBlZpZW5uYTEaMBgGA1UECgwR\n"
"Rm91bmRhdGlvbkRCLCBMTEMxGTAXBgNVBAsMEFRlc3QgZW5naW5lZXJpbmcxFTAT\n"
"BgNVBAMMDE1yLiBCaWcgVHVuYTEoMCYGCSqGSIb3DQEJARYZYmlnLnR1bmFAZm91\n"
"bmRhdGlvbmRiLmNvbTAeFw0xNDEyMDUxNTEyMjFaFw0yNDEyMDIxNTEyMjFaMIGj\n"
"MQswCQYDVQQGEwJVUzELMAkGA1UECAwCVkExDzANBgNVBAcMBlZpZW5uYTEaMBgG\n"
"A1UECgwRRm91bmRhdGlvbkRCLCBMTEMxGTAXBgNVBAsMEFRlc3QgZW5naW5lZXJp\n"
"bmcxFTATBgNVBAMMDE1yLiBCaWcgVHVuYTEoMCYGCSqGSIb3DQEJARYZYmlnLnR1\n"
"bmFAZm91bmRhdGlvbmRiLmNvbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC\n"
"ggEBAKZTL2edDkiet4HBTZnjysn6gOVZH2MP02KVBIv/H7e+3w7ZOIRvcPzhZe9M\n"
"3cGH1t/pkr9DSXvzIb42EffMVlpLD2VQn2H8VC2QSdJCIQcf802u+Taf+XtW6K1h\n"
"p/YPL1uhdopUs3c1oon8ykKwnOfrQYgv5pUa7jQdMkltI2MQJU3uFq3Z/LHTvIKe\n"
"FN+bqK0iYhZthwMG7Rld4+RgKZoT4u1B6w/duEWk9KLjgs7fTf3Oe6JHCYNqwBJi\n"
"78sJalwXz9Wf8wmMaYSG0XNA7vBOdpTFhVPSsh6e3rkydf5HydMade/II98MWpMe\n"
"hFg7FFMaJP6ig8p5iL+9QP2VMCkCAwEAAaNQME4wHQYDVR0OBBYEFIXGmIcKptBP\n"
"v3i9WS/mK78o5E/MMB8GA1UdIwQYMBaAFIXGmIcKptBPv3i9WS/mK78o5E/MMAwG\n"
"A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADggEBAJkVgNGOXT+ZHCNEYLjr/6OM\n"
"UCHvwlMeaEyqxaOmK26J2kAADPhjBZ7lZOHWb2Wzb+BiQUIFGwNIMoRvsg8skpJa\n"
"OCqpVciHVXY/U8BiYY70DKozRza93Ab9om3pySGDJ/akdCjqbMT1Cb7Kloyw+hNh\n"
"XD4MML0lYiUE9KK35xyK6FgTx4A7IXl4b3lWBgglqTh4+P5J1+xy8AYJ0VfPoP7y\n"
"OoZgwAmkpkMnalReNkN7LALHGqMzv/qH04ODlkU/HUGgExtnINMxK9VEDIe/yLGm\n"
"DHy7gcQMj5Hyymack/d4ZF8CSrYpGZQeZGXoxOmTDwWcXgnYA+2o7lOYPb5Uu08=\n"
"-----END CERTIFICATE-----\n"
"-----BEGIN PRIVATE KEY-----\n"
"MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCmUy9nnQ5InreB\n"
"wU2Z48rJ+oDlWR9jD9NilQSL/x+3vt8O2TiEb3D84WXvTN3Bh9bf6ZK/Q0l78yG+\n"
"NhH3zFZaSw9lUJ9h/FQtkEnSQiEHH/NNrvk2n/l7VuitYaf2Dy9boXaKVLN3NaKJ\n"
"/MpCsJzn60GIL+aVGu40HTJJbSNjECVN7hat2fyx07yCnhTfm6itImIWbYcDBu0Z\n"
"XePkYCmaE+LtQesP3bhFpPSi44LO3039znuiRwmDasASYu/LCWpcF8/Vn/MJjGmE\n"
"htFzQO7wTnaUxYVT0rIent65MnX+R8nTGnXvyCPfDFqTHoRYOxRTGiT+ooPKeYi/\n"
"vUD9lTApAgMBAAECggEBAIYCmDtfq9aPK0P8v82yX/4FPD2OZV+nrKXNc3BpCuE9\n"
"hPOtyX/LWrol0b/Rqwr3rAWVaIt6Z4bbCuD7J9cEaL8voyP6pbCJYjmj/BbQ+VOI\n"
"Rrzcsid1Fcpu5+JqwK3c5kdp/NzQChmOuXt8lmrNal7iilZ0YdDZdfu/WnkW2mBB\n"
"oQHkujlnWr4PNYdwMOnBU6TwdOuz+inPVMLohOO0Vr585OxPsGzG2Ud3yQ/t34Cq\n"
"F9nmOXQoszftGKsL1yuh/3fGj/O86g/CRsUy05qZhDDBEYQD6qZCvD5+yp8oOWIR\n"
"SljM3GXDBnJqRPhP+Nyf6e6/GoQtfVZ9MPRzDDPzIBECgYEA2kX/zAs6taOiNqCb\n"
"6nVGe7/3uQJz/CkmOSKIFKUu7lCEUjmMYpK3Xzp26RTUR9cT+g9y+cnJO1Vbaxtf\n"
"Qidje6K+Oi1pQyUGQ6W+U8cPJHz43PVa7IB5Az5i/sS2tu0BGhvGo9G6iYQjxXeD\n"
"1197DRACgnm5AORQMum616XvSPMCgYEAwxKbkAzJzfZF6A3Ys+/0kycNfDP8xZoC\n"
"1zV3d1b2JncsdAPCHYSKtpniRrQN9ASa3RMdkh+wrMN/KlbtU9Ddoc4NHxSTFV7F\n"
"wypFMzLZslqkQ6uHnVVewHV7prfoKsMci2c9iHO7W8TEv4aqW8XDd8OozP3/q2j4\n"
"hvL7VIAVqXMCgYEAwAFnfOQ75uBkp00tGlfDgsRhc5vWz3CbMRNRRWfxGq41V+dL\n"
"uMJ7EAfr5ijue6uU5RmF+HkqzUjOvC894oGnn3CPibm8qNX+5q7799JZXa2ZdTVX\n"
"oEd7LAFLL/V3DP77Qy4/1Id/Ycydcu0pSuGw6tK0gnX06fXtHnxAYcaT8UUCgYAE\n"
"MytcP5o8r/ezVlD7Fsh6PpYAvZHMo1M6VPFchWfJTjmLyeTtA8SEx+1iPlAql8rJ\n"
"xbaWRc5k+dSMEdEMQ+vxpuELcUL1a9PwLsHMp2SefWsZ9eB2l7bxh9YAsebyvL6p\n"
"lbBydqNrB2KBCSIz1Z8uveytdS6C/0CSjzqwCA3vVwKBgQDAXqjo3xrzMlHeXm5o\n"
"qH/OjajjqbnPXHolHDitbLubyQ4E6KhMBMxfChBe/8VptB/Gs0efVbMVGuabxY7Q\n"
"iastGId8HyONy3UPGPxCn4b95cIxKvdpt+hvWtYHIBCfHXluQK7zsDMgvtXjYNiz\n"
"peZRikYlwmu1K2YRTf7oLE2Ogw==\n"
"-----END PRIVATE KEY-----\n";
template <class T> template <class T>
T simulate( const T& in ) { T simulate( const T& in ) {
BinaryWriter writer(AssumeVersion(currentProtocolVersion)); BinaryWriter writer(AssumeVersion(currentProtocolVersion));
@ -112,13 +57,6 @@ T simulate( const T& in ) {
return out; return out;
} }
static void simInitTLS(Reference<TLSOptions> tlsOptions) {
tlsOptions->set_cert_data( certBytes );
tlsOptions->set_key_data( certBytes );
tlsOptions->set_verify_peers(std::vector<std::string>(1, "Check.Valid=0"));
tlsOptions->register_network();
}
ACTOR Future<Void> runBackup( Reference<ClusterConnectionFile> connFile ) { ACTOR Future<Void> runBackup( Reference<ClusterConnectionFile> connFile ) {
state std::vector<Future<Void>> agentFutures; state std::vector<Future<Void>> agentFutures;
@ -195,7 +133,7 @@ enum AgentMode {
// a loop{} will be needed around the waiting on simulatedFDBD(). For now this simply // a loop{} will be needed around the waiting on simulatedFDBD(). For now this simply
// takes care of house-keeping such as context switching and file closing. // takes care of house-keeping such as context switching and file closing.
ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnectionFile> connFile, IPAddress ip, ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnectionFile> connFile, IPAddress ip,
bool sslEnabled, Reference<TLSOptions> tlsOptions, bool sslEnabled,
uint16_t port, uint16_t listenPerProcess, uint16_t port, uint16_t listenPerProcess,
LocalityData localities, ProcessClass processClass, LocalityData localities, ProcessClass processClass,
std::string* dataFolder, std::string* coordFolder, std::string* dataFolder, std::string* coordFolder,
@ -217,7 +155,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
wait( delay( waitTime ) ); wait( delay( waitTime ) );
state ISimulator::ProcessInfo* process = state ISimulator::ProcessInfo* process =
g_simulator.newProcess("Server", ip, port, listenPerProcess, localities, processClass, dataFolder->c_str(), g_simulator.newProcess("Server", ip, port, sslEnabled, listenPerProcess, localities, processClass, dataFolder->c_str(),
coordFolder->c_str()); coordFolder->c_str());
wait(g_simulator.onProcess(process, wait(g_simulator.onProcess(process,
TaskPriority::DefaultYield)); // Now switch execution to the process on which we will run TaskPriority::DefaultYield)); // Now switch execution to the process on which we will run
@ -246,9 +184,6 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(Reference<ClusterConnec
//SOMEDAY: test lower memory limits, without making them too small and causing the database to stop making progress //SOMEDAY: test lower memory limits, without making them too small and causing the database to stop making progress
FlowTransport::createInstance(processClass == ProcessClass::TesterClass || runBackupAgents == AgentOnly, 1); FlowTransport::createInstance(processClass == ProcessClass::TesterClass || runBackupAgents == AgentOnly, 1);
Sim2FileSystem::newFileSystem(); Sim2FileSystem::newFileSystem();
if (sslEnabled) {
tlsOptions->register_network();
}
vector<Future<Void>> futures; vector<Future<Void>> futures;
for (int listenPort = port; listenPort < port + listenPerProcess; ++listenPort) { for (int listenPort = port; listenPort < port + listenPerProcess; ++listenPort) {
@ -362,8 +297,7 @@ std::string describe(int const& val) {
// Since a datacenter kill is considered to be the same as killing a machine, files cannot be swapped across datacenters // Since a datacenter kill is considered to be the same as killing a machine, files cannot be swapped across datacenters
std::map< Optional<Standalone<StringRef>>, std::vector< std::vector< std::string > > > availableFolders; std::map< Optional<Standalone<StringRef>>, std::vector< std::vector< std::string > > > availableFolders;
// process count is no longer needed because it is now the length of the vector of ip's, because it was one ip per process // process count is no longer needed because it is now the length of the vector of ip's, because it was one ip per process
ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector<IPAddress> ips, bool sslEnabled, ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector<IPAddress> ips, bool sslEnabled, LocalityData localities,
Reference<TLSOptions> tlsOptions, LocalityData localities,
ProcessClass processClass, std::string baseFolder, bool restarting, ProcessClass processClass, std::string baseFolder, bool restarting,
bool useSeedFile, AgentMode runBackupAgents, bool sslOnly, std::string whitelistBinPaths) { bool useSeedFile, AgentMode runBackupAgents, bool sslOnly, std::string whitelistBinPaths) {
state int bootCount = 0; state int bootCount = 0;
@ -408,7 +342,7 @@ ACTOR Future<Void> simulatedMachine(ClusterConnectionString connStr, std::vector
Reference<ClusterConnectionFile> clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path)); Reference<ClusterConnectionFile> clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path));
const int listenPort = i*listenPerProcess + 1; const int listenPort = i*listenPerProcess + 1;
AgentMode agentMode = runBackupAgents == AgentOnly ? ( i == ips.size()-1 ? AgentOnly : AgentNone ) : runBackupAgents; AgentMode agentMode = runBackupAgents == AgentOnly ? ( i == ips.size()-1 ? AgentOnly : AgentNone ) : runBackupAgents;
processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, tlsOptions, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths)); processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, listenPort, listenPerProcess, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, agentMode, whitelistBinPaths));
TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], listenPort, true, false)).detail("ZoneId", localities.zoneId()).detail("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]); TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], listenPort, true, false)).detail("ZoneId", localities.zoneId()).detail("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]);
} }
@ -613,7 +547,7 @@ IPAddress makeIPAddressForSim(bool isIPv6, std::array<int, 4> parts) {
ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount, ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
Optional<ClusterConnectionString>* pConnString, Optional<ClusterConnectionString>* pConnString,
Standalone<StringRef>* pStartingConfiguration, Standalone<StringRef>* pStartingConfiguration,
Reference<TLSOptions> tlsOptions, int extraDB, std::string whitelistBinPaths) { int extraDB, std::string whitelistBinPaths) {
CSimpleIni ini; CSimpleIni ini;
ini.SetUnicode(); ini.SetUnicode();
ini.LoadFile(joinPath(baseFolder, "restartInfo.ini").c_str()); ini.LoadFile(joinPath(baseFolder, "restartInfo.ini").c_str());
@ -709,7 +643,7 @@ ACTOR Future<Void> restartSimulatedSystem(vector<Future<Void>>* systemActors, st
// SOMEDAY: parse backup agent from test file // SOMEDAY: parse backup agent from test file
systemActors->push_back(reportErrors( systemActors->push_back(reportErrors(
simulatedMachine(conn, ipAddrs, usingSSL, tlsOptions, localities, processClass, baseFolder, true, simulatedMachine(conn, ipAddrs, usingSSL, localities, processClass, baseFolder, true,
i == useSeedForMachine, enableExtraDB ? AgentAddition : AgentNone, i == useSeedForMachine, enableExtraDB ? AgentAddition : AgentNone,
usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), whitelistBinPaths), usingSSL && (listenersPerProcess == 1 || processClass == ProcessClass::TesterClass), whitelistBinPaths),
processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine")); processClass == ProcessClass::TesterClass ? "SimulatedTesterMachine" : "SimulatedMachine"));
@ -1108,12 +1042,14 @@ void SimulationConfig::generateNormalConfig(int minimumReplication, int minimumR
void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount, void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFolder, int* pTesterCount,
Optional<ClusterConnectionString>* pConnString, Standalone<StringRef>* pStartingConfiguration, Optional<ClusterConnectionString>* pConnString, Standalone<StringRef>* pStartingConfiguration,
int extraDB, int minimumReplication, int minimumRegions, Reference<TLSOptions> tlsOptions, int extraDB, int minimumReplication, int minimumRegions, std::string whitelistBinPaths, bool configureLocked) {
std::string whitelistBinPaths) {
// SOMEDAY: this does not test multi-interface configurations // SOMEDAY: this does not test multi-interface configurations
SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions); SimulationConfig simconfig(extraDB, minimumReplication, minimumRegions);
StatusObject startingConfigJSON = simconfig.db.toJSON(true); StatusObject startingConfigJSON = simconfig.db.toJSON(true);
std::string startingConfigString = "new"; std::string startingConfigString = "new";
if (configureLocked) {
startingConfigString += " locked";
}
for( auto kv : startingConfigJSON) { for( auto kv : startingConfigJSON) {
startingConfigString += " "; startingConfigString += " ";
if( kv.second.type() == json_spirit::int_type ) { if( kv.second.type() == json_spirit::int_type ) {
@ -1180,7 +1116,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
bool assignClasses = machineCount - dataCenters > 4 && deterministicRandom()->random01() < 0.5; bool assignClasses = machineCount - dataCenters > 4 && deterministicRandom()->random01() < 0.5;
// Use SSL 5% of the time // Use SSL 5% of the time
bool sslEnabled = deterministicRandom()->random01() < 0.10 && tlsOptions->enabled(); bool sslEnabled = deterministicRandom()->random01() < 0.10;
bool sslOnly = sslEnabled && deterministicRandom()->coinflip(); bool sslOnly = sslEnabled && deterministicRandom()->coinflip();
g_simulator.listenersPerProcess = sslEnabled && !sslOnly ? 2 : 1; g_simulator.listenersPerProcess = sslEnabled && !sslOnly ? 2 : 1;
TEST( sslEnabled ); // SSL enabled TEST( sslEnabled ); // SSL enabled
@ -1239,9 +1175,9 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
.detail("Address", coordinatorAddresses[i]) .detail("Address", coordinatorAddresses[i])
.detail("Coordinators", describe(coordinatorAddresses)); .detail("Coordinators", describe(coordinatorAddresses));
g_simulator.protectedAddresses.insert( g_simulator.protectedAddresses.insert(
NetworkAddress(coordinatorAddresses[i].ip, coordinatorAddresses[i].port, true, false)); NetworkAddress(coordinatorAddresses[i].ip, coordinatorAddresses[i].port, true, coordinatorAddresses[i].isTLS()));
if(coordinatorAddresses[i].port==2) { if(coordinatorAddresses[i].port==2) {
g_simulator.protectedAddresses.insert(NetworkAddress(coordinatorAddresses[i].ip, 1, true, false)); g_simulator.protectedAddresses.insert(NetworkAddress(coordinatorAddresses[i].ip, 1, true, true));
} }
} }
deterministicRandom()->randomShuffle(coordinatorAddresses); deterministicRandom()->randomShuffle(coordinatorAddresses);
@ -1324,7 +1260,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
// check the sslEnablementMap using only one ip( // check the sslEnablementMap using only one ip(
LocalityData localities(Optional<Standalone<StringRef>>(), zoneId, machineId, dcUID); LocalityData localities(Optional<Standalone<StringRef>>(), zoneId, machineId, dcUID);
localities.set(LiteralStringRef("data_hall"), dcUID); localities.set(LiteralStringRef("data_hall"), dcUID);
systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled, tlsOptions, systemActors->push_back(reportErrors(simulatedMachine(conn, ips, sslEnabled,
localities, processClass, baseFolder, false, machine == useSeedForMachine, requiresExtraDBMachines ? AgentOnly : AgentAddition, sslOnly, whitelistBinPaths ), "SimulatedMachine")); localities, processClass, baseFolder, false, machine == useSeedForMachine, requiresExtraDBMachines ? AgentOnly : AgentAddition, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
if (requiresExtraDBMachines) { if (requiresExtraDBMachines) {
@ -1337,7 +1273,7 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newMachineId, dcUID); LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newMachineId, dcUID);
localities.set(LiteralStringRef("data_hall"), dcUID); localities.set(LiteralStringRef("data_hall"), dcUID);
systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled, tlsOptions, systemActors->push_back(reportErrors(simulatedMachine(*g_simulator.extraDB, extraIps, sslEnabled,
localities, localities,
processClass, baseFolder, false, machine == useSeedForMachine, AgentNone, sslOnly, whitelistBinPaths ), "SimulatedMachine")); processClass, baseFolder, false, machine == useSeedForMachine, AgentNone, sslOnly, whitelistBinPaths ), "SimulatedMachine"));
} }
@ -1365,9 +1301,9 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
Standalone<StringRef> newZoneId = Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()); Standalone<StringRef> newZoneId = Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString());
LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newZoneId, Optional<Standalone<StringRef>>()); LocalityData localities(Optional<Standalone<StringRef>>(), newZoneId, newZoneId, Optional<Standalone<StringRef>>());
systemActors->push_back( reportErrors( simulatedMachine( systemActors->push_back( reportErrors( simulatedMachine(
conn, ips, sslEnabled, tlsOptions, conn, ips, sslEnabled && sslOnly,
localities, ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource), localities, ProcessClass(ProcessClass::TesterClass, ProcessClass::CommandLineSource),
baseFolder, false, i == useSeedForMachine, AgentNone, sslEnabled, whitelistBinPaths ), baseFolder, false, i == useSeedForMachine, AgentNone, sslEnabled && sslOnly, whitelistBinPaths ),
"SimulatedTesterMachine") ); "SimulatedTesterMachine") );
} }
*pStartingConfiguration = startingConfigString; *pStartingConfiguration = startingConfigString;
@ -1386,7 +1322,8 @@ void setupSimulatedSystem(vector<Future<Void>>* systemActors, std::string baseFo
.detail("StartingConfiguration", pStartingConfiguration->toString()); .detail("StartingConfiguration", pStartingConfiguration->toString());
} }
void checkExtraDB(const char *testFile, int &extraDB, int &minimumReplication, int &minimumRegions) { void checkTestConf(const char* testFile, int& extraDB, int& minimumReplication, int& minimumRegions,
int& configureLocked) {
std::ifstream ifs; std::ifstream ifs;
ifs.open(testFile, std::ifstream::in); ifs.open(testFile, std::ifstream::in);
if (!ifs.good()) if (!ifs.good())
@ -1418,12 +1355,16 @@ void checkExtraDB(const char *testFile, int &extraDB, int &minimumReplication, i
if (attrib == "minimumRegions") { if (attrib == "minimumRegions") {
sscanf( value.c_str(), "%d", &minimumRegions ); sscanf( value.c_str(), "%d", &minimumRegions );
} }
if (attrib == "configureLocked") {
sscanf(value.c_str(), "%d", &configureLocked);
}
} }
ifs.close(); ifs.close();
} }
ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, std::string whitelistBinPaths, Reference<TLSOptions> tlsOptions) { ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool rebooting, bool restoring, std::string whitelistBinPaths) {
state vector<Future<Void>> systemActors; state vector<Future<Void>> systemActors;
state Optional<ClusterConnectionString> connFile; state Optional<ClusterConnectionString> connFile;
state Standalone<StringRef> startingConfiguration; state Standalone<StringRef> startingConfiguration;
@ -1431,11 +1372,12 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
state int extraDB = 0; state int extraDB = 0;
state int minimumReplication = 0; state int minimumReplication = 0;
state int minimumRegions = 0; state int minimumRegions = 0;
checkExtraDB(testFile, extraDB, minimumReplication, minimumRegions); state int configureLocked = 0;
checkTestConf(testFile, extraDB, minimumReplication, minimumRegions, configureLocked);
// TODO (IPv6) Use IPv6? // TODO (IPv6) Use IPv6?
wait(g_simulator.onProcess( wait(g_simulator.onProcess(
g_simulator.newProcess("TestSystem", IPAddress(0x01010101), 1, 1, g_simulator.newProcess("TestSystem", IPAddress(0x01010101), 1, false, 1,
LocalityData(Optional<Standalone<StringRef>>(), LocalityData(Optional<Standalone<StringRef>>(),
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()), Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()), Standalone<StringRef>(deterministicRandom()->randomUniqueID().toString()),
@ -1444,16 +1386,12 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
TaskPriority::DefaultYield)); TaskPriority::DefaultYield));
Sim2FileSystem::newFileSystem(); Sim2FileSystem::newFileSystem();
FlowTransport::createInstance(true, 1); FlowTransport::createInstance(true, 1);
if (tlsOptions->enabled()) {
simInitTLS(tlsOptions);
}
TEST(true); // Simulation start TEST(true); // Simulation start
try { try {
//systemActors.push_back( startSystemMonitor(dataFolder) ); //systemActors.push_back( startSystemMonitor(dataFolder) );
if (rebooting) { if (rebooting) {
wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, tlsOptions, extraDB, whitelistBinPaths), 100.0 ) ); wait( timeoutError( restartSimulatedSystem( &systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, whitelistBinPaths), 100.0 ) );
// FIXME: snapshot restore does not support multi-region restore, hence restore it as single region always // FIXME: snapshot restore does not support multi-region restore, hence restore it as single region always
if (restoring) { if (restoring) {
startingConfiguration = LiteralStringRef("usable_regions=1"); startingConfiguration = LiteralStringRef("usable_regions=1");
@ -1462,7 +1400,7 @@ ACTOR void setupAndRun(std::string dataFolder, const char *testFile, bool reboot
else { else {
g_expect_full_pointermap = 1; g_expect_full_pointermap = 1;
setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB, setupSimulatedSystem(&systemActors, dataFolder, &testerCount, &connFile, &startingConfiguration, extraDB,
minimumReplication, minimumRegions, tlsOptions, whitelistBinPaths); minimumReplication, minimumRegions, whitelistBinPaths, configureLocked);
wait( delay(1.0) ); // FIXME: WHY!!! //wait for machines to boot wait( delay(1.0) ); // FIXME: WHY!!! //wait for machines to boot
} }
std::string clusterFileDir = joinPath( dataFolder, deterministicRandom()->randomUniqueID().toString() ); std::string clusterFileDir = joinPath( dataFolder, deterministicRandom()->randomUniqueID().toString() );

View File

@ -18,12 +18,10 @@
* limitations under the License. * limitations under the License.
*/ */
#include "fdbrpc/TLSConnection.h"
#ifndef FDBSERVER_SIMULATEDCLUSTER_H #ifndef FDBSERVER_SIMULATEDCLUSTER_H
#define FDBSERVER_SIMULATEDCLUSTER_H #define FDBSERVER_SIMULATEDCLUSTER_H
#pragma once #pragma once
void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, std::string const& whitelistBinPath, Reference<TLSOptions> const& useSSL); void setupAndRun(std::string const& dataFolder, const char* const& testFile, bool const& rebooting, bool const& restoring, std::string const& whitelistBinPath);
#endif #endif

View File

@ -287,14 +287,10 @@ private:
int nPointers, valueLength; int nPointers, valueLength;
}; };
static force_inline bool less(const uint8_t* a, int aLen, const uint8_t* b, int bLen) { static force_inline bool less( const uint8_t* a, int aLen, const uint8_t* b, int bLen ) {
int len = min(aLen, bLen); int c = memcmp(a,b,min(aLen,bLen));
for (int i = 0; i < len; i++) if (c<0) return true;
if (a[i] < b[i]) if (c>0) return false;
return true;
else if (a[i] > b[i])
return false;
return aLen < bLen; return aLen < bLen;
} }

View File

@ -1547,17 +1547,9 @@ ACTOR static Future<vector<std::pair<TLogInterface, EventMap>>> getTLogsAndMetri
return results; return results;
} }
ACTOR static Future<vector<std::pair<MasterProxyInterface, EventMap>>> getProxiesAndMetrics(Database cx, std::unordered_map<NetworkAddress, WorkerInterface> address_workers) { ACTOR static Future<vector<std::pair<MasterProxyInterface, EventMap>>> getProxiesAndMetrics(Reference<AsyncVar<CachedSerialization<ServerDBInfo>>> db, std::unordered_map<NetworkAddress, WorkerInterface> address_workers) {
Reference<ProxyInfo> proxyInfo = cx->getMasterProxies(false);
std::vector<MasterProxyInterface> servers;
if(proxyInfo) {
for(int i = 0; i < proxyInfo->size(); ++i) {
servers.push_back(proxyInfo->getInterface(i));
}
}
vector<std::pair<MasterProxyInterface, EventMap>> results = wait(getServerMetrics( vector<std::pair<MasterProxyInterface, EventMap>> results = wait(getServerMetrics(
servers, address_workers, std::vector<std::string>{ "GRVLatencyMetrics", "CommitLatencyMetrics" })); db->get().read().client.proxies, address_workers, std::vector<std::string>{ "GRVLatencyMetrics", "CommitLatencyMetrics" }));
return results; return results;
} }
@ -2313,7 +2305,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
state Future<ErrorOr<vector<std::pair<StorageServerInterface, EventMap>>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers)); state Future<ErrorOr<vector<std::pair<StorageServerInterface, EventMap>>>> storageServerFuture = errorOr(getStorageServersAndMetrics(cx, address_workers));
state Future<ErrorOr<vector<std::pair<TLogInterface, EventMap>>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers)); state Future<ErrorOr<vector<std::pair<TLogInterface, EventMap>>>> tLogFuture = errorOr(getTLogsAndMetrics(db, address_workers));
state Future<ErrorOr<vector<std::pair<MasterProxyInterface, EventMap>>>> proxyFuture = errorOr(getProxiesAndMetrics(cx, address_workers)); state Future<ErrorOr<vector<std::pair<MasterProxyInterface, EventMap>>>> proxyFuture = errorOr(getProxiesAndMetrics(db, address_workers));
state int minReplicasRemaining = -1; state int minReplicasRemaining = -1;
std::vector<Future<JsonBuilderObject>> futures2; std::vector<Future<JsonBuilderObject>> futures2;

View File

@ -393,10 +393,10 @@ struct StorageServerMetrics {
.detail("Load", rep.load.bytes); .detail("Load", rep.load.bytes);
} }
rep.free.bytes = sb.free; rep.available.bytes = sb.available;
rep.free.iosPerKSecond = 10e6; rep.available.iosPerKSecond = 10e6;
rep.free.bytesPerKSecond = 100e9; rep.available.bytesPerKSecond = 100e9;
rep.free.bytesReadPerKSecond = 100e9; rep.available.bytesReadPerKSecond = 100e9;
rep.capacity.bytes = sb.total; rep.capacity.bytes = sb.total;
rep.capacity.iosPerKSecond = 10e6; rep.capacity.iosPerKSecond = 10e6;

View File

@ -301,6 +301,7 @@ struct TLogData : NonCopyable {
std::map<UID, Reference<struct LogData>> id_data; std::map<UID, Reference<struct LogData>> id_data;
UID dbgid; UID dbgid;
UID workerID;
IKeyValueStore* persistentData; // Durable data on disk that were spilled. IKeyValueStore* persistentData; // Durable data on disk that were spilled.
IDiskQueue* rawPersistentQueue; // The physical queue the persistentQueue below stores its data. Ideally, log interface should work without directly accessing rawPersistentQueue IDiskQueue* rawPersistentQueue; // The physical queue the persistentQueue below stores its data. Ideally, log interface should work without directly accessing rawPersistentQueue
@ -343,8 +344,8 @@ struct TLogData : NonCopyable {
Reference<AsyncVar<bool>> degraded; Reference<AsyncVar<bool>> degraded;
std::vector<TagsAndMessage> tempTagMessages; std::vector<TagsAndMessage> tempTagMessages;
TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder) TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()), : dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)), persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
@ -508,15 +509,16 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
bool execOpCommitInProgress; bool execOpCommitInProgress;
int txsTags; int txsTags;
explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, TLogSpillType logSpillType, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()), explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, TLogSpillType logSpillType, std::vector<Tag> tags, std::string context)
cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion), logSpillType(logSpillType), : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()), cc("TLog", interf.id().toString()), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), remoteTag(remoteTag), isPrimary(isPrimary), logRouterTags(logRouterTags), txsTags(txsTags), recruitmentID(recruitmentID), protocolVersion(protocolVersion), logSpillType(logSpillType),
minPoppedTagVersion(0), minPoppedTag(invalidTag), logSystem(new AsyncVar<Reference<ILogSystem>>()), logRouterPoppedVersion(0), durableKnownCommittedVersion(0), minKnownCommittedVersion(0), queuePoppedVersion(0), allTags(tags.begin(), tags.end()), terminated(tLogData->terminated.getFuture()),
minPoppedTagVersion(0), minPoppedTag(invalidTag),
// These are initialized differently on init() or recovery // These are initialized differently on init() or recovery
recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0), recoveryCount(), stopped(false), initialized(false), queueCommittingVersion(0), newPersistentDataVersion(invalidVersion), unrecoveredBefore(1), recoveredAt(1), unpoppedRecoveredTags(0),
logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false) logRouterPopToVersion(0), locality(tagLocalityInvalid), execOpCommitInProgress(false)
{ {
startRole(Role::TRANSACTION_LOG, interf.id(), UID()); startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context);
persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id); persistentDataVersion.init(LiteralStringRef("TLog.PersistentDataVersion"), cc.id);
persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id); persistentDataDurableVersion.init(LiteralStringRef("TLog.PersistentDataDurableVersion"), cc.id);
@ -711,7 +713,7 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
// us to remove data that still is pointed to by SpilledData in the btree. // us to remove data that still is pointed to by SpilledData in the btree.
if (data->persistentPopped <= logData->persistentDataVersion) { if (data->persistentPopped <= logData->persistentDataVersion) {
// Recover the next needed location in the Disk Queue from the index. // Recover the next needed location in the Disk Queue from the index.
Standalone<VectorRef<KeyValueRef>> kvrefs = wait( Standalone<RangeResultRef> kvrefs = wait(
self->persistentData->readRange(KeyRangeRef( self->persistentData->readRange(KeyRangeRef(
persistTagMessageRefsKey(logData->logId, data->tag, data->persistentPopped), persistTagMessageRefsKey(logData->logId, data->tag, data->persistentPopped),
persistTagMessageRefsKey(logData->logId, data->tag, logData->persistentDataVersion + 1)), 1)); persistTagMessageRefsKey(logData->logId, data->tag, logData->persistentDataVersion + 1)), 1));
@ -1493,7 +1495,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
} }
if ( logData->shouldSpillByValue(req.tag) ) { if ( logData->shouldSpillByValue(req.tag) ) {
Standalone<VectorRef<KeyValueRef>> kvs = wait( Standalone<RangeResultRef> kvs = wait(
self->persistentData->readRange(KeyRangeRef( self->persistentData->readRange(KeyRangeRef(
persistTagMessagesKey(logData->logId, req.tag, req.begin), persistTagMessagesKey(logData->logId, req.tag, req.begin),
persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES)); persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));
@ -1512,7 +1514,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
} }
} else { } else {
// FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow. // FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
Standalone<VectorRef<KeyValueRef>> kvrefs = wait( Standalone<RangeResultRef> kvrefs = wait(
self->persistentData->readRange(KeyRangeRef( self->persistentData->readRange(KeyRangeRef(
persistTagMessageRefsKey(logData->logId, req.tag, req.begin), persistTagMessageRefsKey(logData->logId, req.tag, req.begin),
persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),
@ -1649,12 +1651,8 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
return Void(); return Void();
} }
//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
state int loopCount = 0;
while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
loopCount++;
}
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid); TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
TEST(true); //TLog degraded TEST(true); //TLog degraded
self->degraded->set(true); self->degraded->set(true);
@ -1897,7 +1895,7 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
if ( self->dbInfo->get().master.id() != lastMasterID) { if ( self->dbInfo->get().master.id() != lastMasterID) {
// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface // The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
TLogRejoinRequest req(tli); TLogRejoinRequest req(tli);
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id()); TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
choose { choose {
when(TLogRejoinReply rep = when(TLogRejoinReply rep =
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) { wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
@ -2358,14 +2356,14 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
wait(storage->init()); wait(storage->init());
state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key); state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
state Future<Optional<Value>> fRecoveryLocation = storage->readValue(persistRecoveryLocationKey); state Future<Optional<Value>> fRecoveryLocation = storage->readValue(persistRecoveryLocationKey);
state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys); state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys); state Future<Standalone<RangeResultRef>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys); state Future<Standalone<RangeResultRef>> fLocality = storage->readRange(persistLocalityKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys); state Future<Standalone<RangeResultRef>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys); state Future<Standalone<RangeResultRef>> fTxsTags = storage->readRange(persistTxsTagsKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys); state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys); state Future<Standalone<RangeResultRef>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fTLogSpillTypes = storage->readRange(persistTLogSpillTypeKeys); state Future<Standalone<RangeResultRef>> fTLogSpillTypes = storage->readRange(persistTLogSpillTypeKeys);
// FIXME: metadata in queue? // FIXME: metadata in queue?
@ -2384,7 +2382,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
} }
if (!fFormat.get().present()) { if (!fFormat.get().present()) {
Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) ); Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
if (!v.size()) { if (!v.size()) {
TEST(true); // The DB is completely empty, so it was never initialized. Delete it. TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
throw worker_removed(); throw worker_removed();
@ -2451,7 +2449,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
TLogSpillType logSpillType = BinaryReader::fromStringRef<TLogSpillType>( fTLogSpillTypes.get()[idx].value, AssumeVersion(protocolVersion) ); TLogSpillType logSpillType = BinaryReader::fromStringRef<TLogSpillType>( fTLogSpillTypes.get()[idx].value, AssumeVersion(protocolVersion) );
//We do not need the remoteTag, because we will not be loading any additional data //We do not need the remoteTag, because we will not be loading any additional data
logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, logSpillType, std::vector<Tag>()) ); logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, logSpillType, std::vector<Tag>(), "Restored") );
logData->locality = id_locality[id1]; logData->locality = id_locality[id1];
logData->stopped = true; logData->stopped = true;
self->id_data[id1] = logData; self->id_data[id1] = logData;
@ -2473,7 +2471,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) ); tagKeys = prefixRange( rawId.withPrefix(persistTagPoppedKeys.begin) );
loop { loop {
if(logData->removed.isReady()) break; if(logData->removed.isReady()) break;
Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) ); Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
if (!data.size()) break; if (!data.size()) break;
((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end ); ((KeyRangeRef&)tagKeys) = KeyRangeRef( keyAfter(data.back().key, tagKeys.arena()), tagKeys.end );
@ -2657,7 +2655,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
stopAllTLogs(self, recruited.id()); stopAllTLogs(self, recruited.id());
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.spillType, req.allTags) ); bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.spillType, req.allTags, recovering ? "Recovered" : "Recruited") );
self->id_data[recruited.id()] = logData; self->id_data[recruited.id()] = logData;
logData->locality = req.locality; logData->locality = req.locality;
logData->recoveryCount = req.epoch; logData->recoveryCount = req.epoch;
@ -2674,7 +2673,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
throw logData->removed.getError(); throw logData->removed.getError();
} }
if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) { if (recovering) {
logData->unrecoveredBefore = req.startVersion; logData->unrecoveredBefore = req.startVersion;
logData->recoveredAt = req.recoverAt; logData->recoveredAt = req.recoverAt;
logData->knownCommittedVersion = req.startVersion - 1; logData->knownCommittedVersion = req.startVersion - 1;
@ -2783,13 +2782,11 @@ ACTOR Future<Void> startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen
} }
// New tLog (if !recoverFrom.size()) or restore from network // New tLog (if !recoverFrom.size()) or restore from network
ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) { ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) {
state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder ); state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder );
state Future<Void> error = actorCollection( self.sharedActors.getFuture() ); state Future<Void> error = actorCollection( self.sharedActors.getFuture() );
TraceEvent("SharedTlog", tlogId); TraceEvent("SharedTlog", tlogId);
// FIXME: Pass the worker id instead of stubbing it
startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID());
try { try {
if(restoreFromDisk) { if(restoreFromDisk) {
wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) ); wait( restorePersistentState( &self, locality, oldLog, recovered, tlogRequests ) );
@ -2833,7 +2830,6 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
} catch (Error& e) { } catch (Error& e) {
self.terminated.send(Void()); self.terminated.send(Void());
TraceEvent("TLogError", tlogId).error(e, true); TraceEvent("TLogError", tlogId).error(e, true);
endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true);
if(recovered.canBeSet()) recovered.send(Void()); if(recovered.canBeSet()) recovered.send(Void());
while(!tlogRequests.isEmpty()) { while(!tlogRequests.isEmpty()) {

View File

@ -4859,22 +4859,26 @@ public:
m_tree->set(keyValue); m_tree->set(keyValue);
} }
Future< Standalone< VectorRef< KeyValueRef > > > readRange(KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30) { Future< Standalone< RangeResultRef > > readRange(KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30) {
debug_printf("READRANGE %s\n", printable(keys).c_str()); debug_printf("READRANGE %s\n", printable(keys).c_str());
return catchError(readRange_impl(this, keys, rowLimit, byteLimit)); return catchError(readRange_impl(this, keys, rowLimit, byteLimit));
} }
ACTOR static Future< Standalone< VectorRef< KeyValueRef > > > readRange_impl(KeyValueStoreRedwoodUnversioned *self, KeyRange keys, int rowLimit, int byteLimit) { ACTOR static Future< Standalone< RangeResultRef > > readRange_impl(KeyValueStoreRedwoodUnversioned *self, KeyRange keys, int rowLimit, int byteLimit) {
self->m_tree->counts.getRanges++; self->m_tree->counts.getRanges++;
state Standalone<VectorRef<KeyValueRef>> result; state Standalone<RangeResultRef> result;
state int accumulatedBytes = 0; state int accumulatedBytes = 0;
ASSERT( byteLimit > 0 ); ASSERT( byteLimit > 0 );
if(rowLimit == 0) {
return result;
}
state Reference<IStoreCursor> cur = self->m_tree->readAtVersion(self->m_tree->getLastCommittedVersion()); state Reference<IStoreCursor> cur = self->m_tree->readAtVersion(self->m_tree->getLastCommittedVersion());
// Prefetch is currently only done in the forward direction // Prefetch is currently only done in the forward direction
state int prefetchBytes = rowLimit > 1 ? byteLimit : 0; state int prefetchBytes = rowLimit > 1 ? byteLimit : 0;
if(rowLimit >= 0) { if(rowLimit > 0) {
wait(cur->findFirstEqualOrGreater(keys.begin, prefetchBytes)); wait(cur->findFirstEqualOrGreater(keys.begin, prefetchBytes));
while(cur->isValid() && cur->getKey() < keys.end) { while(cur->isValid() && cur->getKey() < keys.end) {
KeyValueRef kv(KeyRef(result.arena(), cur->getKey()), ValueRef(result.arena(), cur->getValue())); KeyValueRef kv(KeyRef(result.arena(), cur->getKey()), ValueRef(result.arena(), cur->getValue()));
@ -4900,6 +4904,12 @@ public:
wait(cur->prev()); wait(cur->prev());
} }
} }
result.more = rowLimit == 0 || accumulatedBytes >= byteLimit;
if(result.more) {
ASSERT(result.size() > 0);
result.readThrough = result[result.size()-1].key;
}
return result; return result;
} }

View File

@ -454,7 +454,7 @@ private:
} }
}; };
void startRole(const Role &role, UID roleId, UID workerId, std::map<std::string, std::string> details = std::map<std::string, std::string>(), std::string origination = "Recruited"); void startRole(const Role &role, UID roleId, UID workerId, const std::map<std::string, std::string> &details = std::map<std::string, std::string>(), const std::string &origination = "Recruited");
void endRole(const Role &role, UID id, std::string reason, bool ok = true, Error e = Error()); void endRole(const Role &role, UID id, std::string reason, bool ok = true, Error e = Error());
struct ServerDBInfo; struct ServerDBInfo;
@ -491,8 +491,8 @@ ACTOR Future<Void> masterProxyServer(MasterProxyInterface proxy, InitializeMaste
Reference<AsyncVar<ServerDBInfo>> db, std::string whitelistBinPaths); Reference<AsyncVar<ServerDBInfo>> db, std::string whitelistBinPaths);
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID,
Promise<Void> oldLog, Promise<Void> recovered, std::string folder, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog); Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog);
ACTOR Future<Void> monitorServerDBInfo(Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface, ACTOR Future<Void> monitorServerDBInfo(Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
@ -512,20 +512,20 @@ void updateCpuProfiler(ProfilerRequest req);
namespace oldTLog_4_6 { namespace oldTLog_4_6 {
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId); Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId, UID workerID);
} }
namespace oldTLog_6_0 { namespace oldTLog_6_0 {
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID,
Promise<Void> oldLog, Promise<Void> recovered, std::string folder, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog); Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog);
} }
namespace oldTLog_6_2 { namespace oldTLog_6_2 {
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue, ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID,
Promise<Void> oldLog, Promise<Void> recovered, std::string folder, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog); Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog);
} }

View File

@ -51,11 +51,11 @@
#include "fdbserver/workloads/workloads.actor.h" #include "fdbserver/workloads/workloads.actor.h"
#include <time.h> #include <time.h>
#include "fdbserver/Status.h" #include "fdbserver/Status.h"
#include "fdbrpc/TLSConnection.h"
#include "fdbrpc/Net2FileSystem.h" #include "fdbrpc/Net2FileSystem.h"
#include "fdbrpc/Platform.h" #include "fdbrpc/Platform.h"
#include "fdbrpc/AsyncFileCached.actor.h" #include "fdbrpc/AsyncFileCached.actor.h"
#include "fdbserver/CoroFlow.h" #include "fdbserver/CoroFlow.h"
#include "flow/TLSPolicy.h"
#if defined(CMAKE_BUILD) || !defined(WIN32) #if defined(CMAKE_BUILD) || !defined(WIN32)
#include "versions.h" #include "versions.h"
#endif #endif
@ -942,8 +942,8 @@ struct CLIOptions {
int minTesterCount = 1; int minTesterCount = 1;
bool testOnServers = false; bool testOnServers = false;
Reference<TLSOptions> tlsOptions = Reference<TLSOptions>(new TLSOptions); Reference<TLSPolicy> tlsPolicy = Reference<TLSPolicy>(new TLSPolicy(TLSPolicy::Is::SERVER));
std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword; TLSParams tlsParams;
std::vector<std::string> tlsVerifyPeers; std::vector<std::string> tlsVerifyPeers;
double fileIoTimeout = 0.0; double fileIoTimeout = 0.0;
bool fileIoWarnOnly = false; bool fileIoWarnOnly = false;
@ -1371,22 +1371,22 @@ private:
break; break;
#ifndef TLS_DISABLED #ifndef TLS_DISABLED
case TLSOptions::OPT_TLS_PLUGIN: case TLSParams::OPT_TLS_PLUGIN:
args.OptionArg(); args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_CERTIFICATES: case TLSParams::OPT_TLS_CERTIFICATES:
tlsCertPath = args.OptionArg(); tlsParams.tlsCertPath = args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_PASSWORD: case TLSParams::OPT_TLS_PASSWORD:
tlsPassword = args.OptionArg(); tlsParams.tlsPassword = args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_CA_FILE: case TLSParams::OPT_TLS_CA_FILE:
tlsCAPath = args.OptionArg(); tlsParams.tlsCAPath = args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_KEY: case TLSParams::OPT_TLS_KEY:
tlsKeyPath = args.OptionArg(); tlsParams.tlsKeyPath = args.OptionArg();
break; break;
case TLSOptions::OPT_TLS_VERIFY_PEERS: case TLSParams::OPT_TLS_VERIFY_PEERS:
tlsVerifyPeers.push_back(args.OptionArg()); tlsVerifyPeers.push_back(args.OptionArg());
break; break;
#endif #endif
@ -1626,7 +1626,12 @@ int main(int argc, char* argv[]) {
startNewSimulator(); startNewSimulator();
openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup); openTraceFile(NetworkAddress(), opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", opts.logGroup);
} else { } else {
g_network = newNet2(opts.useThreadPool, true); #ifndef TLS_DISABLED
if ( opts.tlsVerifyPeers.size() ) {
opts.tlsPolicy->set_verify_peers( opts.tlsVerifyPeers );
}
#endif
g_network = newNet2(opts.useThreadPool, true, opts.tlsPolicy, opts.tlsParams);
FlowTransport::createInstance(false, 1); FlowTransport::createInstance(false, 1);
const bool expectsPublicAddress = (role == FDBD || role == NetworkTestServer || role == Restore); const bool expectsPublicAddress = (role == FDBD || role == NetworkTestServer || role == Restore);
@ -1641,18 +1646,6 @@ int main(int argc, char* argv[]) {
openTraceFile(opts.publicAddresses.address, opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace", openTraceFile(opts.publicAddresses.address, opts.rollsize, opts.maxLogsSize, opts.logFolder, "trace",
opts.logGroup); opts.logGroup);
#ifndef TLS_DISABLED
if (opts.tlsCertPath.size()) opts.tlsOptions->set_cert_file(opts.tlsCertPath);
if (opts.tlsCAPath.size()) opts.tlsOptions->set_ca_file(opts.tlsCAPath);
if (opts.tlsKeyPath.size()) {
if (opts.tlsPassword.size()) opts.tlsOptions->set_key_password(opts.tlsPassword);
opts.tlsOptions->set_key_file(opts.tlsKeyPath);
}
if (opts.tlsVerifyPeers.size()) opts.tlsOptions->set_verify_peers(opts.tlsVerifyPeers);
opts.tlsOptions->register_network();
#endif
if (expectsPublicAddress) { if (expectsPublicAddress) {
for (int ii = 0; ii < (opts.publicAddresses.secondaryAddress.present() ? 2 : 1); ++ii) { for (int ii = 0; ii < (opts.publicAddresses.secondaryAddress.present() ? 2 : 1); ++ii) {
const NetworkAddress& publicAddress = const NetworkAddress& publicAddress =
@ -1853,8 +1846,7 @@ int main(int argc, char* argv[]) {
} }
} }
} }
setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths, setupAndRun(dataFolder, opts.testFile, opts.restarting, (isRestoring >= 1), opts.whitelistBinPaths);
opts.tlsOptions);
g_simulator.run(); g_simulator.run();
} else if (role == FDBD) { } else if (role == FDBD) {
// Call fast restore for the class FastRestoreClass. This is a short-cut to run fast restore in circus // Call fast restore for the class FastRestoreClass. This is a short-cut to run fast restore in circus
@ -2070,6 +2062,11 @@ int main(int argc, char* argv[]) {
TraceEvent(SevError, "MainError").error(e); TraceEvent(SevError, "MainError").error(e);
//printf("\n%d tests passed; %d tests failed\n", passCount, failCount); //printf("\n%d tests passed; %d tests failed\n", passCount, failCount);
flushAndExit(FDB_EXIT_MAIN_ERROR); flushAndExit(FDB_EXIT_MAIN_ERROR);
} catch (boost::system::system_error& e) {
fprintf(stderr, "boost::system::system_error: %s (%d)", e.what(), e.code().value());
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());
//printf("\n%d tests passed; %d tests failed\n", passCount, failCount);
flushAndExit(FDB_EXIT_MAIN_EXCEPTION);
} catch (std::exception& e) { } catch (std::exception& e) {
fprintf(stderr, "std::exception: %s\n", e.what()); fprintf(stderr, "std::exception: %s\n", e.what());
TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what()); TraceEvent(SevError, "MainError").error(unknown_error()).detail("RootException", e.what());

View File

@ -672,8 +672,8 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
TraceEvent("MasterRecovering", self->dbgid).detail("LastEpochEnd", self->lastEpochEnd).detail("RecoveryTransactionVersion", self->recoveryTransactionVersion); TraceEvent("MasterRecovering", self->dbgid).detail("LastEpochEnd", self->lastEpochEnd).detail("RecoveryTransactionVersion", self->recoveryTransactionVersion);
Standalone<VectorRef<KeyValueRef>> rawConf = wait( self->txnStateStore->readRange( configKeys ) ); Standalone<RangeResultRef> rawConf = wait( self->txnStateStore->readRange( configKeys ) );
self->configuration.fromKeyValues( rawConf ); self->configuration.fromKeyValues( rawConf.castTo<VectorRef<KeyValueRef>>() );
self->originalConfiguration = self->configuration; self->originalConfiguration = self->configuration;
self->hasConfiguration = true; self->hasConfiguration = true;
@ -683,13 +683,13 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
.detail("Conf", self->configuration.toString()) .detail("Conf", self->configuration.toString())
.trackLatest("RecoveredConfig"); .trackLatest("RecoveredConfig");
Standalone<VectorRef<KeyValueRef>> rawLocalities = wait( self->txnStateStore->readRange( tagLocalityListKeys ) ); Standalone<RangeResultRef> rawLocalities = wait( self->txnStateStore->readRange( tagLocalityListKeys ) );
self->dcId_locality.clear(); self->dcId_locality.clear();
for(auto& kv : rawLocalities) { for(auto& kv : rawLocalities) {
self->dcId_locality[decodeTagLocalityListKey(kv.key)] = decodeTagLocalityListValue(kv.value); self->dcId_locality[decodeTagLocalityListKey(kv.key)] = decodeTagLocalityListValue(kv.value);
} }
Standalone<VectorRef<KeyValueRef>> rawTags = wait( self->txnStateStore->readRange( serverTagKeys ) ); Standalone<RangeResultRef> rawTags = wait( self->txnStateStore->readRange( serverTagKeys ) );
self->allTags.clear(); self->allTags.clear();
if(self->lastEpochEnd > 0) { if(self->lastEpochEnd > 0) {
self->allTags.push_back(cacheTag); self->allTags.push_back(cacheTag);
@ -709,7 +709,7 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
} }
} }
Standalone<VectorRef<KeyValueRef>> rawHistoryTags = wait( self->txnStateStore->readRange( serverTagHistoryKeys ) ); Standalone<RangeResultRef> rawHistoryTags = wait( self->txnStateStore->readRange( serverTagHistoryKeys ) );
for(auto& kv : rawHistoryTags) { for(auto& kv : rawHistoryTags) {
self->allTags.push_back(decodeServerTagValue( kv.value )); self->allTags.push_back(decodeServerTagValue( kv.value ));
} }
@ -732,13 +732,13 @@ ACTOR Future<Void> sendInitialCommitToResolvers( Reference<MasterData> self ) {
state Sequence txnSequence = 0; state Sequence txnSequence = 0;
ASSERT(self->recoveryTransactionVersion); ASSERT(self->recoveryTransactionVersion);
state Standalone<VectorRef<KeyValueRef>> data = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get(); state Standalone<RangeResultRef> data = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
state vector<Future<Void>> txnReplies; state vector<Future<Void>> txnReplies;
state int64_t dataOutstanding = 0; state int64_t dataOutstanding = 0;
loop { loop {
if(!data.size()) break; if(!data.size()) break;
((KeyRangeRef&)txnKeys) = KeyRangeRef( keyAfter(data.back().key, txnKeys.arena()), txnKeys.end ); ((KeyRangeRef&)txnKeys) = KeyRangeRef( keyAfter(data.back().key, txnKeys.arena()), txnKeys.end );
Standalone<VectorRef<KeyValueRef>> nextData = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get(); Standalone<RangeResultRef> nextData = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
for(auto& r : self->proxies) { for(auto& r : self->proxies) {
TxnStateRequest req; TxnStateRequest req;

View File

@ -168,7 +168,7 @@ struct StorageServerDisk {
Future<Key> readNextKeyInclusive( KeyRef key ) { return readFirstKey(storage, KeyRangeRef(key, allKeys.end)); } Future<Key> readNextKeyInclusive( KeyRef key ) { return readFirstKey(storage, KeyRangeRef(key, allKeys.end)); }
Future<Optional<Value>> readValue( KeyRef key, Optional<UID> debugID = Optional<UID>() ) { return storage->readValue(key, debugID); } Future<Optional<Value>> readValue( KeyRef key, Optional<UID> debugID = Optional<UID>() ) { return storage->readValue(key, debugID); }
Future<Optional<Value>> readValuePrefix( KeyRef key, int maxLength, Optional<UID> debugID = Optional<UID>() ) { return storage->readValuePrefix(key, maxLength, debugID); } Future<Optional<Value>> readValuePrefix( KeyRef key, int maxLength, Optional<UID> debugID = Optional<UID>() ) { return storage->readValuePrefix(key, maxLength, debugID); }
Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) { return storage->readRange(keys, rowLimit, byteLimit); } Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) { return storage->readRange(keys, rowLimit, byteLimit); }
KeyValueStoreType getKeyValueStoreType() { return storage->getType(); } KeyValueStoreType getKeyValueStoreType() { return storage->getType(); }
StorageBytes getStorageBytes() { return storage->getStorageBytes(); } StorageBytes getStorageBytes() { return storage->getStorageBytes(); }
@ -181,7 +181,7 @@ private:
void writeMutations( MutationListRef mutations, Version debugVersion, const char* debugContext ); void writeMutations( MutationListRef mutations, Version debugVersion, const char* debugContext );
ACTOR static Future<Key> readFirstKey( IKeyValueStore* storage, KeyRangeRef range ) { ACTOR static Future<Key> readFirstKey( IKeyValueStore* storage, KeyRangeRef range ) {
Standalone<VectorRef<KeyValueRef>> r = wait( storage->readRange( range, 1 ) ); Standalone<RangeResultRef> r = wait( storage->readRange( range, 1 ) );
if (r.size()) return r[0].key; if (r.size()) return r[0].key;
else return range.end; else return range.end;
} }
@ -1052,17 +1052,19 @@ void merge( Arena& arena, VectorRef<KeyValueRef, VecSerStrategy::String>& output
// Combines data from base (at an older version) with sets from newer versions in [start, end) and appends the first (up to) |limit| rows to output // Combines data from base (at an older version) with sets from newer versions in [start, end) and appends the first (up to) |limit| rows to output
// If limit<0, base and output are in descending order, and start->key()>end->key(), but start is still inclusive and end is exclusive // If limit<0, base and output are in descending order, and start->key()>end->key(), but start is still inclusive and end is exclusive
{ {
if (limit==0) return; ASSERT(limit != 0);
int originalLimit = abs(limit) + output.size();
bool forward = limit>0; bool forward = limit>0;
if (!forward) limit = -limit; if (!forward) limit = -limit;
int adjustedLimit = limit + output.size();
int accumulatedBytes = 0; int accumulatedBytes = 0;
KeyValueRef const* baseStart = base.begin(); KeyValueRef const* baseStart = base.begin();
KeyValueRef const* baseEnd = base.end(); KeyValueRef const* baseEnd = base.end();
while (baseStart!=baseEnd && start!=end && --limit>=0 && accumulatedBytes < limitBytes) { while (baseStart!=baseEnd && start!=end && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
if (forward ? baseStart->key < start.key() : baseStart->key > start.key()) if (forward ? baseStart->key < start.key() : baseStart->key > start.key()) {
output.push_back_deep( arena, *baseStart++ ); output.push_back_deep( arena, *baseStart++ );
}
else { else {
output.push_back_deep( arena, KeyValueRef(start.key(), start->getValue()) ); output.push_back_deep( arena, KeyValueRef(start.key(), start->getValue()) );
if (baseStart->key == start.key()) ++baseStart; if (baseStart->key == start.key()) ++baseStart;
@ -1070,18 +1072,17 @@ void merge( Arena& arena, VectorRef<KeyValueRef, VecSerStrategy::String>& output
} }
accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize(); accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
} }
while (baseStart!=baseEnd && --limit>=0 && accumulatedBytes < limitBytes) { while (baseStart!=baseEnd && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
output.push_back_deep( arena, *baseStart++ ); output.push_back_deep( arena, *baseStart++ );
accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize(); accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
} }
if( !stopAtEndOfBase ) { if( !stopAtEndOfBase ) {
while (start!=end && --limit>=0 && accumulatedBytes < limitBytes) { while (start!=end && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
output.push_back_deep( arena, KeyValueRef(start.key(), start->getValue()) ); output.push_back_deep( arena, KeyValueRef(start.key(), start->getValue()) );
accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize(); accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
if (forward) ++start; else --start; if (forward) ++start; else --start;
} }
} }
ASSERT( output.size() <= originalLimit );
} }
// If limit>=0, it returns the first rows in the range (sorted ascending), otherwise the last rows (sorted descending). // If limit>=0, it returns the first rows in the range (sorted ascending), otherwise the last rows (sorted descending).
@ -1095,10 +1096,6 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
state KeyRef readEnd; state KeyRef readEnd;
state Key readBeginTemp; state Key readBeginTemp;
state int vCount; state int vCount;
//state UID rrid = deterministicRandom()->randomUniqueID();
//state int originalLimit = limit;
//state int originalLimitBytes = *pLimitBytes;
//state bool track = rrid.first() == 0x1bc134c2f752187cLL;
// Check if the desired key-range intersects the cached key-ranges // Check if the desired key-range intersects the cached key-ranges
// TODO Find a more efficient way to do it // TODO Find a more efficient way to do it
@ -1106,9 +1103,7 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
auto cached = data->cachedRangeMap.intersectingRanges(range); auto cached = data->cachedRangeMap.intersectingRanges(range);
result.cached = (cached.begin() != cached.end()); result.cached = (cached.begin() != cached.end());
// FIXME: Review pLimitBytes behavior
// if (limit >= 0) we are reading forward, else backward // if (limit >= 0) we are reading forward, else backward
if (limit >= 0) { if (limit >= 0) {
// We might care about a clear beginning before start that // We might care about a clear beginning before start that
// runs into range // runs into range
@ -1120,20 +1115,7 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
vStart = view.lower_bound(readBegin); vStart = view.lower_bound(readBegin);
/*if (track) {
printf("readRange(%llx, @%lld, '%s'-'%s')\n", data->thisServerID.first(), version, printable(range.begin).c_str(), printable(range.end).c_str());
printf("mvcc:\n");
vEnd = view.upper_bound(range.end);
for(auto r=vStart; r != vEnd; ++r) {
if (r->isClearTo())
printf(" '%s'-'%s' cleared\n", printable(r.key()).c_str(), printable(r->getEndKey()).c_str());
else
printf(" '%s' := '%s'\n", printable(r.key()).c_str(), printable(r->getValue()).c_str());
}
}*/
while (limit>0 && *pLimitBytes>0 && readBegin < range.end) { while (limit>0 && *pLimitBytes>0 && readBegin < range.end) {
// ASSERT( vStart == view.lower_bound(readBegin) );
ASSERT( !vStart || vStart.key() >= readBegin ); ASSERT( !vStart || vStart.key() >= readBegin );
if (vStart) { auto b = vStart; --b; ASSERT( !b || b.key() < readBegin ); } if (vStart) { auto b = vStart; --b; ASSERT( !b || b.key() < readBegin ); }
ASSERT( data->storageVersion() <= version ); ASSERT( data->storageVersion() <= version );
@ -1150,94 +1132,59 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
// Read the data on disk up to vEnd (or the end of the range) // Read the data on disk up to vEnd (or the end of the range)
readEnd = vEnd ? std::min( vEnd.key(), range.end ) : range.end; readEnd = vEnd ? std::min( vEnd.key(), range.end ) : range.end;
Standalone<VectorRef<KeyValueRef>> atStorageVersion = wait( Standalone<RangeResultRef> atStorageVersion = wait(
data->storage.readRange( KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes ) ); data->storage.readRange( KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes ) );
/*if (track) {
printf("read [%s,%s): %d rows\n", printable(readBegin).c_str(), printable(readEnd).c_str(), atStorageVersion.size());
for(auto r=atStorageVersion.begin(); r != atStorageVersion.end(); ++r)
printf(" '%s' := '%s'\n", printable(r->key).c_str(), printable(r->value).c_str());
}*/
ASSERT( atStorageVersion.size() <= limit ); ASSERT( atStorageVersion.size() <= limit );
if (data->storageVersion() > version) throw transaction_too_old(); if (data->storageVersion() > version) throw transaction_too_old();
bool more = atStorageVersion.size()!=0; // merge the sets in [vStart,vEnd) with the sets on disk, stopping at the last key from disk if we were limited
// merge the sets in [vStart,vEnd) with the sets on disk, stopping at the last key from disk if there is 'more'
int prevSize = result.data.size(); int prevSize = result.data.size();
merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, more, *pLimitBytes ); merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, atStorageVersion.more, *pLimitBytes );
limit -= result.data.size() - prevSize; limit -= result.data.size() - prevSize;
for (auto i = result.data.begin() + prevSize; i != result.data.end(); i++) { for (auto i = result.data.begin() + prevSize; i != result.data.end(); i++) {
*pLimitBytes -= sizeof(KeyValueRef) + i->expectedSize(); *pLimitBytes -= sizeof(KeyValueRef) + i->expectedSize();
} }
// Setup for the next iteration if (limit <=0 || *pLimitBytes <= 0) {
if (more) { // if there might be more data, begin reading right after what we already found to find out break;
//if (track) printf("more\n"); }
if (!(limit<=0 || *pLimitBytes<=0 || result.data.end()[-1].key == atStorageVersion.end()[-1].key))
TraceEvent(SevError, "ReadRangeIssue", data->thisServerID).detail("ReadBegin", readBegin).detail("ReadEnd", readEnd)
.detail("VStart", vStart ? vStart.key() : LiteralStringRef("nil")).detail("VEnd", vEnd ? vEnd.key() : LiteralStringRef("nil"))
.detail("AtStorageVersionBack", atStorageVersion.end()[-1].key).detail("ResultBack", result.data.end()[-1].key)
.detail("Limit", limit).detail("LimitBytes", *pLimitBytes).detail("ResultSize", result.data.size()).detail("PrevSize", prevSize);
readBegin = readBeginTemp = keyAfter( result.data.end()[-1].key );
ASSERT( limit<=0 || *pLimitBytes<=0 || result.data.end()[-1].key == atStorageVersion.end()[-1].key );
} else if (vStart && vStart->isClearTo()){ // if vStart is a clear, skip it.
//if (track) printf("skip clear\n");
readBegin = vStart->getEndKey(); // next disk read should start at the end of the clear
++vStart;
} else { // Otherwise, continue at readEnd
//if (track) printf("continue\n");
readBegin = readEnd;
}
}
// all but the last item are less than *pLimitBytes
ASSERT( result.data.size() == 0 || *pLimitBytes + result.data.end()[-1].expectedSize() + sizeof(KeyValueRef) > 0 );
/*if (*pLimitBytes <= 0)
TraceEvent(SevWarn, "ReadRangeLimitExceeded")
.detail("Version", version)
.detail("Begin", range.begin )
.detail("End", range.end )
.detail("LimitReamin", limit)
.detail("LimitBytesRemain", *pLimitBytes); */
/*GetKeyValuesReply correct = wait( readRangeOld(data, version, range, originalLimit, originalLimitBytes) ); // If we hit our limits reading from disk but then combining with MVCC gave us back more room
bool prefix_equal = true; if (atStorageVersion.more) {
int totalsize = 0; ASSERT(result.data.end()[-1].key == atStorageVersion.end()[-1].key);
int first_difference = -1; readBegin = readBeginTemp = keyAfter(result.data.end()[-1].key);
for(int i=0; i<result.data.size() && i<correct.data.size(); i++) { } else if (vEnd && vEnd->isClearTo()) {
if (result.data[i] != correct.data[i]) { ASSERT(vStart == vEnd); // vStart will have been advanced by merge()
first_difference = i; ASSERT(vEnd->getEndKey() > readBegin);
prefix_equal = false; readBegin = vEnd->getEndKey();
++vStart;
} else {
ASSERT(readEnd == range.end);
break; break;
} }
totalsize += result.data[i].expectedSize() + sizeof(KeyValueRef);
} }
// for the following check
result.more = limit == 0 || *pLimitBytes<=0; // FIXME: Does this have to be exact?
result.version = version;
if ( !(totalsize>originalLimitBytes ? prefix_equal : result.data==correct.data) || correct.more != result.more ) {
TraceEvent(SevError, "IncorrectResult", rrid).detail("Server", data->thisServerID).detail("CorrectRows", correct.data.size())
.detail("FirstDifference", first_difference).detail("OriginalLimit", originalLimit)
.detail("ResultRows", result.data.size()).detail("Result0", result.data[0].key).detail("Correct0", correct.data[0].key)
.detail("ResultN", result.data.size() ? result.data[std::min(correct.data.size(),result.data.size())-1].key : "nil")
.detail("CorrectN", correct.data.size() ? correct.data[std::min(correct.data.size(),result.data.size())-1].key : "nil");
}*/
} else { } else {
// Reverse read - abandon hope alle ye who enter here vStart = view.lastLess(range.end);
readEnd = range.end;
vStart = view.lastLess(readEnd);
// A clear might extend all the way to range.end // A clear might extend all the way to range.end
if (vStart && vStart->isClearTo() && vStart->getEndKey() >= readEnd) { if (vStart && vStart->isClearTo() && vStart->getEndKey() >= range.end) {
readEnd = vStart.key(); readEnd = vStart.key();
--vStart; --vStart;
} else {
readEnd = range.end;
} }
while (limit < 0 && *pLimitBytes > 0 && readEnd > range.begin) { while (limit < 0 && *pLimitBytes > 0 && readEnd > range.begin) {
ASSERT(!vStart || vStart.key() < readEnd);
if (vStart) {
auto b = vStart;
++b;
ASSERT(!b || b.key() >= readEnd);
}
ASSERT(data->storageVersion() <= version);
vEnd = vStart; vEnd = vStart;
vCount = 0; vCount = 0;
int vSize=0; int vSize=0;
@ -1247,31 +1194,43 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
--vEnd; --vEnd;
} }
readBegin = range.begin; readBegin = vEnd ? std::max(vEnd->isClearTo() ? vEnd->getEndKey() : vEnd.key(), range.begin) : range.begin;
if (vEnd) Standalone<RangeResultRef> atStorageVersion =
readBegin = std::max( readBegin, vEnd->isClearTo() ? vEnd->getEndKey() : vEnd.key() ); wait(data->storage.readRange(KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes));
Standalone<VectorRef<KeyValueRef>> atStorageVersion = wait( data->storage.readRange( KeyRangeRef(readBegin, readEnd), limit ) ); ASSERT(atStorageVersion.size() <= -limit);
if (data->storageVersion() > version) throw transaction_too_old(); if (data->storageVersion() > version) throw transaction_too_old();
int prevSize = result.data.size(); int prevSize = result.data.size();
merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, false, *pLimitBytes ); merge(result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, atStorageVersion.more, *pLimitBytes);
limit += result.data.size() - prevSize; limit += result.data.size() - prevSize;
for (auto i = result.data.begin() + prevSize; i != result.data.end(); i++) { for (auto i = result.data.begin() + prevSize; i != result.data.end(); i++) {
*pLimitBytes -= sizeof(KeyValueRef) + i->expectedSize(); *pLimitBytes -= sizeof(KeyValueRef) + i->expectedSize();
} }
vStart = vEnd; if (limit >=0 || *pLimitBytes <= 0) {
readEnd = readBegin; break;
}
if (vStart && vStart->isClearTo()) { if (atStorageVersion.more) {
ASSERT( vStart.key() < readEnd ); ASSERT(result.data.end()[-1].key == atStorageVersion.end()[-1].key);
readEnd = vStart.key(); readEnd = result.data.end()[-1].key;
} else if (vEnd && vEnd->isClearTo()) {
ASSERT(vStart == vEnd);
ASSERT(vEnd.key() < readEnd)
readEnd = vEnd.key();
--vStart; --vStart;
} else {
ASSERT(readBegin == range.begin);
break;
} }
} }
} }
// all but the last item are less than *pLimitBytes
ASSERT(result.data.size() == 0 || *pLimitBytes + result.data.end()[-1].expectedSize() + sizeof(KeyValueRef) > 0);
result.more = limit == 0 || *pLimitBytes<=0; // FIXME: Does this have to be exact? result.more = limit == 0 || *pLimitBytes<=0; // FIXME: Does this have to be exact?
result.version = version; result.version = version;
return result; return result;
@ -3119,8 +3078,8 @@ ACTOR Future<Void> applyByteSampleResult( StorageServer* data, IKeyValueStore* s
state int totalKeys = 0; state int totalKeys = 0;
state int totalBytes = 0; state int totalBytes = 0;
loop { loop {
Standalone<VectorRef<KeyValueRef>> bs = wait( storage->readRange( KeyRangeRef(begin, end), SERVER_KNOBS->STORAGE_LIMIT_BYTES, SERVER_KNOBS->STORAGE_LIMIT_BYTES ) ); Standalone<RangeResultRef> bs = wait( storage->readRange( KeyRangeRef(begin, end), SERVER_KNOBS->STORAGE_LIMIT_BYTES, SERVER_KNOBS->STORAGE_LIMIT_BYTES ) );
if(results) results->push_back(bs); if(results) results->push_back(bs.castTo<VectorRef<KeyValueRef>>());
int rangeSize = bs.expectedSize(); int rangeSize = bs.expectedSize();
totalFetches++; totalFetches++;
totalKeys += bs.size(); totalKeys += bs.size();
@ -3201,8 +3160,8 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
state Future<Optional<Value>> fVersion = storage->readValue(persistVersion); state Future<Optional<Value>> fVersion = storage->readValue(persistVersion);
state Future<Optional<Value>> fLogProtocol = storage->readValue(persistLogProtocol); state Future<Optional<Value>> fLogProtocol = storage->readValue(persistLogProtocol);
state Future<Optional<Value>> fPrimaryLocality = storage->readValue(persistPrimaryLocality); state Future<Optional<Value>> fPrimaryLocality = storage->readValue(persistPrimaryLocality);
state Future<Standalone<VectorRef<KeyValueRef>>> fShardAssigned = storage->readRange(persistShardAssignedKeys); state Future<Standalone<RangeResultRef>> fShardAssigned = storage->readRange(persistShardAssignedKeys);
state Future<Standalone<VectorRef<KeyValueRef>>> fShardAvailable = storage->readRange(persistShardAvailableKeys); state Future<Standalone<RangeResultRef>> fShardAvailable = storage->readRange(persistShardAvailableKeys);
state Promise<Void> byteSampleSampleRecovered; state Promise<Void> byteSampleSampleRecovered;
state Promise<Void> startByteSampleRestore; state Promise<Void> startByteSampleRestore;
@ -3239,7 +3198,7 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
debug_checkRestoredVersion( data->thisServerID, version, "StorageServer" ); debug_checkRestoredVersion( data->thisServerID, version, "StorageServer" );
data->setInitialVersion( version ); data->setInitialVersion( version );
state Standalone<VectorRef<KeyValueRef>> available = fShardAvailable.get(); state Standalone<RangeResultRef> available = fShardAvailable.get();
state int availableLoc; state int availableLoc;
for(availableLoc=0; availableLoc<available.size(); availableLoc++) { for(availableLoc=0; availableLoc<available.size(); availableLoc++) {
KeyRangeRef keys( KeyRangeRef keys(
@ -3253,7 +3212,7 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
wait(yield()); wait(yield());
} }
state Standalone<VectorRef<KeyValueRef>> assigned = fShardAssigned.get(); state Standalone<RangeResultRef> assigned = fShardAssigned.get();
state int assignedLoc; state int assignedLoc;
for(assignedLoc=0; assignedLoc<assigned.size(); assignedLoc++) { for(assignedLoc=0; assignedLoc<assigned.size(); assignedLoc++) {
KeyRangeRef keys( KeyRangeRef keys(
@ -3444,6 +3403,7 @@ ACTOR Future<Void> waitMetrics( StorageServerMetrics* self, WaitMetricsRequest r
if( timedout ) { if( timedout ) {
TEST( true ); // ShardWaitMetrics return on timeout TEST( true ); // ShardWaitMetrics return on timeout
//FIXME: instead of using random chance, send wrong_shard_server when the call in from waitMetricsMultiple (requires additional information in the request)
if(deterministicRandom()->random01() < SERVER_KNOBS->WAIT_METRICS_WRONG_SHARD_CHANCE) { if(deterministicRandom()->random01() < SERVER_KNOBS->WAIT_METRICS_WRONG_SHARD_CHANCE) {
req.reply.sendError( wrong_shard_server() ); req.reply.sendError( wrong_shard_server() );
} else { } else {

View File

@ -973,6 +973,8 @@ vector<TestSpec> readTests( ifstream& ifs ) {
TraceEvent("TestParserTest").detail("ParsedSimDrAgents", spec.simDrAgents); TraceEvent("TestParserTest").detail("ParsedSimDrAgents", spec.simDrAgents);
} else if( attrib == "extraDB" ) { } else if( attrib == "extraDB" ) {
TraceEvent("TestParserTest").detail("ParsedExtraDB", ""); TraceEvent("TestParserTest").detail("ParsedExtraDB", "");
} else if ( attrib == "configureLocked" ) {
TraceEvent("TestParserTest").detail("ParsedConfigureLocked", "");
} else if( attrib == "minimumReplication" ) { } else if( attrib == "minimumReplication" ) {
TraceEvent("TestParserTest").detail("ParsedMinimumReplication", ""); TraceEvent("TestParserTest").detail("ParsedMinimumReplication", "");
} else if( attrib == "minimumRegions" ) { } else if( attrib == "minimumRegions" ) {

View File

@ -660,7 +660,7 @@ Standalone<StringRef> roleString(std::set<std::pair<std::string, std::string>> r
return StringRef(result); return StringRef(result);
} }
void startRole(const Role &role, UID roleId, UID workerId, std::map<std::string, std::string> details, std::string origination) { void startRole(const Role &role, UID roleId, UID workerId, const std::map<std::string, std::string> &details, const std::string &origination) {
if(role.includeInTraceRoles) { if(role.includeInTraceRoles) {
addTraceRole(role.abbreviation); addTraceRole(role.abbreviation);
} }
@ -980,7 +980,7 @@ ACTOR Future<Void> workerServer(
auto& logData = sharedLogs[SharedLogsKey(s.tLogOptions, s.storeType)]; auto& logData = sharedLogs[SharedLogsKey(s.tLogOptions, s.storeType)];
// FIXME: Shouldn't if logData.first isValid && !isReady, shouldn't we // FIXME: Shouldn't if logData.first isValid && !isReady, shouldn't we
// be sending a fake InitializeTLogRequest rather than calling tLog() ? // be sending a fake InitializeTLogRequest rather than calling tLog() ?
Future<Void> tl = tLogFn( kv, queue, dbInfo, locality, !logData.actor.isValid() || logData.actor.isReady() ? logData.requests : PromiseStream<InitializeTLogRequest>(), s.storeID, true, oldLog, recovery, folder, degraded, activeSharedTLog ); Future<Void> tl = tLogFn( kv, queue, dbInfo, locality, !logData.actor.isValid() || logData.actor.isReady() ? logData.requests : PromiseStream<InitializeTLogRequest>(), s.storeID, interf.id(), true, oldLog, recovery, folder, degraded, activeSharedTLog );
recoveries.push_back(recovery.getFuture()); recoveries.push_back(recovery.getFuture());
activeSharedTLog->set(s.storeID); activeSharedTLog->set(s.storeID);
@ -1161,7 +1161,7 @@ ACTOR Future<Void> workerServer(
filesClosed.add( data->onClosed() ); filesClosed.add( data->onClosed() );
filesClosed.add( queue->onClosed() ); filesClosed.add( queue->onClosed() );
Future<Void> tLogCore = tLogFn( data, queue, dbInfo, locality, logData.requests, logId, false, Promise<Void>(), Promise<Void>(), folder, degraded, activeSharedTLog ); Future<Void> tLogCore = tLogFn( data, queue, dbInfo, locality, logData.requests, logId, interf.id(), false, Promise<Void>(), Promise<Void>(), folder, degraded, activeSharedTLog );
tLogCore = handleIOErrors( tLogCore, data, logId ); tLogCore = handleIOErrors( tLogCore, data, logId );
tLogCore = handleIOErrors( tLogCore, queue, logId ); tLogCore = handleIOErrors( tLogCore, queue, logId );
errorForwarders.add( forwardError( errors, Role::SHARED_TRANSACTION_LOG, logId, tLogCore ) ); errorForwarders.add( forwardError( errors, Role::SHARED_TRANSACTION_LOG, logId, tLogCore ) );
@ -1458,37 +1458,48 @@ ACTOR Future<UID> createAndLockProcessIdFile(std::string folder) {
state UID processIDUid; state UID processIDUid;
platform::createDirectory(folder); platform::createDirectory(folder);
try { loop {
state std::string lockFilePath = joinPath(folder, "processId"); try {
state ErrorOr<Reference<IAsyncFile>> lockFile = wait(errorOr(IAsyncFileSystem::filesystem(g_network)->open(lockFilePath, IAsyncFile::OPEN_READWRITE | IAsyncFile::OPEN_LOCK, 0600))); state std::string lockFilePath = joinPath(folder, "processId");
state ErrorOr<Reference<IAsyncFile>> lockFile = wait(errorOr(IAsyncFileSystem::filesystem(g_network)->open(lockFilePath, IAsyncFile::OPEN_READWRITE | IAsyncFile::OPEN_LOCK, 0600)));
if (lockFile.isError() && lockFile.getError().code() == error_code_file_not_found && !fileExists(lockFilePath)) { if (lockFile.isError() && lockFile.getError().code() == error_code_file_not_found && !fileExists(lockFilePath)) {
Reference<IAsyncFile> _lockFile = wait(IAsyncFileSystem::filesystem()->open(lockFilePath, IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_LOCK | IAsyncFile::OPEN_READWRITE, 0600)); Reference<IAsyncFile> _lockFile = wait(IAsyncFileSystem::filesystem()->open(lockFilePath, IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_LOCK | IAsyncFile::OPEN_READWRITE, 0600));
lockFile = _lockFile; lockFile = _lockFile;
processIDUid = deterministicRandom()->randomUniqueID(); processIDUid = deterministicRandom()->randomUniqueID();
BinaryWriter wr(IncludeVersion()); BinaryWriter wr(IncludeVersion());
wr << processIDUid; wr << processIDUid;
wait(lockFile.get()->write(wr.getData(), wr.getLength(), 0)); wait(lockFile.get()->write(wr.getData(), wr.getLength(), 0));
wait(lockFile.get()->sync()); wait(lockFile.get()->sync());
} }
else { else {
if (lockFile.isError()) throw lockFile.getError(); // If we've failed to open the file, throw an exception if (lockFile.isError()) throw lockFile.getError(); // If we've failed to open the file, throw an exception
int64_t fileSize = wait(lockFile.get()->size()); int64_t fileSize = wait(lockFile.get()->size());
state Key fileData = makeString(fileSize); state Key fileData = makeString(fileSize);
wait(success(lockFile.get()->read(mutateString(fileData), fileSize, 0))); wait(success(lockFile.get()->read(mutateString(fileData), fileSize, 0)));
processIDUid = BinaryReader::fromStringRef<UID>(fileData, IncludeVersion()); try {
processIDUid = BinaryReader::fromStringRef<UID>(fileData, IncludeVersion());
return processIDUid;
} catch (Error& e) {
if(!g_network->isSimulated()) {
throw;
}
deleteFile(lockFilePath);
}
}
} }
} catch (Error& e) {
catch (Error& e) { if (e.code() == error_code_actor_cancelled) {
if (e.code() != error_code_actor_cancelled) { throw;
if (!e.isInjectedFault()) }
if (!e.isInjectedFault()) {
fprintf(stderr, "ERROR: error creating or opening process id file `%s'.\n", joinPath(folder, "processId").c_str()); fprintf(stderr, "ERROR: error creating or opening process id file `%s'.\n", joinPath(folder, "processId").c_str());
}
TraceEvent(SevError, "OpenProcessIdError").error(e); TraceEvent(SevError, "OpenProcessIdError").error(e);
throw;
} }
throw;
} }
return processIDUid;
} }
ACTOR Future<Void> fdbd( ACTOR Future<Void> fdbd(

View File

@ -1136,18 +1136,19 @@ struct ConsistencyCheckWorkload : TestWorkload
std::set<Optional<Key>> missingStorage; std::set<Optional<Key>> missingStorage;
for( int i = 0; i < workers.size(); i++ ) { for( int i = 0; i < workers.size(); i++ ) {
if( !configuration.isExcludedServer(workers[i].interf.address()) && NetworkAddress addr = workers[i].interf.tLog.getEndpoint().addresses.getTLSAddress();
if( !configuration.isExcludedServer(addr) &&
( workers[i].processClass == ProcessClass::StorageClass || workers[i].processClass == ProcessClass::UnsetClass ) ) { ( workers[i].processClass == ProcessClass::StorageClass || workers[i].processClass == ProcessClass::UnsetClass ) ) {
bool found = false; bool found = false;
for( int j = 0; j < storageServers.size(); j++ ) { for( int j = 0; j < storageServers.size(); j++ ) {
if( storageServers[j].address() == workers[i].interf.address() ) { if( storageServers[j].getValue.getEndpoint().addresses.getTLSAddress() == addr ) {
found = true; found = true;
break; break;
} }
} }
if( !found ) { if( !found ) {
TraceEvent("ConsistencyCheck_NoStorage") TraceEvent("ConsistencyCheck_NoStorage")
.detail("Address", workers[i].interf.address()) .detail("Address", addr)
.detail("ProcessClassEqualToStorageClass", .detail("ProcessClassEqualToStorageClass",
(int)(workers[i].processClass == ProcessClass::StorageClass)); (int)(workers[i].processClass == ProcessClass::StorageClass));
missingStorage.insert(workers[i].interf.locality.dcId()); missingStorage.insert(workers[i].interf.locality.dcId());
@ -1195,8 +1196,15 @@ struct ConsistencyCheckWorkload : TestWorkload
if(!statefulProcesses[itr->interf.address()].count(id)) { if(!statefulProcesses[itr->interf.address()].count(id)) {
TraceEvent("ConsistencyCheck_ExtraDataStore").detail("Address", itr->interf.address()).detail("DataStoreID", id); TraceEvent("ConsistencyCheck_ExtraDataStore").detail("Address", itr->interf.address()).detail("DataStoreID", id);
if(g_network->isSimulated()) { if(g_network->isSimulated()) {
TraceEvent("ConsistencyCheck_RebootProcess").detail("Address", itr->interf.address()).detail("DataStoreID", id); //FIXME: this is hiding the fact that we can recruit a new storage server on a location the has files left behind by a previous failure
g_simulator.rebootProcess(g_simulator.getProcessByAddress(itr->interf.address()), ISimulator::RebootProcess); // this means that the process is wasting disk space until the process is rebooting
auto p = g_simulator.getProcessByAddress(itr->interf.address());
TraceEvent("ConsistencyCheck_RebootProcess").detail("Address", itr->interf.address()).detail("DataStoreID", id).detail("Reliable", p->isReliable());
if(p->isReliable()) {
g_simulator.rebootProcess(p, ISimulator::RebootProcess);
} else {
g_simulator.killProcess(p, ISimulator::KillInstantly);
}
} }
foundExtraDataStore = true; foundExtraDataStore = true;
@ -1220,12 +1228,13 @@ struct ConsistencyCheckWorkload : TestWorkload
std::set<NetworkAddress> workerAddresses; std::set<NetworkAddress> workerAddresses;
for (const auto& it : workers) { for (const auto& it : workers) {
ISimulator::ProcessInfo* info = g_simulator.getProcessByAddress(it.interf.address()); NetworkAddress addr = it.interf.tLog.getEndpoint().addresses.getTLSAddress();
ISimulator::ProcessInfo* info = g_simulator.getProcessByAddress(addr);
if(!info || info->failed) { if(!info || info->failed) {
TraceEvent("ConsistencyCheck_FailedWorkerInList").detail("Addr", it.interf.address()); TraceEvent("ConsistencyCheck_FailedWorkerInList").detail("Addr", it.interf.address());
return false; return false;
} }
workerAddresses.insert( NetworkAddress(it.interf.address().ip, it.interf.address().port, true, false) ); workerAddresses.insert( NetworkAddress(addr.ip, addr.port, true, addr.isTLS()) );
} }
vector<ISimulator::ProcessInfo*> all = g_simulator.getAllProcesses(); vector<ISimulator::ProcessInfo*> all = g_simulator.getAllProcesses();

View File

@ -46,7 +46,7 @@ struct DDMetricsExcludeWorkload : TestWorkload {
ACTOR static Future<double> getMovingDataAmount(Database cx, DDMetricsExcludeWorkload* self) { ACTOR static Future<double> getMovingDataAmount(Database cx, DDMetricsExcludeWorkload* self) {
try { try {
StatusObject statusObj = wait(StatusClient::statusFetcher(cx->getConnectionFile())); StatusObject statusObj = wait(StatusClient::statusFetcher(cx));
StatusObjectReader statusObjCluster; StatusObjectReader statusObjCluster;
((StatusObjectReader)statusObj).get("cluster", statusObjCluster); ((StatusObjectReader)statusObj).get("cluster", statusObjCluster);
StatusObjectReader statusObjData; StatusObjectReader statusObjData;

View File

@ -260,7 +260,7 @@ ACTOR Future<Void> testKVStoreMain( KVStoreTestWorkload* workload, KVTest* ptest
state Key k; state Key k;
state double cst = timer(); state double cst = timer();
while (true) { while (true) {
Standalone<VectorRef<KeyValueRef>> kv = Standalone<RangeResultRef> kv =
wait(test.store->readRange(KeyRangeRef(k, LiteralStringRef("\xff\xff\xff\xff")), 1000)); wait(test.store->readRange(KeyRangeRef(k, LiteralStringRef("\xff\xff\xff\xff")), 1000));
count += kv.size(); count += kv.size();
if (kv.size() < 1000) break; if (kv.size() < 1000) break;

View File

@ -27,12 +27,14 @@
struct LockDatabaseWorkload : TestWorkload { struct LockDatabaseWorkload : TestWorkload {
double lockAfter, unlockAfter; double lockAfter, unlockAfter;
bool ok; bool ok;
bool onlyCheckLocked;
LockDatabaseWorkload(WorkloadContext const& wcx) LockDatabaseWorkload(WorkloadContext const& wcx)
: TestWorkload(wcx), ok(true) : TestWorkload(wcx), ok(true)
{ {
lockAfter = getOption( options, LiteralStringRef("lockAfter"), 0.0 ); lockAfter = getOption( options, LiteralStringRef("lockAfter"), 0.0 );
unlockAfter = getOption( options, LiteralStringRef("unlockAfter"), 10.0 ); unlockAfter = getOption( options, LiteralStringRef("unlockAfter"), 10.0 );
onlyCheckLocked = getOption(options, LiteralStringRef("onlyCheckLocked"), false);
ASSERT(unlockAfter > lockAfter); ASSERT(unlockAfter > lockAfter);
} }
@ -42,9 +44,8 @@ struct LockDatabaseWorkload : TestWorkload {
return Void(); return Void();
} }
virtual Future<Void> start( Database const& cx ) { virtual Future<Void> start(Database const& cx) {
if( clientId == 0 ) if (clientId == 0) return onlyCheckLocked ? timeout(checkLocked(cx, this), 60, Void()) : lockWorker(cx, this);
return lockWorker( cx, this );
return Void(); return Void();
} }
@ -110,6 +111,7 @@ struct LockDatabaseWorkload : TestWorkload {
self->ok = false; self->ok = false;
return Void(); return Void();
} catch( Error &e ) { } catch( Error &e ) {
TEST(e.code() == error_code_database_locked); // Database confirmed locked
wait( tr.onError(e) ); wait( tr.onError(e) );
} }
} }

View File

@ -327,10 +327,43 @@ struct ReadWriteWorkload : KVWorkload {
elapsed += self->periodicLoggingInterval; elapsed += self->periodicLoggingInterval;
wait( delayUntil(start + elapsed) ); wait( delayUntil(start + elapsed) );
TraceEvent((self->description() + "_RowReadLatency").c_str()).detail("Mean", self->readLatencies.mean()).detail("Median", self->readLatencies.median()).detail("Percentile5", self->readLatencies.percentile(.05)).detail("Percentile95", self->readLatencies.percentile(.95)).detail("Count", self->readLatencyCount).detail("Elapsed", elapsed); TraceEvent((self->description() + "_RowReadLatency").c_str())
TraceEvent((self->description() + "_GRVLatency").c_str()).detail("Mean", self->GRVLatencies.mean()).detail("Median", self->GRVLatencies.median()).detail("Percentile5", self->GRVLatencies.percentile(.05)).detail("Percentile95", self->GRVLatencies.percentile(.95)); .detail("Mean", self->readLatencies.mean())
TraceEvent((self->description() + "_CommitLatency").c_str()).detail("Mean", self->commitLatencies.mean()).detail("Median", self->commitLatencies.median()).detail("Percentile5", self->commitLatencies.percentile(.05)).detail("Percentile95", self->commitLatencies.percentile(.95)); .detail("Median", self->readLatencies.median())
TraceEvent((self->description() + "_TotalLatency").c_str()).detail("Mean", self->latencies.mean()).detail("Median", self->latencies.median()).detail("Percentile5", self->latencies.percentile(.05)).detail("Percentile95", self->latencies.percentile(.95)); .detail("Percentile5", self->readLatencies.percentile(.05))
.detail("Percentile95", self->readLatencies.percentile(.95))
.detail("Percentile99", self->readLatencies.percentile(.99))
.detail("Percentile99_9", self->readLatencies.percentile(.999))
.detail("Max", self->readLatencies.max())
.detail("Count", self->readLatencyCount)
.detail("Elapsed", elapsed);
TraceEvent((self->description() + "_GRVLatency").c_str())
.detail("Mean", self->GRVLatencies.mean())
.detail("Median", self->GRVLatencies.median())
.detail("Percentile5", self->GRVLatencies.percentile(.05))
.detail("Percentile95", self->GRVLatencies.percentile(.95))
.detail("Percentile99", self->GRVLatencies.percentile(.99))
.detail("Percentile99_9", self->GRVLatencies.percentile(.999))
.detail("Max", self->GRVLatencies.max());
TraceEvent((self->description() + "_CommitLatency").c_str())
.detail("Mean", self->commitLatencies.mean())
.detail("Median", self->commitLatencies.median())
.detail("Percentile5", self->commitLatencies.percentile(.05))
.detail("Percentile95", self->commitLatencies.percentile(.95))
.detail("Percentile99", self->commitLatencies.percentile(.99))
.detail("Percentile99_9", self->commitLatencies.percentile(.999))
.detail("Max", self->commitLatencies.max());
TraceEvent((self->description() + "_TotalLatency").c_str())
.detail("Mean", self->latencies.mean())
.detail("Median", self->latencies.median())
.detail("Percentile5", self->latencies.percentile(.05))
.detail("Percentile95", self->latencies.percentile(.95))
.detail("Percentile99", self->latencies.percentile(.99))
.detail("Percentile99_9", self->latencies.percentile(.999))
.detail("Max", self->latencies.max());
int64_t ops = (self->aTransactions.getValue() * (self->readsPerTransactionA+self->writesPerTransactionA)) + int64_t ops = (self->aTransactions.getValue() * (self->readsPerTransactionA+self->writesPerTransactionA)) +
(self->bTransactions.getValue() * (self->readsPerTransactionB+self->writesPerTransactionB)); (self->bTransactions.getValue() * (self->readsPerTransactionB+self->writesPerTransactionB));

View File

@ -69,7 +69,7 @@ struct StatusWorkload : TestWorkload {
if (clientId != 0) if (clientId != 0)
return Void(); return Void();
return success(timeout(fetcher(cx->getConnectionFile(), this), testDuration)); return success(timeout(fetcher(cx, this), testDuration));
} }
virtual Future<bool> check(Database const& cx) { virtual Future<bool> check(Database const& cx) {
return errors.getValue() == 0; return errors.getValue() == 0;
@ -161,7 +161,7 @@ struct StatusWorkload : TestWorkload {
} }
} }
ACTOR Future<Void> fetcher(Reference<ClusterConnectionFile> connFile, StatusWorkload *self) { ACTOR Future<Void> fetcher(Database cx, StatusWorkload *self) {
state double lastTime = now(); state double lastTime = now();
loop{ loop{
@ -170,7 +170,7 @@ struct StatusWorkload : TestWorkload {
// Since we count the requests that start, we could potentially never really hear back? // Since we count the requests that start, we could potentially never really hear back?
++self->requests; ++self->requests;
state double issued = now(); state double issued = now();
StatusObject result = wait(StatusClient::statusFetcher(connFile)); StatusObject result = wait(StatusClient::statusFetcher(cx));
++self->replies; ++self->replies;
BinaryWriter br(AssumeVersion(currentProtocolVersion)); BinaryWriter br(AssumeVersion(currentProtocolVersion));
save(br, result); save(br, result);

View File

@ -519,6 +519,10 @@ public:
} }
#endif #endif
template <class U> Standalone<U> castTo() const {
return Standalone<U>(*this, arena());
}
template <class Archive> template <class Archive>
void serialize(Archive& ar) { void serialize(Archive& ar) {
// FIXME: something like BinaryReader(ar) >> arena >> *(T*)this; to guarantee standalone arena??? // FIXME: something like BinaryReader(ar) >> arena >> *(T*)this; to guarantee standalone arena???

View File

@ -58,6 +58,8 @@ set(FLOW_SRCS
ThreadSafeQueue.h ThreadSafeQueue.h
Trace.cpp Trace.cpp
Trace.h Trace.h
TLSPolicy.h
TLSPolicy.cpp
UnitTest.cpp UnitTest.cpp
UnitTest.h UnitTest.h
XmlTraceLogFormatter.h XmlTraceLogFormatter.h
@ -84,6 +86,7 @@ set(FLOW_SRCS
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/SourceVersion.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/SourceVersion.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/SourceVersion.h)
add_flow_target(STATIC_LIBRARY NAME flow SRCS ${FLOW_SRCS}) add_flow_target(STATIC_LIBRARY NAME flow SRCS ${FLOW_SRCS})
target_include_directories(flow SYSTEM PUBLIC ${CMAKE_THREAD_LIBS_INIT})
target_include_directories(flow PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(flow PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
if (NOT APPLE AND NOT WIN32) if (NOT APPLE AND NOT WIN32)
set (FLOW_LIBS ${FLOW_LIBS} rt) set (FLOW_LIBS ${FLOW_LIBS} rt)
@ -92,7 +95,6 @@ elseif(WIN32)
target_link_libraries(flow PUBLIC psapi.lib) target_link_libraries(flow PUBLIC psapi.lib)
endif() endif()
target_link_libraries(flow PRIVATE ${FLOW_LIBS}) target_link_libraries(flow PRIVATE ${FLOW_LIBS})
target_link_libraries(flow PUBLIC boost_target Threads::Threads ${CMAKE_DL_LIBS})
if(USE_VALGRIND) if(USE_VALGRIND)
target_link_libraries(flow PUBLIC Valgrind) target_link_libraries(flow PUBLIC Valgrind)
endif() endif()
@ -100,7 +102,11 @@ endif()
if(NOT WITH_TLS OR OPEN_FOR_IDE) if(NOT WITH_TLS OR OPEN_FOR_IDE)
target_compile_definitions(flow PUBLIC TLS_DISABLED) target_compile_definitions(flow PUBLIC TLS_DISABLED)
else() else()
target_link_libraries(flow PUBLIC FDBLibTLS) target_link_libraries(flow PUBLIC OpenSSL::SSL)
endif()
target_link_libraries(flow PUBLIC boost_target Threads::Threads ${CMAKE_DL_LIBS})
if(USE_VALGRIND)
target_link_libraries(flow PUBLIC Valgrind)
endif() endif()
if(APPLE) if(APPLE)

View File

@ -68,7 +68,6 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( MAX_RECONNECTION_TIME, 0.5 ); init( MAX_RECONNECTION_TIME, 0.5 );
init( RECONNECTION_TIME_GROWTH_RATE, 1.2 ); init( RECONNECTION_TIME_GROWTH_RATE, 1.2 );
init( RECONNECTION_RESET_TIME, 5.0 ); init( RECONNECTION_RESET_TIME, 5.0 );
init( CONNECTION_ACCEPT_DELAY, 0.5 );
init( TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY, 5.0 ); init( TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY, 5.0 );
init( TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT, 20.0 ); init( TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT, 20.0 );
init( PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT, 3600.0 ); init( PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT, 3600.0 );
@ -112,6 +111,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
//GenericActors //GenericActors
init( BUGGIFY_FLOW_LOCK_RELEASE_DELAY, 1.0 ); init( BUGGIFY_FLOW_LOCK_RELEASE_DELAY, 1.0 );
init( LOW_PRIORITY_DELAY_COUNT, 5 );
//IAsyncFile //IAsyncFile
init( INCREMENTAL_DELETE_TRUNCATE_AMOUNT, 5e8 ); //500MB init( INCREMENTAL_DELETE_TRUNCATE_AMOUNT, 5e8 ); //500MB
@ -123,6 +123,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( SLOW_LOOP_CUTOFF, 15.0 / 1000.0 ); init( SLOW_LOOP_CUTOFF, 15.0 / 1000.0 );
init( SLOW_LOOP_SAMPLING_RATE, 0.1 ); init( SLOW_LOOP_SAMPLING_RATE, 0.1 );
init( TSC_YIELD_TIME, 1000000 ); init( TSC_YIELD_TIME, 1000000 );
init( CERT_FILE_MAX_SIZE, 5 * 1024 * 1024 );
//Network //Network
init( PACKET_LIMIT, 100LL<<20 ); init( PACKET_LIMIT, 100LL<<20 );
@ -133,6 +134,8 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( MIN_PACKET_BUFFER_FREE_BYTES, 256 ); init( MIN_PACKET_BUFFER_FREE_BYTES, 256 );
init( FLOW_TCP_NODELAY, 1 ); init( FLOW_TCP_NODELAY, 1 );
init( FLOW_TCP_QUICKACK, 0 ); init( FLOW_TCP_QUICKACK, 0 );
init( UNRESTRICTED_HANDSHAKE_LIMIT, 15 );
init( BOUNDED_HANDSHAKE_LIMIT, 400 );
//Sim2 //Sim2
init( MIN_OPEN_TIME, 0.0002 ); init( MIN_OPEN_TIME, 0.0002 );

View File

@ -87,7 +87,6 @@ public:
double MAX_RECONNECTION_TIME; double MAX_RECONNECTION_TIME;
double RECONNECTION_TIME_GROWTH_RATE; double RECONNECTION_TIME_GROWTH_RATE;
double RECONNECTION_RESET_TIME; double RECONNECTION_RESET_TIME;
double CONNECTION_ACCEPT_DELAY;
int TLS_CERT_REFRESH_DELAY_SECONDS; int TLS_CERT_REFRESH_DELAY_SECONDS;
double TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT; double TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT;
@ -131,6 +130,7 @@ public:
//GenericActors //GenericActors
double BUGGIFY_FLOW_LOCK_RELEASE_DELAY; double BUGGIFY_FLOW_LOCK_RELEASE_DELAY;
int LOW_PRIORITY_DELAY_COUNT;
//IAsyncFile //IAsyncFile
int64_t INCREMENTAL_DELETE_TRUNCATE_AMOUNT; int64_t INCREMENTAL_DELETE_TRUNCATE_AMOUNT;
@ -143,6 +143,7 @@ public:
double SLOW_LOOP_SAMPLING_RATE; double SLOW_LOOP_SAMPLING_RATE;
int64_t TSC_YIELD_TIME; int64_t TSC_YIELD_TIME;
int64_t REACTOR_FLAGS; int64_t REACTOR_FLAGS;
int CERT_FILE_MAX_SIZE;
//Network //Network
int64_t PACKET_LIMIT; int64_t PACKET_LIMIT;
@ -153,6 +154,8 @@ public:
int MIN_PACKET_BUFFER_FREE_BYTES; int MIN_PACKET_BUFFER_FREE_BYTES;
int FLOW_TCP_NODELAY; int FLOW_TCP_NODELAY;
int FLOW_TCP_QUICKACK; int FLOW_TCP_QUICKACK;
int UNRESTRICTED_HANDSHAKE_LIMIT;
int BOUNDED_HANDSHAKE_LIMIT;
//Sim2 //Sim2
//FIMXE: more parameters could be factored out //FIMXE: more parameters could be factored out

View File

@ -37,6 +37,7 @@
#include "flow/AsioReactor.h" #include "flow/AsioReactor.h"
#include "flow/Profiler.h" #include "flow/Profiler.h"
#include "flow/ProtocolVersion.h" #include "flow/ProtocolVersion.h"
#include "flow/TLSPolicy.h"
#ifdef WIN32 #ifdef WIN32
#include <mmsystem.h> #include <mmsystem.h>
@ -49,7 +50,6 @@ intptr_t g_stackYieldLimit = 0;
using namespace boost::asio::ip; using namespace boost::asio::ip;
#if defined(__linux__) #if defined(__linux__)
#include <execinfo.h> #include <execinfo.h>
@ -111,7 +111,7 @@ thread_local INetwork* thread_network = 0;
class Net2 sealed : public INetwork, public INetworkConnections { class Net2 sealed : public INetwork, public INetworkConnections {
public: public:
Net2(bool useThreadPool, bool useMetrics); Net2(bool useThreadPool, bool useMetrics, Reference<TLSPolicy> policy, const TLSParams& tlsParams);
void run(); void run();
void initMetrics(); void initMetrics();
@ -122,6 +122,7 @@ public:
// INetwork interface // INetwork interface
virtual double now() { return currentTime; }; virtual double now() { return currentTime; };
virtual double timer() { return ::timer(); };
virtual Future<Void> delay( double seconds, TaskPriority taskId ); virtual Future<Void> delay( double seconds, TaskPriority taskId );
virtual Future<class Void> yield( TaskPriority taskID ); virtual Future<class Void> yield( TaskPriority taskID );
virtual bool check_yield(TaskPriority taskId); virtual bool check_yield(TaskPriority taskId);
@ -154,6 +155,15 @@ public:
//private: //private:
ASIOReactor reactor; ASIOReactor reactor;
#ifndef TLS_DISABLED
boost::asio::ssl::context sslContext;
#endif
std::string tlsPassword;
std::string get_password() const {
return tlsPassword;
}
INetworkConnections *network; // initially this, but can be changed INetworkConnections *network; // initially this, but can be changed
int64_t tsc_begin, tsc_end; int64_t tsc_begin, tsc_end;
@ -244,7 +254,11 @@ public:
try { try {
if (error) { if (error) {
// Log the error... // Log the error...
TraceEvent(SevWarn, errContext, errID).suppressFor(1.0).detail("ErrorCode", error.value()).detail("Message", error.message()); TraceEvent(SevWarn, errContext, errID).suppressFor(1.0).detail("ErrorCode", error.value()).detail("Message", error.message())
#ifndef TLS_DISABLED
.detail("WhichMeans", TLSPolicy::ErrorString(error))
#endif
;
p.sendError( connection_failed() ); p.sendError( connection_failed() );
} else } else
p.send( Void() ); p.send( Void() );
@ -297,6 +311,10 @@ public:
init(); init();
} }
virtual Future<Void> acceptHandshake() { return Void(); }
virtual Future<Void> connectHandshake() { return Void(); }
// returns when write() can write at least one byte // returns when write() can write at least one byte
virtual Future<Void> onWritable() { virtual Future<Void> onWritable() {
++g_net2->countWriteProbes; ++g_net2->countWriteProbes;
@ -480,6 +498,342 @@ private:
} }
}; };
#ifndef TLS_DISABLED
typedef boost::asio::ssl::stream<boost::asio::ip::tcp::socket&> ssl_socket;
// A TLS-wrapped TCP connection. The plain tcp::socket is owned here and the
// boost::asio ssl stream wraps a reference to it, so both share one file
// descriptor. Setup happens in two phases: establish TCP (connect()/accept()),
// then run the TLS handshake (connectHandshake()/acceptHandshake()).
// Handshake failures are counted per peer in
// networkInfo.serverTLSConnectionThrottler so repeat offenders are delayed
// instead of being retried at full speed.
class SSLConnection : public IConnection, ReferenceCounted<SSLConnection> {
public:
virtual void addref() { ReferenceCounted<SSLConnection>::addref(); }
virtual void delref() { ReferenceCounted<SSLConnection>::delref(); }
// Cancels any outstanding operations and closes the socket.
virtual void close() {
closeSocket();
}
explicit SSLConnection( boost::asio::io_service& io_service, boost::asio::ssl::context& context )
: id(nondeterministicRandom()->randomUniqueID()), socket(io_service), ssl_sock(socket, context)
{
}
// This is not part of the IConnection interface, because it is wrapped by INetwork::connect()
// Establishes only the TCP connection; the caller must still run
// connectHandshake() before the connection can carry TLS traffic.
ACTOR static Future<Reference<IConnection>> connect( boost::asio::io_service* ios, boost::asio::ssl::context* context, NetworkAddress addr ) {
// Outgoing throttling is keyed on (ip, port) of the remote peer.
std::pair<IPAddress,uint16_t> peerIP = std::make_pair(addr.ip, addr.port);
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
if (now() < iter->second.second) {
if(iter->second.first >= FLOW_KNOBS->TLS_CLIENT_CONNECTION_THROTTLE_ATTEMPTS) {
// Too many recent handshake failures against this peer: stall, then fail.
TraceEvent("TLSOutgoingConnectionThrottlingWarning").suppressFor(1.0).detail("PeerIP", addr);
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
throw connection_failed();
}
} else {
// Throttling window expired; forget this peer.
g_network->networkInfo.serverTLSConnectionThrottler.erase(peerIP);
}
}
state Reference<SSLConnection> self( new SSLConnection(*ios, *context) );
self->peer_address = addr;
try {
auto to = tcpEndpoint(self->peer_address);
BindPromise p("N2_ConnectError", self->id);
Future<Void> onConnected = p.getFuture();
self->socket.async_connect( to, std::move(p) );
wait( onConnected );
self->init();
return self;
} catch (Error& e) {
// Either the connection failed, or was cancelled by the caller
self->closeSocket();
throw;
}
}
// This is not part of the IConnection interface, because it is wrapped by IListener::accept()
// Adopts an already-accepted socket; peerAddr is the remote endpoint.
void accept(NetworkAddress peerAddr) {
this->peer_address = peerAddr;
init();
}
// Server-side TLS handshake. Reports the outcome through `connected` rather
// than a return value so that cancelling the wrapper future (see
// acceptHandshakeWrapper) does not cancel the handshake mid-flight.
ACTOR static void doAcceptHandshake( Reference<SSLConnection> self, Promise<Void> connected) {
try {
// Incoming throttling is keyed on ip only; the port is zeroed.
state std::pair<IPAddress,uint16_t> peerIP = std::make_pair(self->getPeerAddress().ip, static_cast<uint16_t>(0));
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
if (now() < iter->second.second) {
if(iter->second.first >= FLOW_KNOBS->TLS_SERVER_CONNECTION_THROTTLE_ATTEMPTS) {
TraceEvent("TLSIncomingConnectionThrottlingWarning").suppressFor(1.0).detail("PeerIP", peerIP.first.toString());
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
self->closeSocket();
connected.sendError(connection_failed());
return;
}
} else {
g_network->networkInfo.serverTLSConnectionThrottler.erase(peerIP);
}
}
// Bound the number of simultaneously in-flight handshakes.
int64_t permitNumber = wait(g_network->networkInfo.handshakeLock->take());
state BoundedFlowLock::Releaser releaser(g_network->networkInfo.handshakeLock, permitNumber);
BindPromise p("N2_AcceptHandshakeError", UID());
auto onHandshook = p.getFuture();
self->getSSLSocket().async_handshake( boost::asio::ssl::stream_base::server, std::move(p) );
wait( onHandshook );
// Resume the caller at Handshake priority.
wait(delay(0, TaskPriority::Handshake));
connected.send(Void());
} catch (...) {
// Record the failure so repeated bad peers get throttled above.
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
iter->second.first++;
} else {
// NOTE(review): a fresh entry starts its failure count at 0, so the first
// failure inside a window does not count toward the threshold — confirm intended.
g_network->networkInfo.serverTLSConnectionThrottler[peerIP] = std::make_pair(0,now() + FLOW_KNOBS->TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT);
}
self->closeSocket();
connected.sendError(connection_failed());
}
}
// Wraps doAcceptHandshake so the connection is torn down whether the
// handshake fails or the caller cancels the returned future.
ACTOR static Future<Void> acceptHandshakeWrapper( Reference<SSLConnection> self ) {
Promise<Void> connected;
doAcceptHandshake(self, connected);
try {
wait(connected.getFuture());
return Void();
} catch (Error& e) {
// Either the connection failed, or was cancelled by the caller
self->closeSocket();
throw;
}
}
virtual Future<Void> acceptHandshake() {
return acceptHandshakeWrapper( Reference<SSLConnection>::addRef(this) );
}
// Client-side TLS handshake; mirrors doAcceptHandshake above.
ACTOR static void doConnectHandshake( Reference<SSLConnection> self, Promise<Void> connected) {
try {
int64_t permitNumber = wait(g_network->networkInfo.handshakeLock->take());
state BoundedFlowLock::Releaser releaser(g_network->networkInfo.handshakeLock, permitNumber);
BindPromise p("N2_ConnectHandshakeError", self->id);
Future<Void> onHandshook = p.getFuture();
self->ssl_sock.async_handshake( boost::asio::ssl::stream_base::client, std::move(p) );
wait( onHandshook );
wait(delay(0, TaskPriority::Handshake));
connected.send(Void());
} catch (...) {
// Client-side throttling is keyed on (ip, port), unlike the server side.
std::pair<IPAddress,uint16_t> peerIP = std::make_pair(self->peer_address.ip, self->peer_address.port);
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
iter->second.first++;
} else {
g_network->networkInfo.serverTLSConnectionThrottler[peerIP] = std::make_pair(0,now() + FLOW_KNOBS->TLS_CLIENT_CONNECTION_THROTTLE_TIMEOUT);
}
self->closeSocket();
connected.sendError(connection_failed());
}
}
ACTOR static Future<Void> connectHandshakeWrapper( Reference<SSLConnection> self ) {
Promise<Void> connected;
doConnectHandshake(self, connected);
try {
wait(connected.getFuture());
return Void();
} catch (Error& e) {
// Either the connection failed, or was cancelled by the caller
self->closeSocket();
throw;
}
}
virtual Future<Void> connectHandshake() {
return connectHandshakeWrapper( Reference<SSLConnection>::addRef(this) );
}
// returns when write() can write at least one byte
// Note: probes the raw socket, not the TLS stream.
virtual Future<Void> onWritable() {
++g_net2->countWriteProbes;
BindPromise p("N2_WriteProbeError", id);
auto f = p.getFuture();
socket.async_write_some( boost::asio::null_buffers(), std::move(p) );
return f;
}
// returns when read() can read at least one byte
virtual Future<Void> onReadable() {
++g_net2->countReadProbes;
BindPromise p("N2_ReadProbeError", id);
auto f = p.getFuture();
socket.async_read_some( boost::asio::null_buffers(), std::move(p) );
return f;
}
// Reads as many bytes as possible from the read buffer into [begin,end) and returns the number of bytes read (might be 0)
virtual int read( uint8_t* begin, uint8_t* end ) {
boost::system::error_code err;
++g_net2->countReads;
size_t toRead = end-begin;
// Decrypted application bytes come from the TLS stream, not the raw socket.
size_t size = ssl_sock.read_some( boost::asio::mutable_buffers_1(begin, toRead), err );
g_net2->bytesReceived += size;
//TraceEvent("ConnRead", this->id).detail("Bytes", size);
if (err) {
if (err == boost::asio::error::would_block) {
++g_net2->countWouldBlock;
return 0;
}
onReadError(err);
throw connection_failed();
}
ASSERT( size ); // If the socket is closed, we expect an 'eof' error, not a zero return value
return size;
}
// Writes as many bytes as possible from the given SendBuffer chain into the write buffer and returns the number of bytes written (might be 0)
virtual int write( SendBuffer const* data, int limit ) {
boost::system::error_code err;
++g_net2->countWrites;
size_t sent = ssl_sock.write_some( boost::iterator_range<SendBufferIterator>(SendBufferIterator(data, limit), SendBufferIterator()), err );
if (err) {
// Since there was an error, sent's value can't be used to infer that the buffer has data and the limit is positive so check explicitly.
ASSERT(limit > 0);
bool notEmpty = false;
for(auto p = data; p; p = p->next)
if(p->bytes_written - p->bytes_sent > 0) {
notEmpty = true;
break;
}
ASSERT(notEmpty);
if (err == boost::asio::error::would_block) {
++g_net2->countWouldBlock;
return 0;
}
onWriteError(err);
throw connection_failed();
}
ASSERT( sent ); // Make sure data was sent, and also this check will fail if the buffer chain was empty or the limit was not > 0.
return sent;
}
virtual NetworkAddress getPeerAddress() { return peer_address; }
virtual UID getDebugID() { return id; }
tcp::socket& getSocket() { return socket; }
ssl_socket& getSSLSocket() { return ssl_sock; }
private:
UID id;
tcp::socket socket;
ssl_socket ssl_sock;
NetworkAddress peer_address;
// Adapts a SendBuffer chain, capped at `limit` bytes, to a boost::asio
// ConstBufferSequence iterator for scatter-gather writes.
struct SendBufferIterator {
typedef boost::asio::const_buffer value_type;
typedef std::forward_iterator_tag iterator_category;
typedef size_t difference_type;
typedef boost::asio::const_buffer* pointer;
typedef boost::asio::const_buffer& reference;
SendBuffer const* p;
int limit;
SendBufferIterator(SendBuffer const* p=0, int limit = std::numeric_limits<int>::max()) : p(p), limit(limit) {
ASSERT(limit > 0);
}
bool operator == (SendBufferIterator const& r) const { return p == r.p; }
bool operator != (SendBufferIterator const& r) const { return p != r.p; }
void operator++() {
limit -= p->bytes_written - p->bytes_sent;
// Once the byte budget is exhausted, become the end iterator.
if(limit > 0)
p = p->next;
else
p = NULL;
}
boost::asio::const_buffer operator*() const {
return boost::asio::const_buffer( p->data + p->bytes_sent, std::min(limit, p->bytes_written - p->bytes_sent) );
}
};
void init() {
// Socket settings that have to be set after connect or accept succeeds
socket.non_blocking(true);
socket.set_option(boost::asio::ip::tcp::no_delay(true));
platform::setCloseOnExec(socket.native_handle());
}
// Best-effort teardown: each step collects (and ignores) its own error code.
void closeSocket() {
boost::system::error_code cancelError;
socket.cancel(cancelError);
boost::system::error_code closeError;
socket.close(closeError);
boost::system::error_code shutdownError;
ssl_sock.shutdown(shutdownError);
}
void onReadError( const boost::system::error_code& error ) {
TraceEvent(SevWarn, "N2_ReadError", id).suppressFor(1.0).detail("Message", error.value());
closeSocket();
}
void onWriteError( const boost::system::error_code& error ) {
TraceEvent(SevWarn, "N2_WriteError", id).suppressFor(1.0).detail("Message", error.value());
closeSocket();
}
};
// Listens for TLS connections: each accepted socket is wrapped in an
// SSLConnection sharing this listener's ssl context. The TLS handshake is
// NOT run here; the consumer must call acceptHandshake() on the result.
class SSLListener : public IListener, ReferenceCounted<SSLListener> {
NetworkAddress listenAddress;
tcp::acceptor acceptor;
boost::asio::ssl::context* context;
public:
SSLListener( boost::asio::io_service& io_service, boost::asio::ssl::context* context, NetworkAddress listenAddress )
: listenAddress(listenAddress), acceptor( io_service, tcpEndpoint( listenAddress ) ), context(context)
{
// Don't leak the listening fd into exec'd child processes.
platform::setCloseOnExec(acceptor.native_handle());
}
virtual void addref() { ReferenceCounted<SSLListener>::addref(); }
virtual void delref() { ReferenceCounted<SSLListener>::delref(); }
// Returns one incoming connection when it is available
virtual Future<Reference<IConnection>> accept() {
return doAccept( this );
}
virtual NetworkAddress getListenAddress() { return listenAddress; }
private:
ACTOR static Future<Reference<IConnection>> doAccept( SSLListener* self ) {
state Reference<SSLConnection> conn( new SSLConnection( self->acceptor.get_io_service(), *self->context) );
state tcp::acceptor::endpoint_type peer_endpoint;
try {
BindPromise p("N2_AcceptError", UID());
auto f = p.getFuture();
self->acceptor.async_accept( conn->getSocket(), peer_endpoint, std::move(p) );
wait( f );
auto peer_address = peer_endpoint.address().is_v6() ? IPAddress(peer_endpoint.address().to_v6().to_bytes()) : IPAddress(peer_endpoint.address().to_v4().to_ulong());
// NOTE(review): the trailing (false, true) flags presumably mean
// (isPublic=false, isTLS=true) — confirm against NetworkAddress's ctor.
conn->accept(NetworkAddress(peer_address, peer_endpoint.port(), false, true));
return conn;
} catch (...) {
conn->close();
throw;
}
}
};
#endif
struct PromiseTask : public Task, public FastAllocated<PromiseTask> { struct PromiseTask : public Task, public FastAllocated<PromiseTask> {
Promise<Void> promise; Promise<Void> promise;
PromiseTask() {} PromiseTask() {}
@ -491,7 +845,15 @@ struct PromiseTask : public Task, public FastAllocated<PromiseTask> {
} }
}; };
Net2::Net2(bool useThreadPool, bool useMetrics) // 5MB for loading files into memory
#ifndef TLS_DISABLED
// Verify callback used when no TLSPolicy is supplied: accepts every peer
// certificate unconditionally — deliberately insecure, as the name warns.
bool insecurely_always_accept(bool _1, boost::asio::ssl::verify_context& _2) {
return true;
}
#endif
Net2::Net2(bool useThreadPool, bool useMetrics, Reference<TLSPolicy> policy, const TLSParams& tlsParams)
: useThreadPool(useThreadPool), : useThreadPool(useThreadPool),
network(this), network(this),
reactor(this), reactor(this),
@ -500,10 +862,49 @@ Net2::Net2(bool useThreadPool, bool useMetrics)
// Until run() is called, yield() will always yield // Until run() is called, yield() will always yield
tsc_begin(0), tsc_end(0), taskBegin(0), currentTaskID(TaskPriority::DefaultYield), tsc_begin(0), tsc_end(0), taskBegin(0), currentTaskID(TaskPriority::DefaultYield),
lastMinTaskID(TaskPriority::Zero), lastMinTaskID(TaskPriority::Zero),
numYields(0) numYields(0),
tlsPassword(tlsParams.tlsPassword)
#ifndef TLS_DISABLED
,sslContext(boost::asio::ssl::context(boost::asio::ssl::context::tlsv12))
#endif
{ {
TraceEvent("Net2Starting"); TraceEvent("Net2Starting");
#ifndef TLS_DISABLED
sslContext.set_options(boost::asio::ssl::context::default_workarounds);
sslContext.set_verify_mode(boost::asio::ssl::context::verify_peer | boost::asio::ssl::verify_fail_if_no_peer_cert);
if (policy) {
sslContext.set_verify_callback([policy](bool preverified, boost::asio::ssl::verify_context& ctx) {
return policy->verify_peer(preverified, ctx.native_handle());
});
} else {
sslContext.set_verify_callback(boost::bind(&insecurely_always_accept, _1, _2));
}
sslContext.set_password_callback(std::bind(&Net2::get_password, this));
if (tlsParams.tlsCertPath.size() ) {
sslContext.use_certificate_chain_file(tlsParams.tlsCertPath);
}
if (tlsParams.tlsCertBytes.size() ) {
sslContext.use_certificate(boost::asio::buffer(tlsParams.tlsCertBytes.data(), tlsParams.tlsCertBytes.size()), boost::asio::ssl::context::pem);
}
if (tlsParams.tlsCAPath.size()) {
std::string cert = readFileBytes(tlsParams.tlsCAPath, FLOW_KNOBS->CERT_FILE_MAX_SIZE);
sslContext.add_certificate_authority(boost::asio::buffer(cert.data(), cert.size()));
}
if (tlsParams.tlsCABytes.size()) {
sslContext.add_certificate_authority(boost::asio::buffer(tlsParams.tlsCABytes.data(), tlsParams.tlsCABytes.size()));
}
if (tlsParams.tlsKeyPath.size()) {
sslContext.use_private_key_file(tlsParams.tlsKeyPath, boost::asio::ssl::context::pem);
}
if (tlsParams.tlsKeyBytes.size()) {
sslContext.use_private_key(boost::asio::buffer(tlsParams.tlsKeyBytes.data(), tlsParams.tlsKeyBytes.size()), boost::asio::ssl::context::pem);
}
#endif
// Set the global members // Set the global members
if(useMetrics) { if(useMetrics) {
setGlobal(INetwork::enTDMetrics, (flowGlobalType) &tdmetrics); setGlobal(INetwork::enTDMetrics, (flowGlobalType) &tdmetrics);
@ -879,8 +1280,13 @@ THREAD_HANDLE Net2::startThread( THREAD_FUNC_RETURN (*func) (void*), void *arg )
return ::startThread(func, arg); return ::startThread(func, arg);
} }
Future< Reference<IConnection> > Net2::connect( NetworkAddress toAddr, std::string host ) { Future< Reference<IConnection> > Net2::connect( NetworkAddress toAddr, std::string host ) {
#ifndef TLS_DISABLED
if ( toAddr.isTLS() ) {
return SSLConnection::connect(&this->reactor.ios, &this->sslContext, toAddr);
}
#endif
return Connection::connect(&this->reactor.ios, toAddr); return Connection::connect(&this->reactor.ios, toAddr);
} }
@ -954,6 +1360,11 @@ bool Net2::isAddressOnThisHost( NetworkAddress const& addr ) {
Reference<IListener> Net2::listen( NetworkAddress localAddr ) { Reference<IListener> Net2::listen( NetworkAddress localAddr ) {
try { try {
#ifndef TLS_DISABLED
if ( localAddr.isTLS() ) {
return Reference<IListener>(new SSLListener( reactor.ios, &this->sslContext, localAddr ));
}
#endif
return Reference<IListener>( new Listener( reactor.ios, localAddr ) ); return Reference<IListener>( new Listener( reactor.ios, localAddr ) );
} catch (boost::system::system_error const& e) { } catch (boost::system::system_error const& e) {
Error x; Error x;
@ -1048,13 +1459,13 @@ void ASIOReactor::wake() {
} // namespace net2 } // namespace net2
INetwork* newNet2(bool useThreadPool, bool useMetrics) { INetwork* newNet2(bool useThreadPool, bool useMetrics, Reference<TLSPolicy> policy, const TLSParams& tlsParams) {
try { try {
N2::g_net2 = new N2::Net2(useThreadPool, useMetrics); N2::g_net2 = new N2::Net2(useThreadPool, useMetrics, policy, tlsParams);
} }
catch(boost::system::system_error e) { catch(boost::system::system_error e) {
TraceEvent("Net2InitError").detail("Message", e.what()); TraceEvent("Net2InitError").detail("Message", e.what());
throw unknown_error(); throw;
} }
catch(std::exception const& e) { catch(std::exception const& e) {
TraceEvent("Net2InitError").detail("Message", e.what()); TraceEvent("Net2InitError").detail("Message", e.what());

535
flow/TLSPolicy.cpp Normal file
View File

@ -0,0 +1,535 @@
/*
* TLSPolicy.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "flow/TLSPolicy.h"
TLSPolicy::~TLSPolicy() {}
#ifndef TLS_DISABLED
#include <algorithm>
#include <cstring>
#include <exception>
#include <map>
#include <set>
#include <openssl/objects.h>
#include <openssl/bio.h>
#include <openssl/err.h>
#include <openssl/pem.h>
#include <openssl/x509.h>
#include <openssl/x509v3.h>
#include <openssl/x509_vfy.h>
#include <stdint.h>
#include <string>
#include <sstream>
#include <utility>
#include "flow/FastRef.h"
#include "flow/Trace.h"
// Render a boost/OpenSSL error code as OpenSSL's human-readable error text.
std::string TLSPolicy::ErrorString(boost::system::error_code e) {
	// Passing NULL tells ERR_error_string to use its internal static buffer.
	return std::string(ERR_error_string(e.value(), NULL));
}
// To force typeinfo to only be emitted once.
std::string TLSPolicy::toString() const {
std::stringstream ss;
ss << "TLSPolicy{ Rules=[";
for (const auto &r : rules) {
ss << " " << r.toString() << ",";
}
ss << " ] }";
return ss.str();
}
std::string TLSPolicy::Rule::toString() const {
std::stringstream ss;
ss << "Rule{ verify_cert=" << verify_cert
<< ", verify_time=" << verify_time;
ss << ", Subject=[";
for (const auto& s : subject_criteria) {
ss << " { NID=" << s.first << ", Criteria=" << s.second.criteria << "},";
}
ss << " ], Issuer=[";
for (const auto& s : issuer_criteria) {
ss << " { NID=" << s.first << ", Criteria=" << s.second.criteria << "},";
}
ss << " ], Root=[";
for (const auto& s : root_criteria) {
ss << " { NID=" << s.first << ", Criteria=" << s.second.criteria << "},";
}
ss << " ] }";
return ss.str();
}
// Convert a single hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f') to its
// numeric value 0..15. Throws std::runtime_error for any other character.
static int hexValue(char c) {
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	throw std::runtime_error("hexValue");
}
// Does not handle "raw" form (e.g. #28C4D1), only escaped text
// Unescape one RFC 4514 attribute value from `input` beginning at `start`.
// On return, out_end is the index just past the last consumed character,
// excluding any trailing unescaped spaces (which are trimmed from both
// out_end and the returned value).
static std::string de4514(std::string const& input, int start, int& out_end) {
std::string output;
// A value may not begin with '#' (raw form, unsupported) or a space.
if(input[start] == '#' || input[start] == ' ') {
out_end = start;
return output;
}
int space_count = 0;
for(int p = start; p < input.size();) {
switch(input[p]) {
case '\\': // Handle escaped sequence
// Backslash escaping nothing!
if(p == input.size() - 1) {
out_end = p;
goto FIN;
}
switch(input[p+1]) {
case ' ':
case '"':
case '#':
case '+':
case ',':
case ';':
case '<':
case '=':
case '>':
case '|':
case '\\':
// Escaped special character: emit it literally.
output += input[p+1];
p += 2;
space_count = 0;
continue;
default:
// Backslash escaping pair of hex digits requires two characters
if(p == input.size() - 2) {
out_end = p;
goto FIN;
}
try {
// "\XY" hex escape: decode the pair into one byte.
output += hexValue(input[p+1]) * 16 + hexValue(input[p+2]);
p += 3;
space_count = 0;
continue;
} catch( ... ) {
// Not valid hex digits: the value ends before the backslash.
out_end = p;
goto FIN;
}
}
case '"':
case '+':
case ',':
case ';':
case '<':
case '>':
case 0:
// All of these must have been escaped
out_end = p;
goto FIN;
default:
// Character is what it is
output += input[p];
if(input[p] == ' ')
space_count++;
else
space_count = 0;
p++;
}
}
out_end = input.size();
FIN:
// Trailing unescaped spaces are not part of the value; trim them.
out_end -= space_count;
output.resize(output.size() - space_count);
return output;
}
// Split `input` at the first occurrence of `c` into (before, after).
// Throws std::runtime_error if `c` does not occur in `input`.
static std::pair<std::string, std::string> splitPair(std::string const& input, char c) {
	size_t split = input.find(c);
	if (split == std::string::npos) {
		throw std::runtime_error("splitPair");
	}
	return { input.substr(0, split), input.substr(split + 1) };
}
// Translate a whitelisted RDN abbreviation (e.g. "CN", "OU") into an
// OpenSSL NID. Throws std::runtime_error for any unknown or disallowed name.
static NID abbrevToNID(std::string const& sn) {
	static const std::set<std::string> allowed = {
		"C", "CN", "L", "ST", "O", "OU", "UID", "DC", "subjectAltName"
	};
	NID nid = NID_undef;
	if (allowed.count(sn) != 0) {
		nid = OBJ_sn2nid(sn.c_str());
	}
	if (nid == NID_undef) {
		throw std::runtime_error("abbrevToNID");
	}
	return nid;
}
// Decide whether a NID should be matched against the certificate's
// X509_NAME or against an X509v3 extension.
static X509Location locationForNID(NID nid) {
	const char* longName = OBJ_nid2ln(nid);
	if (longName == NULL) {
		throw std::runtime_error("locationForNID");
	}
	// OpenSSL long-names for v3 extensions start with "X509v3".
	// It probably isn't true that all other NIDs live in the NAME, but it is for now...
	return (strncmp(longName, "X509v3", 6) == 0) ? X509Location::EXTENSION : X509Location::NAME;
}
// Parse each --tls_verify_peers string into Rules. Multiple rules within one
// string are separated by unescaped '|' characters; "\|" is a literal pipe
// belonging to the current rule. Returns false (and clears all rules) if any
// rule fails to parse.
bool TLSPolicy::set_verify_peers(std::vector<std::string> verify_peers) {
	for (int i = 0; i < verify_peers.size(); i++) {
		try {
			std::string& verifyString = verify_peers[i];
			int start = 0;       // beginning of the rule currently being scanned
			int searchFrom = 0;  // position from which to look for the next '|'
			while (searchFrom < verifyString.size()) {
				int split = verifyString.find('|', searchFrom);
				if (split == std::string::npos) {
					break;
				}
				if (split == start || verifyString[split - 1] != '\\') {
					// Unescaped separator: everything since `start` is one rule.
					rules.emplace_back(verifyString.substr(start, split - start));
					start = split + 1;
				}
				// Always advance the scan past this '|'. (Previously, an escaped
				// pipe left the search position unchanged, so find() returned the
				// same index forever and this loop never terminated.)
				searchFrom = split + 1;
			}
			// The remainder after the last separator is the final rule.
			rules.emplace_back(verifyString.substr(start));
		} catch ( const std::runtime_error& e ) {
			// Any parse error invalidates the whole policy.
			rules.clear();
			std::string& verifyString = verify_peers[i];
			TraceEvent(SevError, "FDBLibTLSVerifyPeersParseError").detail("Config", verifyString);
			return false;
		}
	}
	return true;
}
// Parse one verification rule string, e.g.
//   "Check.Valid=1,CN=foo,I.O=Bar,R.subjectAltName=DNS:host"
// Terms are comma separated. "Check.*" terms set the boolean flags; every
// other term is an RDN=value criterion scoped to Subject (default), Issuer
// ("I."), or Root ("R."), where ">=" requests prefix and "<=" suffix matching.
// Throws std::runtime_error on any malformed term.
TLSPolicy::Rule::Rule(std::string input) {
int s = 0;
while (s < input.size()) {
int eq = input.find('=', s);
if (eq == input.npos)
throw std::runtime_error("parse_verify");
// A '>' or '<' immediately before '=' selects prefix/suffix matching.
MatchType mt = MatchType::EXACT;
if (input[eq-1] == '>') mt = MatchType::PREFIX;
if (input[eq-1] == '<') mt = MatchType::SUFFIX;
std::string term = input.substr(s, eq - s - (mt == MatchType::EXACT ? 0 : 1));
if (term.find("Check.") == 0) {
// Check.* flags take exactly one character, '0' or '1', then ',' or end.
if (eq + 2 > input.size())
throw std::runtime_error("parse_verify");
if (eq + 2 != input.size() && input[eq + 2] != ',')
throw std::runtime_error("parse_verify");
if (mt != MatchType::EXACT)
throw std::runtime_error("parse_verify: cannot prefix match Check");
bool* flag;
if (term == "Check.Valid")
flag = &verify_cert;
else if (term == "Check.Unexpired")
flag = &verify_time;
else
throw std::runtime_error("parse_verify");
if (input[eq + 1] == '0')
*flag = false;
else if (input[eq + 1] == '1')
*flag = true;
else
throw std::runtime_error("parse_verify");
s = eq + 3;
} else {
// Criteria default to the Subject; an "S."/"I."/"R." prefix re-scopes them.
std::map< int, Criteria >* criteria = &subject_criteria;
if (term.find('.') != term.npos) {
auto scoped = splitPair(term, '.');
if (scoped.first == "S" || scoped.first == "Subject")
criteria = &subject_criteria;
else if (scoped.first == "I" || scoped.first == "Issuer")
criteria = &issuer_criteria;
else if (scoped.first == "R" || scoped.first == "Root")
criteria = &root_criteria;
else
throw std::runtime_error("parse_verify");
term = scoped.second;
}
int remain;
// Unescape the RFC 4514 value; `remain` is the index just past the value.
auto unesc = de4514(input, eq + 1, remain);
if (remain == eq + 1)
throw std::runtime_error("parse_verify");
NID termNID = abbrevToNID(term);
const X509Location loc = locationForNID(termNID);
criteria->insert(std::make_pair(termNID, Criteria(unesc, mt, loc)));
if (remain != input.size() && input[remain] != ',')
throw std::runtime_error("parse_verify");
s = remain + 1;
}
}
}
// Compare `criteria` against one ASN1 string taken from a certificate, after
// converting both sides to UTF-8. Supports exact, prefix, and suffix
// comparison per `mt`. Returns false on any OpenSSL conversion failure.
bool match_criteria_entry(const std::string& criteria, ASN1_STRING* entry, MatchType mt) {
bool rc = false;
// C-style goto cleanup: every early exit flows through `err`, which frees
// the OpenSSL allocations made below.
ASN1_STRING* asn_criteria = NULL;
unsigned char* criteria_utf8 = NULL;
int criteria_utf8_len = 0;
unsigned char* entry_utf8 = NULL;
int entry_utf8_len = 0;
if ((asn_criteria = ASN1_IA5STRING_new()) == NULL)
goto err;
if (ASN1_STRING_set(asn_criteria, criteria.c_str(), criteria.size()) != 1)
goto err;
// ASN1_STRING_to_UTF8 allocates the output buffers; freed at `err`.
if ((criteria_utf8_len = ASN1_STRING_to_UTF8(&criteria_utf8, asn_criteria)) < 1)
goto err;
if ((entry_utf8_len = ASN1_STRING_to_UTF8(&entry_utf8, entry)) < 1)
goto err;
if (mt == MatchType::EXACT) {
if (criteria_utf8_len == entry_utf8_len &&
memcmp(criteria_utf8, entry_utf8, criteria_utf8_len) == 0)
rc = true;
} else if (mt == MatchType::PREFIX) {
if (criteria_utf8_len <= entry_utf8_len &&
memcmp(criteria_utf8, entry_utf8, criteria_utf8_len) == 0)
rc = true;
} else if (mt == MatchType::SUFFIX) {
// Compare against the tail of the entry.
if (criteria_utf8_len <= entry_utf8_len &&
memcmp(criteria_utf8, entry_utf8 + (entry_utf8_len - criteria_utf8_len), criteria_utf8_len) == 0)
rc = true;
}
err:
ASN1_STRING_free(asn_criteria);
free(criteria_utf8);
free(entry_utf8);
return rc;
}
// Match one RDN (identified by `nid`, e.g. CN) of an X509 name against
// `criteria`. If the name lacks that RDN, or contains it more than once,
// the match is refused.
bool match_name_criteria(X509_NAME *name, NID nid, const std::string& criteria, MatchType mt) {
	int idx = X509_NAME_get_index_by_NID(name, nid, -1);
	if (idx < 0) {
		// RDN not present at all.
		return false;
	}
	if (X509_NAME_get_index_by_NID(name, nid, idx) != -1) {
		// RDN appears more than once: ambiguous, refuse to proceed.
		return false;
	}
	X509_NAME_ENTRY* entry = X509_NAME_get_entry(name, idx);
	if (entry == NULL) {
		return false;
	}
	return match_criteria_entry(criteria, X509_NAME_ENTRY_get_data(entry), mt);
}
// Match a criterion against an X509v3 extension; only the subject/issuer
// alternative-name extensions are supported. `value` must have the form
// "<kind>:<text>" with kind one of EMAIL, DNS, URI, IP; the criterion
// matches if any SAN entry of that kind matches the text.
bool match_extension_criteria(X509 *cert, NID nid, const std::string& value, MatchType mt) {
if (nid != NID_subject_alt_name && nid != NID_issuer_alt_name) {
// I have no idea how other extensions work.
return false;
}
auto pos = value.find(':');
if (pos == value.npos) {
return false;
}
std::string value_gen = value.substr(0, pos);
std::string value_val = value.substr(pos+1, value.npos);
// X509_get_ext_d2i allocates the SAN stack; freed below via pop_free.
STACK_OF(GENERAL_NAME)* sans = reinterpret_cast<STACK_OF(GENERAL_NAME)*>(X509_get_ext_d2i(cert, nid, NULL, NULL));
if (sans == NULL) {
return false;
}
int num_sans = sk_GENERAL_NAME_num( sans );
bool rc = false;
for( int i = 0; i < num_sans && !rc; ++i ) {
GENERAL_NAME* altname = sk_GENERAL_NAME_value( sans, i );
// NOTE(review): `matchable` is never used — consider removing.
std::string matchable;
// NOTE(review): the GEN_EMAIL/GEN_DNS/GEN_URI cases below fall through to
// the next label when their value_gen guard fails. Harmless because each
// branch re-checks value_gen, but confirm the missing breaks are intended.
switch (altname->type) {
case GEN_OTHERNAME:
break;
case GEN_EMAIL:
if (value_gen == "EMAIL" &&
match_criteria_entry( value_val, altname->d.rfc822Name, mt)) {
rc = true;
break;
}
case GEN_DNS:
if (value_gen == "DNS" &&
match_criteria_entry( value_val, altname->d.dNSName, mt )) {
rc = true;
break;
}
case GEN_X400:
case GEN_DIRNAME:
case GEN_EDIPARTY:
break;
case GEN_URI:
if (value_gen == "URI" &&
match_criteria_entry( value_val, altname->d.uniformResourceIdentifier, mt )) {
rc = true;
break;
}
case GEN_IPADD:
if (value_gen == "IP" &&
match_criteria_entry( value_val, altname->d.iPAddress, mt )) {
rc = true;
break;
}
case GEN_RID:
break;
}
}
sk_GENERAL_NAME_pop_free(sans, GENERAL_NAME_free);
return rc;
}
// Dispatch one criterion to the name-based or extension-based matcher
// depending on where the NID lives in the certificate.
bool match_criteria(X509* cert, X509_NAME* subject, NID nid, const std::string& criteria, MatchType mt, X509Location loc) {
	if (loc == X509Location::NAME) {
		return match_name_criteria(subject, nid, criteria, mt);
	}
	if (loc == X509Location::EXTENSION) {
		return match_extension_criteria(cert, nid, criteria, mt);
	}
	// Should never be reachable.
	return false;
}
// Evaluate one Rule against the peer's verified certificate chain.
// Returns (success, reason): reason is non-empty only on failure.
// NOTE(review): the is_client parameter is currently unused here.
std::tuple<bool,std::string> check_verify(const TLSPolicy::Rule* verify, X509_STORE_CTX* store_ctx, bool is_client) {
X509_NAME *subject, *issuer;
bool rc = false;
X509* cert = NULL;
// if returning false, give a reason string
std::string reason = "";
// Check subject criteria.
// Chain entry 0 is the peer's own (leaf) certificate.
cert = sk_X509_value(X509_STORE_CTX_get0_chain(store_ctx), 0);
if ((subject = X509_get_subject_name(cert)) == NULL) {
reason = "Cert subject error";
goto err;
}
for (auto &pair: verify->subject_criteria) {
if (!match_criteria(cert, subject, pair.first, pair.second.criteria, pair.second.match_type, pair.second.location)) {
reason = "Cert subject match failure";
goto err;
}
}
// Check issuer criteria.
if ((issuer = X509_get_issuer_name(cert)) == NULL) {
reason = "Cert issuer error";
goto err;
}
for (auto &pair: verify->issuer_criteria) {
if (!match_criteria(cert, issuer, pair.first, pair.second.criteria, pair.second.match_type, pair.second.location)) {
reason = "Cert issuer match failure";
goto err;
}
}
// Check root criteria - this is the subject of the final certificate in the stack.
cert = sk_X509_value(X509_STORE_CTX_get0_chain(store_ctx), sk_X509_num(X509_STORE_CTX_get0_chain(store_ctx)) - 1);
if ((subject = X509_get_subject_name(cert)) == NULL) {
reason = "Root subject error";
goto err;
}
for (auto &pair: verify->root_criteria) {
if (!match_criteria(cert, subject, pair.first, pair.second.criteria, pair.second.match_type, pair.second.location)) {
reason = "Root subject match failure";
goto err;
}
}
// If we got this far, everything checked out...
rc = true;
err:
return std::make_tuple(rc, reason);
}
bool TLSPolicy::verify_peer(bool preverified, X509_STORE_CTX* store_ctx) {
bool rc = false;
std::set<std::string> verify_failure_reasons;
bool verify_success;
std::string verify_failure_reason;
// If certificate verification is disabled, there's nothing more to do.
if (std::any_of(rules.begin(), rules.end(), [](const Rule& r){ return !r.verify_cert; })) {
return true;
}
if(!preverified) {
TraceEvent("TLSPolicyFailure").suppressFor(1.0).detail("Reason", "preverification failed").detail("VerifyError", X509_verify_cert_error_string(X509_STORE_CTX_get_error(store_ctx)));
return false;
}
if(!rules.size()) {
return true;
}
// Any matching rule is sufficient.
for (auto &verify_rule: rules) {
std::tie(verify_success, verify_failure_reason) = check_verify(&verify_rule, store_ctx, is_client);
if (verify_success) {
rc = true;
break;
} else {
if (verify_failure_reason.length() > 0)
verify_failure_reasons.insert(verify_failure_reason);
}
}
if (!rc) {
// log the various failure reasons
for (std::string reason : verify_failure_reasons) {
TraceEvent("TLSPolicyFailure").suppressFor(1.0).detail("Reason", reason);
}
}
return rc;
}
#endif

145
flow/TLSPolicy.h Normal file
View File

@ -0,0 +1,145 @@
/*
* TLSPolicy.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _FLOW_TLSPOLICY_H_
#define _FLOW_TLSPOLICY_H_
#pragma once
#include <map>
#include <string>
#include <vector>
#include <boost/system/system_error.hpp>
#include "flow/FastRef.h"
#ifndef TLS_DISABLED
#include <openssl/x509.h>
typedef int NID;
// How a certificate attribute value is compared against a criterion string.
enum class MatchType {
	EXACT,
	PREFIX,
	SUFFIX,
};

// Where within a certificate a given NID is located.
enum class X509Location {
	// This NID is located within a X509_NAME
	NAME,
	// This NID is an X509 extension, and should be parsed accordingly
	EXTENSION,
};

// One criterion of a TLS verification rule: the string to match, how to
// match it, and where in the certificate to look for it.
struct Criteria {
	// Default arguments collapse the former (s), (s, mt), and (s, mt, loc)
	// constructors into one while keeping all existing call sites valid.
	Criteria( const std::string& s, MatchType mt = MatchType::EXACT, X509Location loc = X509Location::NAME )
		: criteria(s), match_type(mt), location(loc) {}
	// Kept as a distinct overload: a location with a defaulted match type
	// cannot be expressed through the default arguments above.
	Criteria( const std::string& s, X509Location loc)
		: criteria(s), match_type(MatchType::EXACT), location(loc) {}

	std::string criteria;   // value compared against the certificate entry
	MatchType match_type;   // EXACT, PREFIX, or SUFFIX comparison
	X509Location location;  // NAME (X509_NAME RDN) or EXTENSION (X509v3)

	bool operator==(const Criteria& c) const {
		return criteria == c.criteria && match_type == c.match_type && location == c.location;
	}
};
#endif
// TLS configuration as supplied on the command line, prior to constructing a
// TLS policy/context.
struct TLSParams {
	// Option ids used in the SimpleOpt tables generated by TLS_OPTION_FLAGS.
	enum { OPT_TLS = 100000, OPT_TLS_PLUGIN, OPT_TLS_CERTIFICATES, OPT_TLS_KEY, OPT_TLS_VERIFY_PEERS, OPT_TLS_CA_FILE, OPT_TLS_PASSWORD };

	// Filesystem paths to certificate / key / CA files, plus the private-key passphrase.
	std::string tlsCertPath, tlsKeyPath, tlsCAPath, tlsPassword;
	// Raw file contents.  NOTE(review): presumably populated as an alternative
	// to the *Path members (bytes passed directly) — confirm at the call sites.
	std::string tlsCertBytes, tlsKeyBytes, tlsCABytes;
};
// A peer-verification policy: holds a set of Rules parsed from "verify peers"
// strings and checks presented certificate chains against them.
class TLSPolicy : ReferenceCounted<TLSPolicy> {
public:
	// Which side of the connection this policy governs.
	enum class Is {
		CLIENT,
		SERVER
	};

	TLSPolicy(Is client) : is_client(client == Is::CLIENT) {}
	virtual ~TLSPolicy();

	virtual void addref() { ReferenceCounted<TLSPolicy>::addref(); }
	virtual void delref() { ReferenceCounted<TLSPolicy>::delref(); }

#ifndef TLS_DISABLED
	// Renders a boost::system error code as a human-readable string.
	static std::string ErrorString(boost::system::error_code e);

	// Parses each verify-peers string into a Rule appended to `rules`.
	// NOTE(review): bool return presumably indicates parse success — confirm
	// against the definition in TLSPolicy.cpp.
	bool set_verify_peers(std::vector<std::string> verify_peers);

	// Verification hook given the OpenSSL certificate store context for the
	// peer's chain (see the verify_failure_reasons logging in TLSPolicy.cpp).
	bool verify_peer(bool preverified, X509_STORE_CTX* store_ctx);

	std::string toString() const;

	// One parsed verify-peers entry.
	struct Rule {
		explicit Rule(std::string input);

		std::string toString() const;

		// Criteria keyed by OpenSSL NID, applied to the subject name, the
		// issuer name, and the root certificate respectively.
		std::map< NID, Criteria > subject_criteria;
		std::map< NID, Criteria > issuer_criteria;
		std::map< NID, Criteria > root_criteria;

		bool verify_cert = true;   // whether chain verification is required
		bool verify_time = true;   // whether certificate validity dates are checked
	};

	std::vector<Rule> rules;
#endif
	// True when constructed with Is::CLIENT.
	bool is_client;
};
// Long-form command-line flag names for TLS, shared by every FDB binary.
#define TLS_PLUGIN_FLAG "--tls_plugin"
#define TLS_CERTIFICATE_FILE_FLAG "--tls_certificate_file"
#define TLS_KEY_FILE_FLAG "--tls_key_file"
#define TLS_VERIFY_PEERS_FLAG "--tls_verify_peers"
#define TLS_CA_FILE_FLAG "--tls_ca_file"
#define TLS_PASSWORD_FLAG "--tls_password"

// SimpleOpt option-table entries for the flags above, for splicing into a
// binary's option list.  The trailing comma is intentional: the consumer
// continues the initializer list after this macro.
#define TLS_OPTION_FLAGS \
	{ TLSParams::OPT_TLS_PLUGIN, TLS_PLUGIN_FLAG, SO_REQ_SEP }, \
	{ TLSParams::OPT_TLS_CERTIFICATES, TLS_CERTIFICATE_FILE_FLAG, SO_REQ_SEP }, \
	{ TLSParams::OPT_TLS_KEY, TLS_KEY_FILE_FLAG, SO_REQ_SEP }, \
	{ TLSParams::OPT_TLS_VERIFY_PEERS, TLS_VERIFY_PEERS_FLAG, SO_REQ_SEP }, \
	{ TLSParams::OPT_TLS_PASSWORD, TLS_PASSWORD_FLAG, SO_REQ_SEP }, \
	{ TLSParams::OPT_TLS_CA_FILE, TLS_CA_FILE_FLAG, SO_REQ_SEP },

// Usage/help text fragment describing the TLS flags, for splicing into a
// binary's --help output.
#define TLS_HELP \
	"  " TLS_CERTIFICATE_FILE_FLAG " CERTFILE\n" \
	"                 The path of a file containing the TLS certificate and CA\n" \
	"                 chain.\n" \
	"  " TLS_CA_FILE_FLAG " CERTAUTHFILE\n" \
	"                 The path of a file containing the CA certificates chain.\n" \
	"  " TLS_KEY_FILE_FLAG " KEYFILE\n" \
	"                 The path of a file containing the private key corresponding\n" \
	"                 to the TLS certificate.\n" \
	"  " TLS_PASSWORD_FLAG " PASSCODE\n" \
	"                 The passphrase of encrypted private key\n" \
	"  " TLS_VERIFY_PEERS_FLAG " CONSTRAINTS\n" \
	"                 The constraints by which to validate TLS peers. The contents\n" \
	"                 and format of CONSTRAINTS are plugin-specific.\n"
#endif

View File

@ -684,6 +684,50 @@ void removeTraceRole(std::string role) {
g_traceLog.removeRole(role); g_traceLog.removeRole(role);
} }
// A default-constructed TraceEvent is inert: marked initialized, disabled, and
// already logged, so its destructor does nothing.
TraceEvent::TraceEvent() : initialized(true), enabled(false), logged(true) {}

// Move construction: transfers the pending event (including ownership of the
// tmpEventMetric pointer) from ev to *this.
TraceEvent::TraceEvent(TraceEvent &&ev) {
	enabled = ev.enabled;
	err = ev.err;
	fields = std::move(ev.fields);
	id = ev.id;
	initialized = ev.initialized;
	logged = ev.logged;
	maxEventLength = ev.maxEventLength;
	maxFieldLength = ev.maxFieldLength;
	severity = ev.severity;
	tmpEventMetric = ev.tmpEventMetric;
	trackingKey = ev.trackingKey;
	type = ev.type;

	// Neuter the source so its destructor neither logs the event nor touches
	// the (now transferred) tmpEventMetric.
	ev.initialized = true;
	ev.enabled = false;
	ev.logged = true;
	ev.tmpEventMetric = nullptr;
}
// Move assignment: takes over the pending event (including ownership of the
// tmpEventMetric pointer) from ev and neuters ev.
// NOTE(review): a tmpEventMetric already owned by *this is overwritten without
// being released; callers appear to only move into inert events — confirm, or
// free the old metric here.
TraceEvent& TraceEvent::operator=(TraceEvent &&ev) {
	// Self-move guard: without it, the source-neutering writes below would
	// disable this event and null its tmpEventMetric, losing the pending
	// event and leaking the metric.
	if (this == &ev) {
		return *this;
	}

	enabled = ev.enabled;
	err = ev.err;
	fields = std::move(ev.fields);
	id = ev.id;
	initialized = ev.initialized;
	logged = ev.logged;
	maxEventLength = ev.maxEventLength;
	maxFieldLength = ev.maxFieldLength;
	severity = ev.severity;
	tmpEventMetric = ev.tmpEventMetric;
	trackingKey = ev.trackingKey;
	type = ev.type;

	// Neuter the source so its destructor neither logs nor frees.
	ev.initialized = true;
	ev.enabled = false;
	ev.logged = true;
	ev.tmpEventMetric = nullptr;

	return *this;
}
TraceEvent::TraceEvent( const char* type, UID id ) : id(id), type(type), severity(SevInfo), initialized(false), enabled(true), logged(false) { TraceEvent::TraceEvent( const char* type, UID id ) : id(id), type(type), severity(SevInfo), initialized(false), enabled(true), logged(false) {
g_trace_depth++; g_trace_depth++;
setMaxFieldLength(0); setMaxFieldLength(0);
@ -760,7 +804,9 @@ bool TraceEvent::init() {
} }
detail("Severity", int(severity)); detail("Severity", int(severity));
detailf("Time", "%.6f", getCurrentTime()); detail("Time", "0.000000");
timeIndex = fields.size() - 1;
detail("Type", type); detail("Type", type);
if(g_network && g_network->isSimulated()) { if(g_network && g_network->isSimulated()) {
NetworkAddress local = g_network->getLocalAddress(); NetworkAddress local = g_network->getLocalAddress();
@ -968,6 +1014,8 @@ void TraceEvent::log() {
init(); init();
try { try {
if (enabled) { if (enabled) {
fields.mutate(timeIndex).second = format("%.6f", TraceEvent::getCurrentTime());
if (this->severity == SevError) { if (this->severity == SevError) {
severity = SevInfo; severity = SevInfo;
backtrace(); backtrace();
@ -1181,6 +1229,10 @@ std::string TraceEventFields::getValue(std::string key) const {
} }
} }
// Returns a mutable reference to the index-th (key, value) field.
// Uses vector::at, so an invalid index throws std::out_of_range rather than
// invoking undefined behavior.
TraceEventFields::Field& TraceEventFields::mutate(int index) {
	return fields.at(index);
}
namespace { namespace {
void parseNumericValue(std::string const& s, double &outValue, bool permissive = false) { void parseNumericValue(std::string const& s, double &outValue, bool permissive = false) {
double d = 0; double d = 0;
@ -1306,6 +1358,9 @@ void TraceEventFields::validateFormat() const {
} }
std::string traceableStringToString(const char* value, size_t S) { std::string traceableStringToString(const char* value, size_t S) {
ASSERT_WE_THINK(S > 0 && value[S - 1] == '\0'); if(g_network) {
ASSERT_WE_THINK(S > 0 && value[S - 1] == '\0');
}
return std::string(value, S - 1); // Exclude trailing \0 byte return std::string(value, S - 1); // Exclude trailing \0 byte
} }

View File

@ -81,6 +81,8 @@ public:
int64_t getInt64(std::string key, bool permissive=false) const; int64_t getInt64(std::string key, bool permissive=false) const;
double getDouble(std::string key, bool permissive=false) const; double getDouble(std::string key, bool permissive=false) const;
Field &mutate(int index);
std::string toString() const; std::string toString() const;
void validateFormat() const; void validateFormat() const;
template<class Archiver> template<class Archiver>
@ -374,11 +376,15 @@ struct SpecialTraceMetricType
TRACE_METRIC_TYPE(double, double); TRACE_METRIC_TYPE(double, double);
struct TraceEvent { struct TraceEvent {
TraceEvent();
TraceEvent( const char* type, UID id = UID() ); // Assumes SevInfo severity TraceEvent( const char* type, UID id = UID() ); // Assumes SevInfo severity
TraceEvent( Severity, const char* type, UID id = UID() ); TraceEvent( Severity, const char* type, UID id = UID() );
TraceEvent( struct TraceInterval&, UID id = UID() ); TraceEvent( struct TraceInterval&, UID id = UID() );
TraceEvent( Severity severity, struct TraceInterval& interval, UID id = UID() ); TraceEvent( Severity severity, struct TraceInterval& interval, UID id = UID() );
TraceEvent( TraceEvent &&ev );
TraceEvent& operator=( TraceEvent &&ev );
static void setNetworkThread(); static void setNetworkThread();
static bool isNetworkThread(); static bool isNetworkThread();
@ -490,6 +496,7 @@ private:
int maxFieldLength; int maxFieldLength;
int maxEventLength; int maxEventLength;
int timeIndex;
void setSizeLimits(); void setSizeLimits();

View File

@ -51,6 +51,7 @@
<ClCompile Include="version.cpp" /> <ClCompile Include="version.cpp" />
<ClCompile Include="SignalSafeUnwind.cpp" /> <ClCompile Include="SignalSafeUnwind.cpp" />
<ClCompile Include="serialize.cpp" /> <ClCompile Include="serialize.cpp" />
<ClCompile Include="TLSPolicy.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="CompressedInt.h" /> <ClInclude Include="CompressedInt.h" />
@ -95,6 +96,7 @@
<ClInclude Include="Platform.h" /> <ClInclude Include="Platform.h" />
<ClInclude Include="ThreadSafeQueue.h" /> <ClInclude Include="ThreadSafeQueue.h" />
<ClInclude Include="Trace.h" /> <ClInclude Include="Trace.h" />
<ClInclude Include="TLSPolicy.h" />
<ClInclude Include="SignalSafeUnwind.h" /> <ClInclude Include="SignalSafeUnwind.h" />
<ClInclude Include="UnitTest.h" /> <ClInclude Include="UnitTest.h" />
<ActorCompiler Include="ThreadHelper.actor.h"> <ActorCompiler Include="ThreadHelper.actor.h">

View File

@ -129,3 +129,12 @@ ACTOR Future<Void> returnIfTrue( Future<bool> f )
wait( Never() ); wait( Never() );
throw internal_error(); throw internal_error();
} }
// Waits approximately waitTime seconds at TaskPriority::Low, split into
// LOW_PRIORITY_DELAY_COUNT equal shorter delays — presumably so the actor is
// re-queued repeatedly and keeps yielding to higher-priority work.
ACTOR Future<Void> lowPriorityDelay( double waitTime ) {
	state int loopCount = 0;
	while(loopCount < FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT) {
		wait(delay(waitTime/FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT, TaskPriority::Low));
		loopCount++;
	}
	return Void();
}

View File

@ -32,6 +32,7 @@
#include "flow/flow.h" #include "flow/flow.h"
#include "flow/Knobs.h" #include "flow/Knobs.h"
#include "flow/Util.h" #include "flow/Util.h"
#include "flow/IndexedSet.h"
#include "flow/actorcompiler.h" // This must be the last #include. #include "flow/actorcompiler.h" // This must be the last #include.
#pragma warning( disable: 4355 ) // 'this' : used in base member initializer list #pragma warning( disable: 4355 ) // 'this' : used in base member initializer list
@ -813,6 +814,7 @@ Future<Void> anyTrue( std::vector<Reference<AsyncVar<bool>>> const& input, Refer
Future<Void> cancelOnly( std::vector<Future<Void>> const& futures ); Future<Void> cancelOnly( std::vector<Future<Void>> const& futures );
Future<Void> timeoutWarningCollector( FutureStream<Void> const& input, double const& logDelay, const char* const& context, UID const& id ); Future<Void> timeoutWarningCollector( FutureStream<Void> const& input, double const& logDelay, const char* const& context, UID const& id );
Future<bool> quorumEqualsTrue( std::vector<Future<bool>> const& futures, int const& required ); Future<bool> quorumEqualsTrue( std::vector<Future<bool>> const& futures, int const& required );
Future<Void> lowPriorityDelay( double const& waitTime );
ACTOR template <class T> ACTOR template <class T>
Future<Void> streamHelper( PromiseStream<T> output, PromiseStream<Error> errors, Future<T> input ) { Future<Void> streamHelper( PromiseStream<T> output, PromiseStream<Error> errors, Future<T> input ) {
@ -1297,6 +1299,110 @@ private:
} }
}; };
// A monotonically non-decreasing int64 counter that futures can wait on:
// whenAtLeast(limit) becomes ready once the value reaches limit.
struct NotifiedInt {
	NotifiedInt( int64_t val = 0 ) : val(val) {}

	// Returns a Future that is ready when the value is >= limit — immediately
	// if it already is; otherwise queues a Promise fired by a later set().
	Future<Void> whenAtLeast( int64_t limit ) {
		if (val >= limit)
			return Void();
		Promise<Void> p;
		waiting.push( std::make_pair(limit,p) );
		return p.getFuture();
	}

	int64_t get() const { return val; }

	// Advances the value (decreasing it is an ASSERT failure) and fires every
	// waiter whose limit is now satisfied.
	void set( int64_t v ) {
		ASSERT( v >= val );
		if (v != val) {
			val = v;

			std::vector<Promise<Void>> toSend;
			// Pop all satisfied waiters before sending any of them —
			// NOTE(review): presumably because send() may run continuations
			// that re-enter this NotifiedInt and mutate `waiting`; confirm.
			while ( waiting.size() && v >= waiting.top().first ) {
				Promise<Void> p = std::move(waiting.top().second);
				waiting.pop();
				toSend.push_back(p);
			}
			for(auto& p : toSend) {
				p.send(Void());
			}
		}
	}

	void operator=( int64_t v ) {
		set( v );
	}

	NotifiedInt(NotifiedInt&& r) BOOST_NOEXCEPT : waiting(std::move(r.waiting)), val(r.val) {}
	void operator=(NotifiedInt&& r) BOOST_NOEXCEPT { waiting = std::move(r.waiting); val = r.val; }

private:
	typedef std::pair<int64_t,Promise<Void>> Item;
	// Greater-than comparator makes the priority_queue a min-heap on limit:
	// the smallest outstanding limit sits on top.
	struct ItemCompare {
		bool operator()(const Item& a, const Item& b) { return a.first > b.first; }
	};
	std::priority_queue<Item, std::vector<Item>, ItemCompare> waiting;
	int64_t val;
};
struct BoundedFlowLock : NonCopyable, public ReferenceCounted<BoundedFlowLock> {
	// BoundedFlowLock is different from a FlowLock in that it has a bound on how many locks can be taken from the oldest outstanding lock.
	// For instance, with a FlowLock that has two permits, if one permit is taken but never released, the other permit can be reused an unlimited
	// amount of times, but with a BoundedFlowLock, it can only be reused a fixed number of times.

	// Move-only RAII holder that releases its permit on destruction (or on an
	// explicit release() call).
	struct Releaser : NonCopyable {
		BoundedFlowLock* lock;
		int64_t permitNumber;  // 0 means "no permit held"
		Releaser() : lock(nullptr), permitNumber(0) {}
		Releaser( BoundedFlowLock* lock, int64_t permitNumber ) : lock(lock), permitNumber(permitNumber) {}
		Releaser(Releaser&& r) BOOST_NOEXCEPT : lock(r.lock), permitNumber(r.permitNumber) { r.permitNumber = 0; }
		// Move assignment first releases any permit this Releaser already holds.
		void operator=(Releaser&& r) { if (permitNumber) lock->release(permitNumber); lock = r.lock; permitNumber = r.permitNumber; r.permitNumber = 0; }

		// Release the held permit now, if any; safe to call more than once.
		void release() {
			if (permitNumber) {
				lock->release(permitNumber);
			}
			permitNumber = 0;
		}

		~Releaser() { if (permitNumber) lock->release(permitNumber); }
	};

	BoundedFlowLock() : unrestrictedPermits(1), boundedPermits(0), nextPermitNumber(0), minOutstanding(0) {}
	explicit BoundedFlowLock(int64_t unrestrictedPermits, int64_t boundedPermits) : unrestrictedPermits(unrestrictedPermits), boundedPermits(boundedPermits), nextPermitNumber(0), minOutstanding(0) {}

	// Acquires a permit; the resolved value must later be passed to release().
	Future<int64_t> take() {
		return takeActor(this);
	}
	// Returns a permit and wakes any takers unblocked by the new minimum
	// outstanding permit.
	void release( int64_t permitNumber ) {
		outstanding.erase(permitNumber);
		updateMinOutstanding();
	}
private:
	IndexedSet<int64_t, int64_t> outstanding;  // outstanding permit numbers, each with weight 1
	NotifiedInt minOutstanding;                // tracked threshold that blocked takers wait on
	int64_t nextPermitNumber;                  // monotonically increasing permit ids
	const int64_t unrestrictedPermits;
	const int64_t boundedPermits;

	void updateMinOutstanding() {
		// NOTE(review): assumes IndexedSet::index(k) yields the element with
		// total weight k before it (i.e. the (k+1)-th smallest here) — confirm
		// against IndexedSet.h.
		auto it = outstanding.index(unrestrictedPermits-1);
		if(it == outstanding.end()) {
			// Fewer than unrestrictedPermits outstanding: nothing throttles,
			// so advance the threshold past every issued permit.
			minOutstanding.set(nextPermitNumber);
		} else {
			minOutstanding.set(*it);
		}
	}
	ACTOR static Future<int64_t> takeActor(BoundedFlowLock* lock) {
		state int64_t permitNumber = ++lock->nextPermitNumber;
		lock->outstanding.insert(permitNumber, 1);
		lock->updateMinOutstanding();
		// Block until our permit is within boundedPermits of the tracked
		// minimum outstanding permit.
		wait( lock->minOutstanding.whenAtLeast(std::max<int64_t>(0, permitNumber - lock->boundedPermits)) );
		return permitNumber;
	}
};
ACTOR template <class T> ACTOR template <class T>
Future<Void> yieldPromiseStream( FutureStream<T> input, PromiseStream<T> output, TaskPriority taskID = TaskPriority::DefaultYield ) { Future<Void> yieldPromiseStream( FutureStream<T> input, PromiseStream<T> output, TaskPriority taskID = TaskPriority::DefaultYield ) {
loop { loop {

Some files were not shown because too many files have changed in this diff Show More