Merge branch 'master' into mengxu/storage-engine-switch-PR-v2

This commit is contained in:
Meng Xu 2019-09-03 14:11:33 -07:00
commit bd80a67d46
66 changed files with 1267 additions and 949 deletions

View File

@ -18,7 +18,7 @@
# limitations under the License.
cmake_minimum_required(VERSION 3.13)
project(foundationdb
VERSION 6.1.0
VERSION 7.0.0
DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions."
HOMEPAGE_URL "http://www.foundationdb.org/"
LANGUAGES C CXX ASM)

View File

@ -1,21 +1,44 @@
#!/bin/bash
set -e
OPTIONS=''
# Get compiler version and major version
COMPILER_VER=$("${CC}" -dumpversion)
COMPILER_MAJVER="${COMPILER_VER%%\.*}"
# Add linker, if specified and valid
# The linker to use for building:
# can be LD (system default, default choice), GOLD, LLD, or BFD
if [ -n "${USE_LD}" ] && \
(([[ "${CC}" == *"gcc"* ]] && [ "${COMPILER_MAJVER}" -ge 9 ]) || \
([[ "${CXX}" == *"clang++"* ]] && [ "${COMPILER_MAJVER}" -ge 4 ]) )
then
if [ "${PLATFORM}" == "linux" ]; then
if [ "${USE_LD}" == "BFD" ]; then
OPTIONS+='-fuse-ld=bfd -Wl,--disable-new-dtags'
elif [ "${USE_LD}" == "GOLD" ]; then
OPTIONS+='-fuse-ld=gold -Wl,--disable-new-dtags'
elif [ "${USE_LD}" == "LLD" ]; then
OPTIONS+='-fuse-ld=lld -Wl,--disable-new-dtags'
elif [ "${USE_LD}" != "DEFAULT" ] && [ "${USE_LD}" != "LD" ]; then
echo 'USE_LD must be set to DEFAULT, LD, BFD, GOLD, or LLD!'
exit 1
fi
fi
fi
case $1 in
Application | DynamicLibrary)
echo "Linking $3"
if [ "$1" = "DynamicLibrary" ]; then
OPTIONS="-shared"
if [ "$PLATFORM" = "linux" ]; then
OPTIONS="$OPTIONS -Wl,-z,noexecstack -Wl,-soname,$( basename $3 )"
fi
if [ "$PLATFORM" = "osx" ]; then
OPTIONS="$OPTIONS -Wl,-dylib_install_name -Wl,$( basename $3 )"
fi
else
OPTIONS=
OPTIONS+=" -shared"
if [ "$PLATFORM" = "linux" ]; then
OPTIONS+=" -Wl,-z,noexecstack -Wl,-soname,$( basename $3 )"
elif [ "$PLATFORM" = "osx" ]; then
OPTIONS+=" -Wl,-dylib_install_name -Wl,$( basename $3 )"
fi
fi
OPTIONS=$( eval echo "$OPTIONS $LDFLAGS \$$2_OBJECTS \$$2_LIBS \$$2_STATIC_LIBS_REAL \$$2_LDFLAGS -o $3" )

View File

@ -5,7 +5,7 @@ set(ALLOC_INSTRUMENTATION OFF CACHE BOOL "Instrument alloc")
set(WITH_UNDODB OFF CACHE BOOL "Use rr or undodb")
set(USE_ASAN OFF CACHE BOOL "Compile with address sanitizer")
set(FDB_RELEASE OFF CACHE BOOL "This is a build of a final release")
set(USE_LD "LD" CACHE STRING "The linker to use for building: can be LD (system default, default choice), GOLD, or LLD")
set(USE_LD "DEFAULT" CACHE STRING "The linker to use for building: can be LD (system default, default choice), BFD, GOLD, or LLD")
set(USE_LIBCXX OFF CACHE BOOL "Use libc++")
set(USE_CCACHE OFF CACHE BOOL "Use ccache for compilation if available")
set(RELATIVE_DEBUG_PATHS OFF CACHE BOOL "Use relative file paths in debug info")
@ -90,9 +90,23 @@ else()
set(GCC YES)
endif()
# Use the linker environment variable, if specified and valid
if ((USE_LD STREQUAL "DEFAULT") AND (NOT "$ENV{USE_LD}" STREQUAL ""))
string(TOUPPER "$ENV{USE_LD}" USE_LDENV)
if (("${USE_LDENV}" STREQUAL "LD") OR ("${USE_LDENV}" STREQUAL "GOLD") OR ("${USE_LDENV}" STREQUAL "LLD") OR ("${USE_LDENV}" STREQUAL "BFD") OR ("${USE_LDENV}" STREQUAL "DEFAULT"))
set(USE_LD "${USE_LDENV}")
else()
message (FATAL_ERROR "USE_LD must be set to DEFAULT, LD, BFD, GOLD, or LLD!")
endif()
endif()
# check linker flags.
if ((NOT (USE_LD STREQUAL "LD")) AND (NOT (USE_LD STREQUAL "GOLD")) AND (NOT (USE_LD STREQUAL "LLD")))
message (FATAL_ERROR "USE_LD must be set to LD, GOLD, or LLD!")
if (USE_LD STREQUAL "DEFAULT")
set(USE_LD "LD")
else()
if ((NOT (USE_LD STREQUAL "LD")) AND (NOT (USE_LD STREQUAL "GOLD")) AND (NOT (USE_LD STREQUAL "LLD")) AND (NOT (USE_LD STREQUAL "BFD")))
message (FATAL_ERROR "USE_LD must be set to DEFAULT, LD, BFD, GOLD, or LLD!")
endif()
endif()
# if USE_LD=LD, then we don't do anything, defaulting to whatever system
@ -100,6 +114,11 @@ else()
# implies the default xcode linker, and other distros may choose others by
# default).
if(USE_LD STREQUAL "BFD")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=bfd -Wl,--disable-new-dtags")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=bfd -Wl,--disable-new-dtags")
endif()
if(USE_LD STREQUAL "GOLD")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold -Wl,--disable-new-dtags")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold -Wl,--disable-new-dtags")

View File

@ -199,14 +199,12 @@ The ``foundationdb.conf`` file contains several sections, detailed below. Note t
## foundationdb.conf
##
## Configuration file for FoundationDB server processes
## Full documentation is available in the FoundationDB Administration document.
[fdbmonitor]
restart_delay = 60
user = foundationdb
group = foundationdb
Contains basic configuration parameters of the ``fdbmonitor`` process. ``restart_delay`` specifies the number of seconds that ``fdbmonitor`` waits before restarting a failed process. ``user`` and ``group`` are used on Linux systems to control the privilege level of child processes.
Contains basic configuration parameters of the ``fdbmonitor`` process. ``user`` and ``group`` are used on Linux systems to control the privilege level of child processes.
``[general]`` section
-----------------------
@ -215,8 +213,41 @@ Contains basic configuration parameters of the ``fdbmonitor`` process. ``restart
[general]
cluster_file = /etc/foundationdb/fdb.cluster
restart_delay = 60
## restart_backoff and restart_delay_reset_interval default to the value that is used for restart_delay
# initial_restart_delay = 0
# restart_backoff = 60.0
# restart_delay_reset_interval = 60
# delete_envvars =
# kill_on_configuration_change = true
# disable_lifecycle_logging = false
Contains settings applicable to all processes (e.g. fdbserver, backup_agent). The main setting of interest is ``cluster_file``, which specifies the location of the cluster file. This file and the directory that contains it must be writable by all processes (i.e. by the user or group set in the [fdbmonitor] section).
Contains settings applicable to all processes (e.g. fdbserver, backup_agent).
* ``cluster_file``: Specifies the location of the cluster file. This file and the directory that contains it must be writable by all processes (i.e. by the user or group set in the ``[fdbmonitor]`` section).
* ``delete_envvars``: A space-separated list of environment variables to remove from the environments of child processes. This can be used if the ``fdbmonitor`` process needs to be run with environment variables that are undesired in its children.
* ``kill_on_configuration_change``: If ``true``, affected processes will be restarted whenever the configuration file changes. Defaults to ``true``.
* ``disable_lifecycle_logging``: If ``true``, ``fdbmonitor`` will not write log events when processes start or terminate. Defaults to ``false``.
The ``[general]`` section also contains some parameters to control how processes are restarted when they die. ``fdbmonitor`` uses backoff logic to prevent a process that dies repeatedly from cycling too quickly, and it also introduces up to +/-10% random jitter into the delay to avoid multiple processes all restarting simultaneously. ``fdbmonitor`` tracks separate backoff state for each process, so the restarting of one process will have no effect on the backoff behavior of another.
* ``restart_delay``: The maximum number of seconds (subject to jitter) that ``fdbmonitor`` will delay before restarting a failed process.
* ``initial_restart_delay``: The number of seconds ``fdbmonitor`` waits to restart a process the first time it dies. Defaults to 0 (i.e. the process gets restarted immediately).
* ``restart_backoff``: Controls how quickly ``fdbmonitor`` backs off when a process dies repeatedly. The previous delay (or 1, if the previous delay is 0) is multiplied by ``restart_backoff`` to get the next delay, maxing out at the value of ``restart_delay``. Defaults to the value of ``restart_delay``, meaning that the second and subsequent failures will all delay ``restart_delay`` seconds between restarts.
* ``restart_delay_reset_interval``: The number of seconds a process must be running before resetting the backoff back to the value of ``initial_restart_delay``. Defaults to the value of ``restart_delay``.
As an example, let's say the following parameters have been set:
.. code-block:: ini
restart_delay = 60
initial_restart_delay = 0
restart_backoff = 2.0
restart_delay_reset_interval = 180
The progression of delays for a process that fails repeatedly would be ``0, 2, 4, 8, 16, 32, 60, 60, ...``, each subject to a 10% random jitter. After the process stays alive for 180 seconds, the backoff would reset and the next failure would restart the process immediately.
Using the default parameters, a process will restart immediately if it fails and then delay ``restart_delay`` seconds if it fails again within ``restart_delay`` seconds.
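To make the backoff arithmetic concrete, here is a minimal standalone sketch (plain C++, not fdbmonitor's actual implementation) that applies the documented rule to the parameter values from the example above:

.. code-block:: cpp

    // Sketch of the documented restart-backoff rule; prints the delay before each restart.
    #include <algorithm>
    #include <cstdio>

    int main() {
        const double restart_delay = 60.0;        // maximum delay between restarts
        const double initial_restart_delay = 0.0; // delay after the first failure
        const double restart_backoff = 2.0;       // multiplier applied on repeated failures
        double delay = initial_restart_delay;
        for (int failure = 1; failure <= 8; ++failure) {
            std::printf("failure %d: wait %g seconds (subject to +/-10%% jitter)\n", failure, delay);
            // Next delay: the previous delay (or 1, if it is 0) times restart_backoff, capped at restart_delay.
            delay = std::min(restart_delay, std::max(delay, 1.0) * restart_backoff);
        }
        return 0;
    }

Running this reproduces the ``0, 2, 4, 8, 16, 32, 60, 60, ...`` progression described above (before jitter is applied).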
.. _foundationdb-conf-fdbserver:

View File

@ -10,38 +10,38 @@ macOS
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
* `FoundationDB-6.2.2.pkg <https://www.foundationdb.org/downloads/6.2.2/macOS/installers/FoundationDB-6.2.2.pkg>`_
* `FoundationDB-6.2.3.pkg <https://www.foundationdb.org/downloads/6.2.3/macOS/installers/FoundationDB-6.2.3.pkg>`_
Ubuntu
------
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
* `foundationdb-clients-6.2.2-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.2/ubuntu/installers/foundationdb-clients_6.2.2-1_amd64.deb>`_
* `foundationdb-server-6.2.2-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.2/ubuntu/installers/foundationdb-server_6.2.2-1_amd64.deb>`_ (depends on the clients package)
* `foundationdb-clients-6.2.3-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.3/ubuntu/installers/foundationdb-clients_6.2.3-1_amd64.deb>`_
* `foundationdb-server-6.2.3-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.3/ubuntu/installers/foundationdb-server_6.2.3-1_amd64.deb>`_ (depends on the clients package)
RHEL/CentOS EL6
---------------
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
* `foundationdb-clients-6.2.2-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.2/rhel6/installers/foundationdb-clients-6.2.2-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.2.2-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.2/rhel6/installers/foundationdb-server-6.2.2-1.el6.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.2.3-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.3/rhel6/installers/foundationdb-clients-6.2.3-1.el6.x86_64.rpm>`_
* `foundationdb-server-6.2.3-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.3/rhel6/installers/foundationdb-server-6.2.3-1.el6.x86_64.rpm>`_ (depends on the clients package)
RHEL/CentOS EL7
---------------
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
* `foundationdb-clients-6.2.2-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.2/rhel7/installers/foundationdb-clients-6.2.2-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.2.2-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.2/rhel7/installers/foundationdb-server-6.2.2-1.el7.x86_64.rpm>`_ (depends on the clients package)
* `foundationdb-clients-6.2.3-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.3/rhel7/installers/foundationdb-clients-6.2.3-1.el7.x86_64.rpm>`_
* `foundationdb-server-6.2.3-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.3/rhel7/installers/foundationdb-server-6.2.3-1.el7.x86_64.rpm>`_ (depends on the clients package)
Windows
-------
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
* `foundationdb-6.2.2-x64.msi <https://www.foundationdb.org/downloads/6.2.2/windows/installers/foundationdb-6.2.2-x64.msi>`_
* `foundationdb-6.2.3-x64.msi <https://www.foundationdb.org/downloads/6.2.3/windows/installers/foundationdb-6.2.3-x64.msi>`_
API Language Bindings
=====================
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
* `foundationdb-6.2.2.tar.gz <https://www.foundationdb.org/downloads/6.2.2/bindings/python/foundationdb-6.2.2.tar.gz>`_
* `foundationdb-6.2.3.tar.gz <https://www.foundationdb.org/downloads/6.2.3/bindings/python/foundationdb-6.2.3.tar.gz>`_
Ruby 1.9.3/2.0.0+
-----------------
* `fdb-6.2.2.gem <https://www.foundationdb.org/downloads/6.2.2/bindings/ruby/fdb-6.2.2.gem>`_
* `fdb-6.2.3.gem <https://www.foundationdb.org/downloads/6.2.3/bindings/ruby/fdb-6.2.3.gem>`_
Java 8+
-------
* `fdb-java-6.2.2.jar <https://www.foundationdb.org/downloads/6.2.2/bindings/java/fdb-java-6.2.2.jar>`_
* `fdb-java-6.2.2-javadoc.jar <https://www.foundationdb.org/downloads/6.2.2/bindings/java/fdb-java-6.2.2-javadoc.jar>`_
* `fdb-java-6.2.3.jar <https://www.foundationdb.org/downloads/6.2.3/bindings/java/fdb-java-6.2.3.jar>`_
* `fdb-java-6.2.3-javadoc.jar <https://www.foundationdb.org/downloads/6.2.3/bindings/java/fdb-java-6.2.3-javadoc.jar>`_
Go 1.11+
--------

View File

@ -66,7 +66,8 @@
"cluster_controller",
"data_distributor",
"ratekeeper",
"router"
"router",
"coordinator"
]
},
"data_version":12341234,
@ -264,7 +265,23 @@
"limiting_queue_bytes_storage_server":0,
"worst_queue_bytes_storage_server":0,
"limiting_version_lag_storage_server":0,
"worst_version_lag_storage_server":0
"worst_version_lag_storage_server":0,
"limiting_data_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"worst_data_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"limiting_durability_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"worst_durability_lag_storage_server":{
"versions":0,
"seconds":0.0
}
},
"incompatible_connections":[
],
@ -460,6 +477,9 @@
"full_replication":true,
"maintenance_zone":"0ccb4e0fdbdb5583010f6b77d9d10ece",
"maintenance_seconds_remaining":1.0,
"data_distribution_disabled_for_ss_failures":true,
"data_distribution_disabled_for_rebalance":true,
"data_distribution_disabled":true,
"configuration":{
"log_anti_quorum":0,
"log_replicas":2,

View File

@ -2,7 +2,7 @@
Release Notes
#############
6.2.2
6.2.3
=====
Performance
@ -25,6 +25,7 @@ Performance
* Made the storage cache eviction policy configurable, and added an LRU policy. `(PR #1506) <https://github.com/apple/foundationdb/pull/1506>`_.
* Improved the speed of recoveries on large clusters at ``log_version >= 4``. `(PR #1729) <https://github.com/apple/foundationdb/pull/1729>`_.
* Log routers will prefer to peek from satellites at ``log_version >= 4``. `(PR #1795) <https://github.com/apple/foundationdb/pull/1795>`_.
* In clusters using a region configuration, clients will read from the remote region if all of the servers in the primary region are overloaded. [6.2.3] `(PR #2019) <https://github.com/apple/foundationdb/pull/2019>`_.
Fixes
-----
@ -40,6 +41,10 @@ Fixes
* In very rare scenarios, master recovery would restart because system metadata was loaded incorrectly. `(PR #1919) <https://github.com/apple/foundationdb/pull/1919>`_.
* Ratekeeper will aggressively throttle when unable to fetch the list of storage servers for a considerable period of time. `(PR #1858) <https://github.com/apple/foundationdb/pull/1858>`_.
* Proxies could become overloaded when all storage servers on a team fail. [6.2.1] `(PR #1976) <https://github.com/apple/foundationdb/pull/1976>`_.
* Proxies could start too few transactions if they didn't receive get read version requests frequently enough. [6.2.3] `(PR #1999) <https://github.com/apple/foundationdb/pull/1999>`_.
* The ``fileconfigure`` command in ``fdbcli`` could fail with an unknown error if the file did not contain a valid JSON object. `(PR #2017) <https://github.com/apple/foundationdb/pull/2017>`_.
* Configuring regions would fail with an internal error if the cluster contained storage servers that didn't set a datacenter ID. `(PR #2017) <https://github.com/apple/foundationdb/pull/2017>`_.
* Clients no longer prefer reading from servers with the same zone ID, because it could create hot shards. [6.2.3] `(PR #2019) <https://github.com/apple/foundationdb/pull/2019>`_.
Status
------
@ -54,6 +59,9 @@ Status
* ``connected_clients`` is now only a sample of the connected clients, rather than a complete list. `(PR #1902) <https://github.com/apple/foundationdb/pull/1902>`_.
* Added ``max_protocol_clients`` to the ``supported_versions`` section, which provides a sample of connected clients which cannot connect to any higher protocol version. `(PR #1902) <https://github.com/apple/foundationdb/pull/1902>`_.
* Clients which connect without specifying their supported versions are tracked as an ``Unknown`` version in the ``supported_versions`` section. [6.2.2] `(PR #1990) <https://github.com/apple/foundationdb/pull/1990>`_.
* Added ``coordinator`` to the list of roles that can be reported for a process. [6.2.3] `(PR #2006) <https://github.com/apple/foundationdb/pull/2006>`_.
* Added ``worst_durability_lag_storage_server`` and ``limiting_durability_lag_storage_server`` to the ``cluster.qos`` section, each with subfields ``versions`` and ``seconds``. These report the durability lag values being used by ratekeeper to potentially limit the transaction rate. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.
* Added ``worst_data_lag_storage_server`` and ``limiting_data_lag_storage_server`` to the ``cluster.qos`` section, each with subfields ``versions`` and ``seconds``. These are meant to replace ``worst_version_lag_storage_server`` and ``limiting_version_lag_storage_server``, which are now deprecated. [6.2.3] `(PR #2003) <https://github.com/apple/foundationdb/pull/2003>`_.
Bindings
--------
@ -92,6 +100,10 @@ Fixes only impacting 6.2.0+
* Clients could crash when closing connections with incompatible servers. [6.2.1] `(PR #1976) <https://github.com/apple/foundationdb/pull/1976>`_.
* Do not close idle network connections with incompatible servers. [6.2.1] `(PR #1976) <https://github.com/apple/foundationdb/pull/1976>`_.
* In status, ``max_protocol_clients`` were incorrectly added to the ``connected_clients`` list. [6.2.2] `(PR #1990) <https://github.com/apple/foundationdb/pull/1990>`_.
* Ratekeeper ignores the (default 5 second) MVCC window when controlling on durability lag. [6.2.3] `(PR #2012) <https://github.com/apple/foundationdb/pull/2012>`_.
* The macOS client was not compatible with a Linux server. [6.2.3] `(PR #2045) <https://github.com/apple/foundationdb/pull/2045>`_.
* Incompatible clients would continually reconnect with coordinators. [6.2.3] `(PR #2048) <https://github.com/apple/foundationdb/pull/2048>`_.
* Connections were being closed as idle when there were still unreliable requests waiting for a response. [6.2.3] `(PR #2048) <https://github.com/apple/foundationdb/pull/2048>`_.
Earlier release notes
---------------------

View File

@ -586,7 +586,6 @@ CSimpleOpt::SOption g_rgDBAgentOptions[] = {
#ifdef _WIN32
{ OPT_PARENTPID, "--parentpid", SO_REQ_SEP },
#endif
{ OPT_TRACE_LOG_GROUP, "--loggroup", SO_REQ_SEP },
{ OPT_SOURCE_CLUSTER, "-s", SO_REQ_SEP },
{ OPT_SOURCE_CLUSTER, "--source", SO_REQ_SEP },
{ OPT_DEST_CLUSTER, "-d", SO_REQ_SEP },
@ -826,6 +825,9 @@ static void printAgentUsage(bool devhelp) {
" --logdir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
" no effect unless --log is specified.\n");
printf(" --loggroup LOG_GROUP\n"
" Sets the LogGroup field with the specified value for all\n"
" events in the trace output (defaults to `default').\n");
printf(" --trace_format FORMAT\n"
" Select the format of the trace files. xml (the default) and json are supported.\n"
" Has no effect unless --log is specified.\n");
@ -912,6 +914,9 @@ static void printBackupUsage(bool devhelp) {
" --logdir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
" no effect unless --log is specified.\n");
printf(" --loggroup LOG_GROUP\n"
" Sets the LogGroup field with the specified value for all\n"
" events in the trace output (defaults to `default').\n");
printf(" --trace_format FORMAT\n"
" Select the format of the trace files. xml (the default) and json are supported.\n"
" Has no effect unless --log is specified.\n");
@ -970,6 +975,9 @@ static void printRestoreUsage(bool devhelp ) {
" --logdir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
" no effect unless --log is specified.\n");
printf(" --loggroup LOG_GROUP\n"
" Sets the LogGroup field with the specified value for all\n"
" events in the trace output (defaults to `default').\n");
printf(" --trace_format FORMAT\n"
" Select the format of the trace files. xml (the default) and json are supported.\n"
" Has no effect unless --log is specified.\n");
@ -1015,6 +1023,9 @@ static void printDBAgentUsage(bool devhelp) {
" --logdir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
" no effect unless --log is specified.\n");
printf(" --loggroup LOG_GROUP\n"
" Sets the LogGroup field with the specified value for all\n"
" events in the trace output (defaults to `default').\n");
printf(" --trace_format FORMAT\n"
" Select the format of the trace files. xml (the default) and json are supported.\n"
" Has no effect unless --log is specified.\n");
@ -1062,6 +1073,9 @@ static void printDBBackupUsage(bool devhelp) {
" --logdir PATH Specifes the output directory for trace files. If\n"
" unspecified, defaults to the current directory. Has\n"
" no effect unless --log is specified.\n");
printf(" --loggroup LOG_GROUP\n"
" Sets the LogGroup field with the specified value for all\n"
" events in the trace output (defaults to `default').\n");
printf(" --trace_format FORMAT\n"
" Select the format of the trace files. xml (the default) and json are supported.\n"
" Has no effect unless --log is specified.\n");

View File

@ -498,6 +498,10 @@ void initHelp() {
helpMap["quit"] = CommandHelp();
helpMap["waitconnected"] = CommandHelp();
helpMap["waitopen"] = CommandHelp();
helpMap["sleep"] = CommandHelp(
"sleep <SECONDS>",
"sleep for a period of time",
"");
helpMap["get"] = CommandHelp(
"get <KEY>",
"fetch the value for a given key",
@ -1493,6 +1497,17 @@ void printStatus(StatusObjectReader statusObj, StatusClient::StatusLevel level,
outputString += "\n\nWARNING: A single process is both a transaction log and a storage server.\n For best performance use dedicated disks for the transaction logs by setting process classes.";
}
if (statusObjCluster.has("data_distribution_disabled")) {
outputString += "\n\nWARNING: Data distribution is off.";
} else {
if (statusObjCluster.has("data_distribution_disabled_for_ss_failures")) {
outputString += "\n\nWARNING: Data distribution is currently turned on but disabled for all storage server failures.";
}
if (statusObjCluster.has("data_distribution_disabled_for_rebalance")) {
outputString += "\n\nWARNING: Data distribution is currently turned on but shard size balancing is currently disabled.";
}
}
printf("%s\n", outputString.c_str());
}
@ -1777,6 +1792,10 @@ ACTOR Future<bool> fileConfigure(Database db, std::string filePath, bool isNewDa
printf("ERROR: Invalid JSON\n");
return true;
}
if(config.type() != json_spirit::obj_type) {
printf("ERROR: Configuration file must contain a JSON object\n");
return true;
}
StatusObject configJSON = config.get_obj();
json_spirit::mValue schema;
@ -2195,7 +2214,14 @@ ACTOR Future<bool> exclude( Database db, std::vector<StringRef> tokens, Referenc
}
}
ACTOR Future<bool> createSnapshot(Database db, StringRef snapCmd) {
ACTOR Future<bool> createSnapshot(Database db, std::vector<StringRef> tokens ) {
state Standalone<StringRef> snapCmd;
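// Rejoin the whitespace-split tokens after the command name into a single space-separated snapshot command.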
for ( int i = 1; i < tokens.size(); i++) {
snapCmd = snapCmd.withSuffix(tokens[i]);
if (i != tokens.size() - 1) {
snapCmd = snapCmd.withSuffix(LiteralStringRef(" "));
}
}
try {
UID snapUID = wait(makeInterruptable(mgmtSnapCreate(db, snapCmd)));
printf("Snapshot command succeeded with UID %s\n", snapUID.toString().c_str());
@ -2589,8 +2615,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
if (!opt.exec.present()) {
if(opt.initialStatusCheck) {
Future<Void> checkStatusF = checkStatus(Void(), db->getConnectionFile());
Future<Void> checkDDStatusF = checkDataDistributionStatus(db, true);
wait(makeInterruptable(success(checkStatusF) && success(checkDDStatusF)));
wait(makeInterruptable(success(checkStatusF)));
}
else {
printf("\n");
@ -2736,6 +2761,23 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
continue;
}
if( tokencmp(tokens[0], "sleep")) {
if(tokens.size() != 2) {
printUsage(tokens[0]);
is_error = true;
} else {
double v;
int n=0;
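// %n records how many characters sscanf consumed; requiring n == token length rejects trailing garbage (e.g. "1.5x").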
if (sscanf(tokens[1].toString().c_str(), "%lf%n", &v, &n) != 1 || n != tokens[1].size()) {
printUsage(tokens[0]);
is_error = true;
} else {
wait(delay(v));
}
}
continue;
}
if (tokencmp(tokens[0], "status")) {
// Warn at 7 seconds since status will spend as long as 5 seconds trying to read/write from the database
warn = timeWarning( 7.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n" );
@ -2811,11 +2853,11 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
}
if (tokencmp(tokens[0], "snapshot")) {
if (tokens.size() != 2) {
if (tokens.size() < 2) {
printUsage(tokens[0]);
is_error = true;
} else {
bool err = wait(createSnapshot(db, tokens[1]));
bool err = wait(createSnapshot(db, tokens));
if (err) is_error = true;
}
continue;
@ -3426,13 +3468,11 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
if (tokencmp(tokens[0], "datadistribution")) {
if (tokens.size() != 2 && tokens.size() != 3) {
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
"<ssfailure|rebalance>>\n");
is_error = true;
} else {
if (tokencmp(tokens[1], "status")) {
wait(makeInterruptable(checkDataDistributionStatus(db)));
} else if (tokencmp(tokens[1], "on")) {
if (tokencmp(tokens[1], "on")) {
wait(success(setDDMode(db, 1)));
printf("Data distribution is turned on.\n");
} else if (tokencmp(tokens[1], "off")) {
@ -3446,7 +3486,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
wait(makeInterruptable(setDDIgnoreRebalanceSwitch(db, true)));
printf("Data distribution is disabled for rebalance.\n");
} else {
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
"<ssfailure|rebalance>>\n");
is_error = true;
}
@ -3458,12 +3498,12 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
wait(makeInterruptable(setDDIgnoreRebalanceSwitch(db, false)));
printf("Data distribution is enabled for rebalance.\n");
} else {
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
"<ssfailure|rebalance>>\n");
is_error = true;
}
} else {
printf("Usage: datadistribution <status|on|off|disable <ssfailure|rebalance>|enable "
printf("Usage: datadistribution <on|off|disable <ssfailure|rebalance>|enable "
"<ssfailure|rebalance>>\n");
is_error = true;
}

View File

@ -172,7 +172,6 @@ public:
Counter transactionsMaybeCommitted;
Counter transactionsResourceConstrained;
Counter transactionsProcessBehind;
Counter transactionWaitsForFullRecovery;
ContinuousSample<double> latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit;

View File

@ -423,11 +423,11 @@ ACTOR Future<ConfigurationResult::Type> changeConfig( Database cx, std::map<std:
for(auto& it : newConfig.regions) {
newDcIds.insert(it.dcId);
}
std::set<Key> missingDcIds;
std::set<Optional<Key>> missingDcIds;
for(auto& s : serverList) {
auto ssi = decodeServerListValue( s.value );
if ( !ssi.locality.dcId().present() || !newDcIds.count(ssi.locality.dcId().get()) ) {
missingDcIds.insert(ssi.locality.dcId().get());
missingDcIds.insert(ssi.locality.dcId());
}
}
if(missingDcIds.size() > (oldReplicationUsesDcId ? 1 : 0)) {
@ -1340,61 +1340,17 @@ ACTOR Future<vector<AddressExclusion>> getExcludedServers( Database cx ) {
}
}
ACTOR Future<Void> checkDataDistributionStatus(Database cx, bool printWarningOnly) {
state Transaction tr(cx);
loop {
try {
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
state Future<Optional<Value>> overallSwitchF = tr.get(dataDistributionModeKey);
state Future<Optional<Value>> healthyZoneValueF = tr.get(healthyZoneKey);
state Future<Optional<Value>> rebalanceDDIgnoreValueF = tr.get(rebalanceDDIgnoreKey);
wait(success(overallSwitchF) && success(healthyZoneValueF) && success(rebalanceDDIgnoreValueF));
if (overallSwitchF.get().present()) {
BinaryReader rd(overallSwitchF.get().get(), Unversioned());
int currentMode;
rd >> currentMode;
if (currentMode == 0) {
printf("WARNING: Data distribution is off.\n");
return Void();
}
}
if (!printWarningOnly) {
printf("Data distribution is on.\n");
}
if (healthyZoneValueF.get().present()) {
auto healthyZoneKV = decodeHealthyZoneValue(healthyZoneValueF.get().get());
if (healthyZoneKV.first == ignoreSSFailuresZoneString) {
printf("WARNING: Data distribution is currently turned on but disabled for all storage server "
"failures.\n");
} else {
printf("WARNING: Data distribution is currently turned on but zone %s is under maintenance and "
"will continue for %" PRId64 " seconds.\n",
healthyZoneKV.first.toString().c_str(),
(healthyZoneKV.second - tr.getReadVersion().get()) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
}
}
if (rebalanceDDIgnoreValueF.get().present()) {
printf("WARNING: Data distribution is currently turned on but shard size balancing is currently "
"disabled.\n");
}
return Void();
} catch (Error& e) {
wait(tr.onError(e));
}
}
}
ACTOR Future<Void> printHealthyZone( Database cx ) {
state Transaction tr(cx);
loop {
try {
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
Optional<Value> val = wait( tr.get(healthyZoneKey) );
if(!val.present() || decodeHealthyZoneValue(val.get()).second <= tr.getReadVersion().get()) {
printf("No ongoing maintenance.\n");
} else if (val.present() && decodeHealthyZoneValue(val.get()).first == ignoreSSFailuresZoneString) {
if (val.present() && decodeHealthyZoneValue(val.get()).first == ignoreSSFailuresZoneString) {
printf("Data distribution has been disabled for all storage server failures in this cluster and thus "
"maintenance mode is not active.\n");
} else if(!val.present() || decodeHealthyZoneValue(val.get()).second <= tr.getReadVersion().get()) {
printf("No ongoing maintenance.\n");
} else {
auto healthyZone = decodeHealthyZoneValue(val.get());
printf("Maintenance for zone %s will continue for %" PRId64 " seconds.\n", healthyZone.first.toString().c_str(), (healthyZone.second-tr.getReadVersion().get())/CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
@ -1577,7 +1533,7 @@ ACTOR Future<std::set<NetworkAddress>> checkForExcludingServers(Database cx, vec
return inProgressExclusion;
}
ACTOR Future<UID> mgmtSnapCreate(Database cx, StringRef snapCmd) {
ACTOR Future<UID> mgmtSnapCreate(Database cx, Standalone<StringRef> snapCmd) {
state UID snapUID = deterministicRandom()->randomUniqueID();
try {
wait(snapCreate(cx, snapCmd, snapUID));

View File

@ -181,7 +181,6 @@ ACTOR Future<int> setDDMode( Database cx, int mode );
ACTOR Future<Void> forceRecovery( Reference<ClusterConnectionFile> clusterFile, Standalone<StringRef> dcId );
ACTOR Future<Void> checkDataDistributionStatus(Database cx, bool printWarningOnly = false);
ACTOR Future<Void> printHealthyZone( Database cx );
ACTOR Future<Void> setDDIgnoreRebalanceSwitch(Database cx, bool ignoreRebalance);
ACTOR Future<bool> clearHealthyZone(Database cx, bool printWarning = false, bool clearSSFailureZoneString = false);
@ -197,7 +196,7 @@ bool schemaMatch( json_spirit::mValue const& schema, json_spirit::mValue const&
// execute payload in 'snapCmd' on all the coordinators, TLogs and
// storage nodes
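// (invoked from fdbcli's "snapshot" command, which assembles snapCmd from its space-separated arguments)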
ACTOR Future<UID> mgmtSnapCreate(Database cx, StringRef snapCmd);
ACTOR Future<UID> mgmtSnapCreate(Database cx, Standalone<StringRef> snapCmd);
#include "flow/unactorcompiler.h"
#endif

View File

@ -519,7 +519,7 @@ DatabaseContext::DatabaseContext(
transactionCommittedMutations("CommittedMutations", cc), transactionCommittedMutationBytes("CommittedMutationBytes", cc), transactionsCommitStarted("CommitStarted", cc),
transactionsCommitCompleted("CommitCompleted", cc), transactionsTooOld("TooOld", cc), transactionsFutureVersions("FutureVersions", cc),
transactionsNotCommitted("NotCommitted", cc), transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
transactionsProcessBehind("ProcessBehind", cc), transactionWaitsForFullRecovery("WaitsForFullRecovery", cc), outstandingWatches(0),
transactionsProcessBehind("ProcessBehind", cc), outstandingWatches(0),
latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), mvCacheInsertLocation(0),
healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0), internal(internal)
{
@ -548,7 +548,7 @@ DatabaseContext::DatabaseContext( const Error &err ) : deferredError(err), cc("T
transactionCommittedMutations("CommittedMutations", cc), transactionCommittedMutationBytes("CommittedMutationBytes", cc), transactionsCommitStarted("CommitStarted", cc),
transactionsCommitCompleted("CommitCompleted", cc), transactionsTooOld("TooOld", cc), transactionsFutureVersions("FutureVersions", cc),
transactionsNotCommitted("NotCommitted", cc), transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
transactionsProcessBehind("ProcessBehind", cc), transactionWaitsForFullRecovery("WaitsForFullRecovery", cc), latencies(1000), readLatencies(1000), commitLatencies(1000),
transactionsProcessBehind("ProcessBehind", cc), latencies(1000), readLatencies(1000), commitLatencies(1000),
GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000),
internal(false) {}
@ -1474,11 +1474,11 @@ ACTOR Future<Void> watchValue(Future<Version> version, Key key, Optional<Value>
g_traceBatch.addAttach("WatchValueAttachID", info.debugID.get().first(), watchValueID.get().first());
g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.Before"); //.detail("TaskID", g_network->getCurrentTask());
}
state Version resp;
state WatchValueReply resp;
choose {
when(Version r = wait(loadBalance(ssi.second, &StorageServerInterface::watchValue,
WatchValueRequest(key, value, ver, watchValueID),
TaskPriority::DefaultPromiseEndpoint))) {
when(WatchValueReply r = wait(loadBalance(ssi.second, &StorageServerInterface::watchValue,
WatchValueRequest(key, value, ver, watchValueID),
TaskPriority::DefaultPromiseEndpoint))) {
resp = r;
}
when(wait(cx->connectionFile ? cx->connectionFile->onChange() : Never())) { wait(Never()); }
@ -1489,12 +1489,13 @@ ACTOR Future<Void> watchValue(Future<Version> version, Key key, Optional<Value>
//FIXME: wait for known committed version on the storage server before replying,
//cannot do this until the storage server is notified on knownCommittedVersion changes from tlog (faster than the current update loop)
Version v = wait( waitForCommittedVersion( cx, resp ) );
Version v = wait(waitForCommittedVersion(cx, resp.version));
//TraceEvent("WatcherCommitted").detail("CommittedVersion", v).detail("WatchVersion", resp).detail("Key", key ).detail("Value", value);
//TraceEvent("WatcherCommitted").detail("CommittedVersion", v).detail("WatchVersion", resp.version).detail("Key", key ).detail("Value", value);
if( v - resp < 50000000 ) // False if there is a master failure between getting the response and getting the committed version, Dependent on SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT
return Void();
// False if there is a master failure between getting the response and getting the committed version,
// Dependent on SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT
if (v - resp.version < 50000000) return Void();
ver = v;
} catch (Error& e) {
if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed) {
@ -2705,10 +2706,7 @@ ACTOR static Future<Void> tryCommit( Database cx, Reference<TransactionLogInfo>
if (e.code() != error_code_transaction_too_old
&& e.code() != error_code_not_committed
&& e.code() != error_code_database_locked
&& e.code() != error_code_proxy_memory_limit_exceeded
&& e.code() != error_code_transaction_not_permitted
&& e.code() != error_code_cluster_not_fully_recovered
&& e.code() != error_code_txn_exec_log_anti_quorum)
&& e.code() != error_code_proxy_memory_limit_exceeded)
TraceEvent(SevError, "TryCommitError").error(e);
if (trLogInfo)
trLogInfo->addLog(FdbClientLogEvents::EventCommitError(startTime, static_cast<int>(e.code()), req));
@ -3115,8 +3113,7 @@ Future<Void> Transaction::onError( Error const& e ) {
e.code() == error_code_commit_unknown_result ||
e.code() == error_code_database_locked ||
e.code() == error_code_proxy_memory_limit_exceeded ||
e.code() == error_code_process_behind ||
e.code() == error_code_cluster_not_fully_recovered)
e.code() == error_code_process_behind)
{
if(e.code() == error_code_not_committed)
++cx->transactionsNotCommitted;
@ -3126,9 +3123,6 @@ Future<Void> Transaction::onError( Error const& e ) {
++cx->transactionsResourceConstrained;
if (e.code() == error_code_process_behind)
++cx->transactionsProcessBehind;
if (e.code() == error_code_cluster_not_fully_recovered) {
++cx->transactionWaitsForFullRecovery;
}
double backoff = getBackoff(e.code());
reset();
@ -3348,54 +3342,22 @@ void enableClientInfoLogging() {
TraceEvent(SevInfo, "ClientInfoLoggingEnabled");
}
ACTOR Future<Void> snapshotDatabase(Reference<DatabaseContext> cx, StringRef snapPayload, UID snapUID, Optional<UID> debugID) {
TraceEvent("SnapshotDatabaseEnter")
.detail("SnapPayload", snapPayload)
.detail("SnapUID", snapUID);
try {
if (debugID.present()) {
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.snapshotDatabase.Before");
}
choose {
when(wait(cx->onMasterProxiesChanged())) { throw operation_failed(); }
when(wait(loadBalance(cx->getMasterProxies(false), &MasterProxyInterface::proxySnapReq, ProxySnapRequest(snapPayload, snapUID, debugID), cx->taskID, true /*atmostOnce*/ ))) {
if (debugID.present())
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(),
"NativeAPI.SnapshotDatabase.After");
}
}
} catch (Error& e) {
TraceEvent("SnapshotDatabaseError")
.error(e)
.detail("SnapPayload", snapPayload)
.detail("SnapUID", snapUID);
throw;
}
return Void();
}
ACTOR Future<Void> snapCreate(Database cx, StringRef snapCmd, UID snapUID) {
// remember the client ID before the snap operation
state UID preSnapClientUID = cx->clientInfo->get().id;
ACTOR Future<Void> snapCreate(Database cx, Standalone<StringRef> snapCmd, UID snapUID) {
TraceEvent("SnapCreateEnter")
.detail("SnapCmd", snapCmd.toString())
.detail("UID", snapUID)
.detail("PreSnapClientUID", preSnapClientUID);
StringRef snapCmdArgs = snapCmd;
StringRef snapCmdPart = snapCmdArgs.eat(":");
Standalone<StringRef> snapUIDRef(snapUID.toString());
Standalone<StringRef> snapPayloadRef = snapCmdPart
.withSuffix(LiteralStringRef(":uid="))
.withSuffix(snapUIDRef)
.withSuffix(LiteralStringRef(","))
.withSuffix(snapCmdArgs);
.detail("UID", snapUID);
try {
Future<Void> exec = snapshotDatabase(Reference<DatabaseContext>::addRef(cx.getPtr()), snapPayloadRef, snapUID, snapUID);
wait(exec);
loop {
choose {
when(wait(cx->onMasterProxiesChanged())) {}
when(wait(loadBalance(cx->getMasterProxies(false), &MasterProxyInterface::proxySnapReq, ProxySnapRequest(snapCmd, snapUID, snapUID), cx->taskID, true /*atmostOnce*/ ))) {
TraceEvent("SnapCreateExit")
.detail("SnapCmd", snapCmd.toString())
.detail("UID", snapUID);
return Void();
}
}
}
} catch (Error& e) {
TraceEvent("SnapCreateError")
.detail("SnapCmd", snapCmd.toString())
@ -3403,19 +3365,4 @@ ACTOR Future<Void> snapCreate(Database cx, StringRef snapCmd, UID snapUID) {
.error(e);
throw;
}
UID postSnapClientUID = cx->clientInfo->get().id;
if (preSnapClientUID != postSnapClientUID) {
// if the client IDs changed then we fail the snapshot
TraceEvent("SnapCreateUIDMismatch")
.detail("SnapPreSnapClientUID", preSnapClientUID)
.detail("SnapPostSnapClientUID", postSnapClientUID);
throw coordinators_changed();
}
TraceEvent("SnapCreateExit")
.detail("SnapCmd", snapCmd.toString())
.detail("UID", snapUID)
.detail("PreSnapClientUID", preSnapClientUID);
return Void();
}

View File

@ -316,7 +316,7 @@ int64_t extractIntOption( Optional<StringRef> value, int64_t minValue = std::num
// Takes a snapshot of the cluster, specifically the following persistent
// states: coordinator, TLog and storage state
ACTOR Future<Void> snapCreate(Database cx, StringRef snapCmd, UID snapUID);
ACTOR Future<Void> snapCreate(Database cx, Standalone<StringRef> snapCmd, UID snapUID);
#include "flow/unactorcompiler.h"
#endif

View File

@ -86,7 +86,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"cluster_controller",
"data_distributor",
"ratekeeper",
"router"
"router",
"coordinator"
]
},
"data_version":12341234,
@ -286,7 +287,23 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"limiting_queue_bytes_storage_server":0,
"worst_queue_bytes_storage_server":0,
"limiting_version_lag_storage_server":0,
"worst_version_lag_storage_server":0
"worst_version_lag_storage_server":0,
"limiting_data_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"worst_data_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"limiting_durability_lag_storage_server":{
"versions":0,
"seconds":0.0
},
"worst_durability_lag_storage_server":{
"versions":0,
"seconds":0.0
}
},
"incompatible_connections":[
@ -484,6 +501,9 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"full_replication":true,
"maintenance_zone":"0ccb4e0fdbdb5583010f6b77d9d10ece",
"maintenance_seconds_remaining":1.0,
"data_distribution_disabled_for_ss_failures":true,
"data_distribution_disabled_for_rebalance":true,
"data_distribution_disabled":true,
"configuration":{
"log_anti_quorum":0,
"log_replicas":2,

View File

@ -30,6 +30,20 @@
#include "flow/Stats.h"
#include "fdbrpc/TimedRequest.h"
// Dead code, removed in the next protocol version
struct VersionReply {
constexpr static FileIdentifier file_identifier = 3;
Version version;
VersionReply() = default;
explicit VersionReply(Version version) : version(version) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version);
}
};
struct StorageServerInterface {
constexpr static FileIdentifier file_identifier = 15302073;
enum { BUSY_ALLOWED = 0, BUSY_FORCE = 1, BUSY_LOCAL = 2 };
@ -40,7 +54,7 @@ struct StorageServerInterface {
LocalityData locality;
UID uniqueID;
RequestStream<ReplyPromise<Version>> getVersion;
RequestStream<ReplyPromise<VersionReply>> getVersion;
RequestStream<struct GetValueRequest> getValue;
RequestStream<struct GetKeyRequest> getKey;
@ -140,14 +154,27 @@ struct GetValueRequest : TimedRequest {
}
};
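// Typed reply for WatchValueRequest; wraps the watch's version, which was previously returned as a bare Version.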
struct WatchValueReply {
constexpr static FileIdentifier file_identifier = 3;
Version version;
WatchValueReply() = default;
explicit WatchValueReply(Version version) : version(version) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version);
}
};
struct WatchValueRequest {
constexpr static FileIdentifier file_identifier = 14747733;
Key key;
Optional<Value> value;
Version version;
Optional<UID> debugID;
ReplyPromise< Version > reply;
ReplyPromise<WatchValueReply> reply;
WatchValueRequest(){}
WatchValueRequest(const Key& key, Optional<Value> value, Version ver, Optional<UID> debugID) : key(key), value(value), version(ver), debugID(debugID) {}
@ -219,6 +246,20 @@ struct GetKeyRequest : TimedRequest {
}
};
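// Named reply type for GetShardStateRequest; replaces the old std::pair<Version,Version> reply.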
struct GetShardStateReply {
constexpr static FileIdentifier file_identifier = 0;
Version first;
Version second;
GetShardStateReply() = default;
GetShardStateReply(Version first, Version second) : first(first), second(second) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, first, second);
}
};
struct GetShardStateRequest {
constexpr static FileIdentifier file_identifier = 15860168;
enum waitMode {
@ -229,7 +270,7 @@ struct GetShardStateRequest {
KeyRange keys;
int32_t mode;
ReplyPromise< std::pair<Version,Version> > reply;
ReplyPromise<GetShardStateReply> reply;
GetShardStateRequest() {}
GetShardStateRequest( KeyRange const& keys, waitMode mode ) : keys(keys), mode(mode) {}

View File

@ -268,6 +268,20 @@ TEST_CASE("/flow/flow/cancel2")
return Void();
}
namespace {
// Simple message for flatbuffers unittests
struct Int {
constexpr static FileIdentifier file_identifier = 12345;
uint32_t value;
Int() = default;
Int(uint32_t value) : value(value) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, value);
}
};
} // namespace
TEST_CASE("/flow/flow/nonserializable futures")
{
// Types no longer need to be statically serializable to make futures, promises, actors
@ -283,20 +297,20 @@ TEST_CASE("/flow/flow/nonserializable futures")
// ReplyPromise can be used like a normal promise
{
ReplyPromise<int> rpInt;
Future<int> f = rpInt.getFuture();
ReplyPromise<Int> rpInt;
Future<Int> f = rpInt.getFuture();
ASSERT(!f.isReady());
rpInt.send(123);
ASSERT(f.get() == 123);
ASSERT(f.get().value == 123);
}
{
RequestStream<int> rsInt;
FutureStream<int> f = rsInt.getFuture();
RequestStream<Int> rsInt;
FutureStream<Int> f = rsInt.getFuture();
rsInt.send(1);
rsInt.send(2);
ASSERT(f.pop() == 1);
ASSERT(f.pop() == 2);
ASSERT(f.pop().value == 1);
ASSERT(f.pop().value == 2);
}
return Void();
@ -306,14 +320,14 @@ TEST_CASE("/flow/flow/networked futures")
{
// RequestStream can be serialized
{
RequestStream<int> locInt;
RequestStream<Int> locInt;
BinaryWriter wr(IncludeVersion());
wr << locInt;
ASSERT(locInt.getEndpoint().isValid() && locInt.getEndpoint().isLocal() && locInt.getEndpoint().getPrimaryAddress() == FlowTransport::transport().getLocalAddress());
BinaryReader rd(wr.toValue(), IncludeVersion());
RequestStream<int> remoteInt;
RequestStream<Int> remoteInt;
rd >> remoteInt;
ASSERT(remoteInt.getEndpoint() == locInt.getEndpoint());
@ -323,14 +337,14 @@ TEST_CASE("/flow/flow/networked futures")
// ReplyPromise can be serialized
// TODO: This needs to fiddle with g_currentDeliveryPeerAddress
if (0) {
ReplyPromise<int> locInt;
ReplyPromise<Int> locInt;
BinaryWriter wr(IncludeVersion());
wr << locInt;
ASSERT(locInt.getEndpoint().isValid() && locInt.getEndpoint().isLocal());
BinaryReader rd(wr.toValue(), IncludeVersion());
ReplyPromise<int> remoteInt;
ReplyPromise<Int> remoteInt;
rd >> remoteInt;
ASSERT(remoteInt.getEndpoint() == locInt.getEndpoint());

View File

@ -288,353 +288,333 @@ struct ConnectPacket {
ACTOR static Future<Void> connectionReader(TransportData* transport, Reference<IConnection> conn, Reference<struct Peer> peer,
Promise<Reference<struct Peer>> onConnected);
static PacketID sendPacket( TransportData* self, ISerializeSource const& what, const Endpoint& destination, bool reliable, bool openConnection );
static void sendLocal( TransportData* self, ISerializeSource const& what, const Endpoint& destination );
static ReliablePacket* sendPacket( TransportData* self, Reference<Peer> peer, ISerializeSource const& what, const Endpoint& destination, bool reliable );
struct Peer : public ReferenceCounted<Peer> {
TransportData* transport;
NetworkAddress destination;
UnsentPacketQueue unsent;
ReliablePacketList reliable;
AsyncTrigger dataToSend; // Triggered when unsent.empty() becomes false
Future<Void> connect;
AsyncTrigger resetPing;
bool compatible;
bool outgoingConnectionIdle; // We don't actually have a connection open and aren't trying to open one because we don't have anything to send
double lastConnectTime;
double reconnectionDelay;
int peerReferences;
bool incompatibleProtocolVersionNewer;
int64_t bytesReceived;
double lastDataPacketSentTime;
explicit Peer(TransportData* transport, NetworkAddress const& destination)
: transport(transport), destination(destination), outgoingConnectionIdle(false), lastConnectTime(0.0),
reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), compatible(true),
incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0), lastDataPacketSentTime(now()) {}
void send(PacketBuffer* pb, ReliablePacket* rp, bool firstUnsent) {
unsent.setWriteBuffer(pb);
if (rp) reliable.insert(rp);
if (firstUnsent) dataToSend.trigger();
}
void prependConnectPacket() {
// Send the ConnectPacket expected at the beginning of a new connection
ConnectPacket pkt;
if(transport->localAddresses.address.isTLS() == destination.isTLS()) {
pkt.canonicalRemotePort = transport->localAddresses.address.port;
pkt.setCanonicalRemoteIp(transport->localAddresses.address.ip);
} else if(transport->localAddresses.secondaryAddress.present()) {
pkt.canonicalRemotePort = transport->localAddresses.secondaryAddress.get().port;
pkt.setCanonicalRemoteIp(transport->localAddresses.secondaryAddress.get().ip);
} else {
// a "mixed" TLS/non-TLS connection is like a client/server connection - there's no way to reverse it
pkt.canonicalRemotePort = 0;
pkt.setCanonicalRemoteIp(IPAddress(0));
}
pkt.connectPacketLength = sizeof(pkt) - sizeof(pkt.connectPacketLength);
pkt.protocolVersion = currentProtocolVersion;
if (FLOW_KNOBS->USE_OBJECT_SERIALIZER) {
pkt.protocolVersion.addObjectSerializerFlag();
}
pkt.connectionId = transport->transportId;
PacketBuffer* pb_first = PacketBuffer::create();
PacketWriter wr( pb_first, nullptr, Unversioned() );
pkt.serialize(wr);
unsent.prependWriteBuffer(pb_first, wr.finish());
}
void discardUnreliablePackets() {
// Throw away the current unsent list, dropping the reference count on each PacketBuffer that accounts for presence in the unsent list
unsent.discardAll();
// If there are reliable packets, compact reliable packets into a new unsent range
if(!reliable.empty()) {
PacketBuffer* pb = unsent.getWriteBuffer();
pb = reliable.compact(pb, nullptr);
unsent.setWriteBuffer(pb);
}
}
void onIncomingConnection( Reference<Peer> self, Reference<IConnection> conn, Future<Void> reader ) {
// In case two processes are trying to connect to each other simultaneously, the process with the larger canonical NetworkAddress
// gets to keep its outgoing connection.
if ( !destination.isPublic() && !outgoingConnectionIdle ) throw address_in_use();
NetworkAddress compatibleAddr = transport->localAddresses.address;
if(transport->localAddresses.secondaryAddress.present() && transport->localAddresses.secondaryAddress.get().isTLS() == destination.isTLS()) {
compatibleAddr = transport->localAddresses.secondaryAddress.get();
}
if ( !destination.isPublic() || outgoingConnectionIdle || destination > compatibleAddr ) {
// Keep the new connection
TraceEvent("IncomingConnection", conn->getDebugID())
.suppressFor(1.0)
.detail("FromAddr", conn->getPeerAddress())
.detail("CanonicalAddr", destination)
.detail("IsPublic", destination.isPublic());
connect.cancel();
prependConnectPacket();
connect = connectionKeeper( self, conn, reader );
} else {
TraceEvent("RedundantConnection", conn->getDebugID())
.suppressFor(1.0)
.detail("FromAddr", conn->getPeerAddress().toString())
.detail("CanonicalAddr", destination)
.detail("LocalAddr", compatibleAddr);
// Keep our prior connection
reader.cancel();
conn->close();
// Send an (ignored) packet to make sure that, if our outgoing connection died before the peer made this connection attempt,
// we eventually find out that our connection is dead, close it, and then respond to the next connection reattempt from peer.
}
}
ACTOR static Future<Void> connectionMonitor( Reference<Peer> peer ) {
state Endpoint remotePingEndpoint({ peer->destination }, WLTOKEN_PING_PACKET);
loop {
if (!FlowTransport::transport().isClient() && !peer->destination.isPublic()) {
// Don't send ping messages to clients unless necessary. Instead monitor incoming client pings.
state double lastRefreshed = now();
state int64_t lastBytesReceived = peer->bytesReceived;
loop {
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));
if (lastBytesReceived < peer->bytesReceived) {
lastRefreshed = now();
lastBytesReceived = peer->bytesReceived;
} else if (lastRefreshed < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT *
FLOW_KNOBS->CONNECTION_MONITOR_INCOMING_IDLE_MULTIPLIER) {
// If we have not received anything in this period, client must have closed
// connection by now. Break loop to check if it is still alive by sending a ping.
break;
}
}
}
//We cannot let an error be thrown from connectionMonitor while still on the stack from scanPackets in connectionReader
//because then it would not call the destructor of connectionReader when connectionReader is cancelled.
wait(delay(0));
if (peer->reliable.empty() && peer->unsent.empty()) {
if (peer->peerReferences == 0 &&
(peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_UNREFERENCED_CLOSE_DELAY)) {
// TODO: What about when peerReference == -1?
throw connection_unreferenced();
ACTOR Future<Void> connectionMonitor( Reference<Peer> peer ) {
    state Endpoint remotePingEndpoint({ peer->destination }, WLTOKEN_PING_PACKET);
    loop {
        if (!FlowTransport::transport().isClient() && !peer->destination.isPublic() && peer->compatible) {
            // Don't send ping messages to clients unless necessary. Instead monitor incoming client pings.
            // We ignore this block for incompatible clients because pings from server would trigger the
            // peer->resetPing and prevent 'connection_failed' due to ping timeout.
            state double lastRefreshed = now();
            state int64_t lastBytesReceived = peer->bytesReceived;
            loop {
                wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));
                if (lastBytesReceived < peer->bytesReceived) {
                    lastRefreshed = now();
                    lastBytesReceived = peer->bytesReceived;
                } else if (lastRefreshed < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT *
                                                       FLOW_KNOBS->CONNECTION_MONITOR_INCOMING_IDLE_MULTIPLIER) {
                    // If we have not received anything in this period, client must have closed
                    // connection by now. Break loop to check if it is still alive by sending a ping.
                    break;
                }
            }
        }

        //We cannot let an error be thrown from connectionMonitor while still on the stack from scanPackets in connectionReader
        //because then it would not call the destructor of connectionReader when connectionReader is cancelled.
        wait(delay(0));

        if (peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies == 0) {
            if (peer->peerReferences == 0 &&
                (peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_UNREFERENCED_CLOSE_DELAY)) {
                // TODO: What about when peerReference == -1?
                throw connection_unreferenced();
            } else if (FlowTransport::transport().isClient() && peer->compatible && peer->destination.isPublic() &&
                       (peer->lastConnectTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT) &&
                       (peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT)) {
                // First condition is necessary because we may get here if we are server.
                throw connection_idle();
            }
        }

        wait (delayJittered(FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME));

        // TODO: Stop monitoring and close the connection with no onDisconnect requests outstanding
        state ReplyPromise<Void> reply;
        FlowTransport::transport().sendUnreliable( SerializeSource<ReplyPromise<Void>>(reply), remotePingEndpoint, true );
        state int64_t startingBytes = peer->bytesReceived;
        state int timeouts = 0;
        loop {
            choose {
                when (wait( delay( FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT ) )) {
                    if(startingBytes == peer->bytesReceived) {
                        TraceEvent("ConnectionTimeout").suppressFor(1.0).detail("WithAddr", peer->destination);
                        throw connection_failed();
                    }
                    if(timeouts > 1) {
                        TraceEvent(SevWarnAlways, "ConnectionSlowPing")
                            .suppressFor(1.0)
                            .detail("WithAddr", peer->destination)
                            .detail("Timeouts", timeouts);
                    }
                    startingBytes = peer->bytesReceived;
                    timeouts++;
                }
                when (wait( reply.getFuture() )) {
                    break;
                }
                when (wait( peer->resetPing.onTrigger())) {
                    break;
                }
            }
        }
    }
}
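The monitor above makes two distinct timeout decisions: a client-side connection with no outbound traffic for CONNECTION_MONITOR_IDLE_TIMEOUT is closed as idle, while a ping that stays unanswered for CONNECTION_MONITOR_TIMEOUT with no bytes received at all fails the connection. A simplified stand-alone sketch (not FDB code; the Verdict type, judge() helper, and thresholds are illustrative):

#include <cstdio>

// Simplified stand-alone sketch of connectionMonitor's two timeout verdicts.
// Times are in seconds; threshold values are made up for illustration.
enum class Verdict { Keep, Idle, Failed };

Verdict judge(double now, double lastSent, double lastConnect, double pingAge, bool bytesSincePing) {
    const double idleTimeout = 180.0; // CONNECTION_MONITOR_IDLE_TIMEOUT (illustrative)
    const double pingTimeout = 4.0;   // CONNECTION_MONITOR_TIMEOUT (illustrative)
    if (now - lastSent > idleTimeout && now - lastConnect > idleTimeout)
        return Verdict::Idle;         // quiet client connection: close it, reopen on demand
    if (pingAge > pingTimeout && !bytesSincePing)
        return Verdict::Failed;       // outstanding ping and nothing received at all
    return Verdict::Keep;
}

int main() {
    printf("%d\n", (int)judge(1000.0, 700.0, 500.0, 0.0, true));  // 1: idle for 300s
    printf("%d\n", (int)judge(1000.0, 999.0, 999.0, 5.0, false)); // 2: dead ping
    printf("%d\n", (int)judge(1000.0, 999.0, 999.0, 1.0, true));  // 0: healthy
}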
ACTOR Future<Void> connectionWriter( Reference<Peer> self, Reference<IConnection> conn ) {
state double lastWriteTime = now();
loop {
//wait( delay(0, TaskPriority::WriteSocket) );
wait( delayJittered(std::max<double>(FLOW_KNOBS->MIN_COALESCE_DELAY, FLOW_KNOBS->MAX_COALESCE_DELAY - (now() - lastWriteTime)), TaskPriority::WriteSocket) );
//wait( delay(500e-6, TaskPriority::WriteSocket) );
//wait( yield(TaskPriority::WriteSocket) );
// Send until there is nothing left to send
loop {
lastWriteTime = now();
int sent = conn->write(self->unsent.getUnsent(), /* limit= */ FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
if (sent) {
self->transport->bytesSent += sent;
self->unsent.sent(sent);
}
if (self->unsent.empty()) break;
TEST(true); // We didn't write everything, so apparently the write buffer is full. Wait for it to be nonfull.
wait( conn->onWritable() );
wait( yield(TaskPriority::WriteSocket) );
}
// Wait until there is something to send
while ( self->unsent.empty() )
wait( self->dataToSend.onTrigger() );
}
}
ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
Reference<IConnection> conn = Reference<IConnection>(),
Future<Void> reader = Void()) {
TraceEvent(SevDebug, "ConnectionKeeper", conn ? conn->getDebugID() : UID())
.detail("PeerAddr", self->destination)
.detail("ConnSet", (bool)conn);
// This is used only at the client side, and it overrides waiting for unsent data before updating failure monitoring
// status. At the client, if an existing connection fails we retry making a connection, and only if that also fails
// do we report the address as failed.
state bool clientReconnectDelay = false;
loop {
try {
if (!conn) { // Always, except for the first loop with an incoming connection
self->outgoingConnectionIdle = true;
// Wait until there is something to send.
while (self->unsent.empty()) {
if (FlowTransport::transport().isClient() && self->destination.isPublic() &&
clientReconnectDelay) {
break;
}
wait(self->dataToSend.onTrigger());
}
ASSERT( self->destination.isPublic() );
self->outgoingConnectionIdle = false;
wait(delayJittered(
std::max(0.0, self->lastConnectTime + self->reconnectionDelay -
now()))); // Don't connect() to the same peer more than once per 2 sec
self->lastConnectTime = now();
TraceEvent("ConnectingTo", conn ? conn->getDebugID() : UID()).suppressFor(1.0).detail("PeerAddr", self->destination);
Reference<IConnection> _conn = wait( timeout( INetworkConnections::net()->connect(self->destination), FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT, Reference<IConnection>() ) );
if (_conn) {
if (FlowTransport::transport().isClient()) {
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false));
}
if (self->unsent.empty()) {
_conn->close();
clientReconnectDelay = false;
continue;
} else {
conn = _conn;
TraceEvent("ConnectionExchangingConnectPacket", conn->getDebugID())
.suppressFor(1.0)
.detail("PeerAddr", self->destination);
self->prependConnectPacket();
}
} else {
TraceEvent("ConnectionTimedOut", conn ? conn->getDebugID() : UID()).suppressFor(1.0).detail("PeerAddr", self->destination);
if (FlowTransport::transport().isClient()) {
IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(true));
}
throw connection_failed();
}
reader = connectionReader( self->transport, conn, self, Promise<Reference<Peer>>());
} else {
self->outgoingConnectionIdle = false;
}
try {
self->transport->countConnEstablished++;
wait( connectionWriter( self, conn ) || reader || connectionMonitor(self) );
} catch (Error& e) {
if (e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
e.code() == error_code_connection_unreferenced ||
(g_network->isSimulated() && e.code() == error_code_checksum_failed))
self->transport->countConnClosedWithoutError++;
else
self->transport->countConnClosedWithError++;
throw e;
}
ASSERT( false );
} catch (Error& e) {
if(now() - self->lastConnectTime > FLOW_KNOBS->RECONNECTION_RESET_TIME) {
self->reconnectionDelay = FLOW_KNOBS->INITIAL_RECONNECTION_TIME;
} else {
self->reconnectionDelay = std::min(FLOW_KNOBS->MAX_RECONNECTION_TIME, self->reconnectionDelay * FLOW_KNOBS->RECONNECTION_TIME_GROWTH_RATE);
}
self->discardUnreliablePackets();
reader = Future<Void>();
bool ok = e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
e.code() == error_code_connection_unreferenced || e.code() == error_code_connection_idle ||
(g_network->isSimulated() && e.code() == error_code_checksum_failed);
if(self->compatible) {
TraceEvent(ok ? SevInfo : SevWarnAlways, "ConnectionClosed", conn ? conn->getDebugID() : UID())
.error(e, true)
.suppressFor(1.0)
.detail("PeerAddr", self->destination);
}
else {
TraceEvent(ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed",
conn ? conn->getDebugID() : UID())
.error(e, true)
.suppressFor(1.0)
.detail("PeerAddr", self->destination);
}
if(self->destination.isPublic() && IFailureMonitor::failureMonitor().getState(self->destination).isAvailable()) {
auto& it = self->transport->closedPeers[self->destination];
if(now() - it.second > FLOW_KNOBS->TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY) {
it.first = now();
} else if(now() - it.first > FLOW_KNOBS->TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT) {
TraceEvent(SevWarnAlways, "TooManyConnectionsClosed", conn ? conn->getDebugID() : UID())
.suppressFor(5.0)
.detail("PeerAddr", self->destination);
self->transport->degraded->set(true);
}
it.second = now();
}
if (conn) {
if (FlowTransport::transport().isClient() && e.code() != error_code_connection_idle) {
clientReconnectDelay = true;
}
conn->close();
conn = Reference<IConnection>();
}
// Clients might send more packets in response, which needs to go out on the next connection
IFailureMonitor::failureMonitor().notifyDisconnect( self->destination );
if (e.code() == error_code_actor_cancelled) throw;
// Try to recover, even from serious errors, by retrying
if(self->peerReferences <= 0 && self->reliable.empty() && self->unsent.empty() && self->outstandingReplies==0) {
TraceEvent("PeerDestroy").error(e).suppressFor(1.0).detail("PeerAddr", self->destination);
self->connect.cancel();
self->transport->peers.erase(self->destination);
return Void();
}
}
}
}
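On each failure, connectionKeeper grows the reconnection delay geometrically up to MAX_RECONNECTION_TIME, and resets it to INITIAL_RECONNECTION_TIME once a connection has stayed up longer than RECONNECTION_RESET_TIME. A stand-alone sketch of that schedule (not FDB code; knob values are illustrative):

#include <algorithm>
#include <cstdio>

int main() {
    const double initialDelay = 0.05; // INITIAL_RECONNECTION_TIME (illustrative)
    const double maxDelay = 0.5;      // MAX_RECONNECTION_TIME (illustrative)
    const double growthRate = 1.2;    // RECONNECTION_TIME_GROWTH_RATE (illustrative)

    double delay = initialDelay;
    for (int attempt = 1; attempt <= 10; attempt++) {
        printf("attempt %2d: wait %.3fs before reconnecting\n", attempt, delay);
        // Each failed attempt grows the delay geometrically, capped at maxDelay;
        // a connection that stays up past RECONNECTION_RESET_TIME resets it.
        delay = std::min(maxDelay, delay * growthRate);
    }
}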
void Peer::send(PacketBuffer* pb, ReliablePacket* rp, bool firstUnsent) {
unsent.setWriteBuffer(pb);
if (rp) reliable.insert(rp);
if (firstUnsent) dataToSend.trigger();
}
void Peer::prependConnectPacket() {
// Send the ConnectPacket expected at the beginning of a new connection
ConnectPacket pkt;
if(transport->localAddresses.address.isTLS() == destination.isTLS()) {
pkt.canonicalRemotePort = transport->localAddresses.address.port;
pkt.setCanonicalRemoteIp(transport->localAddresses.address.ip);
} else if(transport->localAddresses.secondaryAddress.present()) {
pkt.canonicalRemotePort = transport->localAddresses.secondaryAddress.get().port;
pkt.setCanonicalRemoteIp(transport->localAddresses.secondaryAddress.get().ip);
} else {
// a "mixed" TLS/non-TLS connection is like a client/server connection - there's no way to reverse it
pkt.canonicalRemotePort = 0;
pkt.setCanonicalRemoteIp(IPAddress(0));
}
pkt.connectPacketLength = sizeof(pkt) - sizeof(pkt.connectPacketLength);
pkt.protocolVersion = currentProtocolVersion;
if (FLOW_KNOBS->USE_OBJECT_SERIALIZER) {
pkt.protocolVersion.addObjectSerializerFlag();
}
pkt.connectionId = transport->transportId;
PacketBuffer* pb_first = PacketBuffer::create();
PacketWriter wr( pb_first, nullptr, Unversioned() );
pkt.serialize(wr);
unsent.prependWriteBuffer(pb_first, wr.finish());
}
void Peer::discardUnreliablePackets() {
// Throw away the current unsent list, dropping the reference count on each PacketBuffer that accounts for presence in the unsent list
unsent.discardAll();
// If there are reliable packets, compact reliable packets into a new unsent range
if(!reliable.empty()) {
PacketBuffer* pb = unsent.getWriteBuffer();
pb = reliable.compact(pb, nullptr);
unsent.setWriteBuffer(pb);
}
}
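discardUnreliablePackets defines what survives a reconnect: the unsent queue is dropped wholesale, and only packets registered in the reliable list are compacted back into a fresh unsent queue. A stand-alone sketch (not FDB code) of that policy:

#include <cstdio>
#include <deque>
#include <string>

struct Packet { std::string data; bool reliable; };

int main() {
    std::deque<Packet> unsent = { {"ping", false}, {"commit", true}, {"watch", true} };
    std::deque<Packet> reliableList;
    for (auto& p : unsent)
        if (p.reliable) reliableList.push_back(p);    // stands in for ReliablePacketList

    unsent.clear();                                   // unsent.discardAll()
    for (auto& p : reliableList) unsent.push_back(p); // reliable.compact(...) into a new unsent range

    for (auto& p : unsent) printf("resend after reconnect: %s\n", p.data.c_str());
}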
void Peer::onIncomingConnection( Reference<Peer> self, Reference<IConnection> conn, Future<Void> reader ) {
// In case two processes are trying to connect to each other simultaneously, the process with the larger canonical NetworkAddress
// gets to keep its outgoing connection.
if ( !destination.isPublic() && !outgoingConnectionIdle ) throw address_in_use();
NetworkAddress compatibleAddr = transport->localAddresses.address;
if(transport->localAddresses.secondaryAddress.present() && transport->localAddresses.secondaryAddress.get().isTLS() == destination.isTLS()) {
compatibleAddr = transport->localAddresses.secondaryAddress.get();
}
if ( !destination.isPublic() || outgoingConnectionIdle || destination > compatibleAddr ) {
// Keep the new connection
TraceEvent("IncomingConnection", conn->getDebugID())
.suppressFor(1.0)
.detail("FromAddr", conn->getPeerAddress())
.detail("CanonicalAddr", destination)
.detail("IsPublic", destination.isPublic());
connect.cancel();
prependConnectPacket();
connect = connectionKeeper( self, conn, reader );
} else {
TraceEvent("RedundantConnection", conn->getDebugID())
.suppressFor(1.0)
.detail("FromAddr", conn->getPeerAddress().toString())
.detail("CanonicalAddr", destination)
.detail("LocalAddr", compatibleAddr);
// Keep our prior connection
reader.cancel();
conn->close();
// Send an (ignored) packet to make sure that, if our outgoing connection died before the peer made this connection attempt,
// we eventually find out that our connection is dead, close it, and then respond to the next connection reattempt from peer.
}
}
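Because both sides compare the same pair of canonical addresses, exactly one of the two simultaneous connections survives. A stand-alone sketch (not FDB code; the Addr type and keepIncomingConnection helper are hypothetical):

#include <cstdio>
#include <string>
#include <tuple>

struct Addr {
    std::string ip; int port;
    bool operator>(const Addr& o) const { return std::tie(ip, port) > std::tie(o.ip, o.port); }
};

// Mirrors the condition above: a private destination, an idle outgoing
// connection, or a larger peer address means we keep the incoming connection.
bool keepIncomingConnection(bool destinationPublic, bool outgoingIdle, const Addr& destination, const Addr& local) {
    return !destinationPublic || outgoingIdle || destination > local;
}

int main() {
    Addr a{"10.0.0.1", 4500}, b{"10.0.0.2", 4500};
    // When a and b dial each other at once, a keeps b's incoming connection
    // (b > a), while b drops a's incoming one and keeps its own outgoing.
    printf("a keeps incoming from b: %d\n", keepIncomingConnection(true, false, b, a)); // 1
    printf("b keeps incoming from a: %d\n", keepIncomingConnection(true, false, a, b)); // 0
}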
TransportData::~TransportData() {
for(auto &p : peers) {
@ -671,9 +651,12 @@ ACTOR static void deliver(TransportData* self, Endpoint destination, ArenaReader
} else if (destination.token.first() & TOKEN_STREAM_FLAG) {
// We don't have the (stream) endpoint 'token', notify the remote machine
if (destination.token.first() != -1) {
sendPacket(self,
SerializeSource<Endpoint>(Endpoint(self->localAddresses, destination.token)),
Endpoint(destination.addresses, WLTOKEN_ENDPOINT_NOT_FOUND), false, true);
if (self->isLocalAddress(destination.getPrimaryAddress())) {
sendLocal(self, SerializeSource<Endpoint>(Endpoint(self->localAddresses, destination.token)), Endpoint(destination.addresses, WLTOKEN_ENDPOINT_NOT_FOUND));
} else {
Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress());
sendPacket(self, peer, SerializeSource<Endpoint>(Endpoint(self->localAddresses, destination.token)), Endpoint(destination.addresses, WLTOKEN_ENDPOINT_NOT_FOUND), false);
}
}
}
@ -1013,7 +996,7 @@ Reference<Peer> TransportData::getPeer( NetworkAddress const& address, bool open
return Reference<Peer>();
}
Reference<Peer> newPeer = Reference<Peer>( new Peer(this, address) );
newPeer->connect = Peer::connectionKeeper(newPeer);
newPeer->connect = connectionKeeper(newPeer);
peers[address] = newPeer;
return newPeer;
}
@ -1113,7 +1096,7 @@ void FlowTransport::removePeerReference(const Endpoint& endpoint, bool isStream)
.detail("Address", endpoint.getPrimaryAddress())
.detail("Token", endpoint.token);
}
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty()) {
if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) {
peer->resetPing.trigger();
}
}
@ -1143,137 +1126,143 @@ void FlowTransport::addWellKnownEndpoint( Endpoint& endpoint, NetworkMessageRece
ASSERT( endpoint.token == otoken );
}
static void sendLocal( TransportData* self, ISerializeSource const& what, const Endpoint& destination ) {
    TEST(true); // "Loopback" delivery
    // SOMEDAY: Would it be better to avoid (de)serialization by doing this check in flow?

    Standalone<StringRef> copy;
    if (FLOW_KNOBS->USE_OBJECT_SERIALIZER) {
        ObjectWriter wr(AssumeVersion(currentProtocolVersion));
        what.serializeObjectWriter(wr);
        copy = wr.toStringRef();
    } else {
        BinaryWriter wr( AssumeVersion(currentProtocolVersion) );
        what.serializeBinaryWriter(wr);
        copy = wr.toValue();
    }
#if VALGRIND
    VALGRIND_CHECK_MEM_IS_DEFINED(copy.begin(), copy.size());
#endif

    ASSERT(copy.size() > 0);
    deliver(self, destination, ArenaReader(copy.arena(), copy, AssumeVersion(currentProtocolVersion)), false);
}

static ReliablePacket* sendPacket( TransportData* self, Reference<Peer> peer, ISerializeSource const& what, const Endpoint& destination, bool reliable ) {
    const bool checksumEnabled = !destination.getPrimaryAddress().isTLS();
    ++self->countPacketsGenerated;

    // If there isn't an open connection, a public address, or the peer isn't compatible, we can't send
    if (!peer || (peer->outgoingConnectionIdle && !destination.getPrimaryAddress().isPublic()) || (peer->incompatibleProtocolVersionNewer && destination.token != WLTOKEN_PING_PACKET)) {
        TEST(true); // Can't send to private address without a compatible open connection
        return nullptr;
    }

    bool firstUnsent = peer->unsent.empty();

    PacketBuffer* pb = peer->unsent.getWriteBuffer();
    ReliablePacket* rp = reliable ? new ReliablePacket : 0;

    int prevBytesWritten = pb->bytes_written;
    PacketBuffer* checksumPb = pb;

    PacketWriter wr(pb,rp,AssumeVersion(currentProtocolVersion)); // SOMEDAY: Can we downgrade to talk to older peers?

    // Reserve some space for packet length and checksum, write them after serializing data
    SplitBuffer packetInfoBuffer;
    uint32_t len, checksum = 0;
    int packetInfoSize = sizeof(len);
    if (checksumEnabled) {
        packetInfoSize += sizeof(checksum);
    }

    wr.writeAhead(packetInfoSize , &packetInfoBuffer);
    wr << destination.token;
    what.serializePacketWriter(wr, FLOW_KNOBS->USE_OBJECT_SERIALIZER);
    pb = wr.finish();
    len = wr.size() - packetInfoSize;

    if (checksumEnabled) {
        // Find the correct place to start calculating checksum
        uint32_t checksumUnprocessedLength = len;
        prevBytesWritten += packetInfoSize;
        if (prevBytesWritten >= checksumPb->bytes_written) {
            prevBytesWritten -= checksumPb->bytes_written;
            checksumPb = checksumPb->nextPacketBuffer();
        }

        // Checksum calculation
        while (checksumUnprocessedLength > 0) {
            uint32_t processLength =
                std::min(checksumUnprocessedLength, (uint32_t)(checksumPb->bytes_written - prevBytesWritten));
            checksum = crc32c_append(checksum, checksumPb->data() + prevBytesWritten, processLength);
            checksumUnprocessedLength -= processLength;
            checksumPb = checksumPb->nextPacketBuffer();
            prevBytesWritten = 0;
        }
    }

    // Write packet length and checksum into packet buffer
    packetInfoBuffer.write(&len, sizeof(len));
    if (checksumEnabled) {
        packetInfoBuffer.write(&checksum, sizeof(checksum), sizeof(len));
    }

    if (len > FLOW_KNOBS->PACKET_LIMIT) {
        TraceEvent(SevError, "Net2_PacketLimitExceeded").detail("ToPeer", destination.getPrimaryAddress()).detail("Length", (int)len);
        // throw platform_error(); // FIXME: How to recover from this situation?
    }
    else if (len > FLOW_KNOBS->PACKET_WARNING) {
        TraceEvent(self->warnAlwaysForLargePacket ? SevWarnAlways : SevWarn, "Net2_LargePacket")
            .suppressFor(1.0)
            .detail("ToPeer", destination.getPrimaryAddress())
            .detail("Length", (int)len)
            .detail("Token", destination.token)
            .backtrace();

        if(g_network->isSimulated())
            self->warnAlwaysForLargePacket = false;
    }

#if VALGRIND
    SendBuffer *checkbuf = pb;
    while (checkbuf) {
        int size = checkbuf->bytes_written;
        const uint8_t* data = checkbuf->data;
        VALGRIND_CHECK_MEM_IS_DEFINED(data, size);
        checkbuf = checkbuf -> next;
    }
#endif

    peer->send(pb, rp, firstUnsent);
    if (destination.token != WLTOKEN_PING_PACKET) {
        peer->lastDataPacketSentTime = now();
    }
    return rp;
}

ReliablePacket* FlowTransport::sendReliable( ISerializeSource const& what, const Endpoint& destination ) {
    if (self->isLocalAddress(destination.getPrimaryAddress())) {
        sendLocal( self, what, destination );
        return nullptr;
    }
    Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress());
    return sendPacket( self, peer, what, destination, true );
}

void FlowTransport::cancelReliable( ReliablePacket* p ) {
    if (p) p->remove();
    // SOMEDAY: Call reliable.compact() if a lot of memory is wasted in PacketBuffers by formerly reliable packets mixed with a few reliable ones. Don't forget to delref the new PacketBuffers since they are unsent.
}

Reference<Peer> FlowTransport::sendUnreliable( ISerializeSource const& what, const Endpoint& destination, bool openConnection ) {
    if (self->isLocalAddress(destination.getPrimaryAddress())) {
        sendLocal( self, what, destination );
        return Reference<Peer>();
    }
    Reference<Peer> peer = self->getPeer(destination.getPrimaryAddress(), openConnection);
    sendPacket( self, peer, what, destination, false );
    return peer;
}
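For a non-TLS peer, sendPacket frames each message as a 4-byte length, a 4-byte CRC32-C checksum, the 16-byte destination token, and the serialized payload; the length and checksum cover the token-plus-payload span, and the header is reserved up front with writeAhead and filled in afterwards. A stand-alone sketch of that layout (not FDB code; a toy additive checksum stands in for crc32c_append):

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Toy checksum used only so the sketch is self-contained.
static uint32_t toyChecksum(uint32_t sum, const uint8_t* data, size_t len) {
    for (size_t i = 0; i < len; i++) sum += data[i];
    return sum;
}

int main() {
    uint8_t token[16] = {}; // the destination Endpoint token (a 16-byte UID)
    std::vector<uint8_t> payload = {'p', 'i', 'n', 'g'};

    // len and checksum cover only the token+payload span.
    std::vector<uint8_t> body(token, token + sizeof(token));
    body.insert(body.end(), payload.begin(), payload.end());
    uint32_t len = (uint32_t)body.size();
    uint32_t checksum = toyChecksum(0, body.data(), body.size());

    // The real code reserves this header with writeAhead and writes it after serializing.
    std::vector<uint8_t> frame(sizeof(len) + sizeof(checksum));
    std::memcpy(frame.data(), &len, sizeof(len));
    std::memcpy(frame.data() + sizeof(len), &checksum, sizeof(checksum));
    frame.insert(frame.end(), body.begin(), body.end());

    std::printf("frame: %zu bytes, len field = %u\n", frame.size(), len); // 28 bytes, len = 20
}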
int FlowTransport::getEndpointCount() {

View File

@ -26,6 +26,7 @@
#include "flow/genericactors.actor.h"
#include "flow/network.h"
#include "flow/FileIdentifier.h"
#include "flow/Net2Packet.h"
#pragma pack(push, 4)
class Endpoint {
@ -103,7 +104,39 @@ public:
virtual bool isStream() const { return false; }
};
typedef struct NetworkPacket* PacketID;
struct TransportData;
struct Peer : public ReferenceCounted<Peer> {
TransportData* transport;
NetworkAddress destination;
UnsentPacketQueue unsent;
ReliablePacketList reliable;
AsyncTrigger dataToSend; // Triggered when unsent.empty() becomes false
Future<Void> connect;
AsyncTrigger resetPing;
bool compatible;
bool outgoingConnectionIdle; // We don't actually have a connection open and aren't trying to open one because we don't have anything to send
double lastConnectTime;
double reconnectionDelay;
int peerReferences;
bool incompatibleProtocolVersionNewer;
int64_t bytesReceived;
double lastDataPacketSentTime;
int outstandingReplies;
explicit Peer(TransportData* transport, NetworkAddress const& destination)
: transport(transport), destination(destination), outgoingConnectionIdle(false), lastConnectTime(0.0),
reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), compatible(true), outstandingReplies(0),
incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0), lastDataPacketSentTime(now()) {}
void send(PacketBuffer* pb, ReliablePacket* rp, bool firstUnsent);
void prependConnectPacket();
void discardUnreliablePackets();
void onIncomingConnection( Reference<Peer> self, Reference<IConnection> conn, Future<Void> reader );
};
class FlowTransport {
public:
@ -148,19 +181,19 @@ public:
// Sets endpoint to a new local endpoint (without changing its token) which delivers messages to the given receiver
// Implementations may have limitations on when this function is called and what endpoint.token may be!
PacketID sendReliable( ISerializeSource const& what, const Endpoint& destination );
ReliablePacket* sendReliable( ISerializeSource const& what, const Endpoint& destination );
// sendReliable will keep trying to deliver the data to the destination until cancelReliable is
// called. It will retry sending if the connection is closed or the failure manager reports
// the destination becomes available (edge triggered).
void cancelReliable( PacketID );
// Makes PacketID "unreliable" (either the data or a connection close event will be delivered
void cancelReliable( ReliablePacket* );
// Makes Packet "unreliable" (either the data or a connection close event will be delivered
// eventually). It can still be used safely to send a reply to a "reliable" request.
Reference<AsyncVar<bool>> getDegraded();
// This async var will be set to true when the process cannot connect to a public network address that the failure monitor thinks is healthy.
void sendUnreliable( ISerializeSource const& what, const Endpoint& destination, bool openConnection = true );// { cancelReliable(sendReliable(what,destination)); }
Reference<Peer> sendUnreliable( ISerializeSource const& what, const Endpoint& destination, bool openConnection );// { cancelReliable(sendReliable(what,destination)); }
int getEndpointCount();
// for tracing only

View File

@ -202,8 +202,10 @@ Future< REPLY_TYPE(Request) > loadBalance(
double nextMetric = 1e9;
double bestTime = 1e9;
double nextTime = 1e9;
int badServers = 0;
for(int i=0; i<alternatives->size(); i++) {
if(bestMetric < 1e8 && i == alternatives->countBest()) {
if(badServers < std::min(i, FLOW_KNOBS->LOAD_BALANCE_MAX_BAD_OPTIONS + 1) && i == alternatives->countBest()) {
break;
}
@ -213,6 +215,9 @@ Future< REPLY_TYPE(Request) > loadBalance(
if(now() > qd.failedUntil) {
double thisMetric = qd.smoothOutstanding.smoothTotal();
double thisTime = qd.latency;
if(FLOW_KNOBS->LOAD_BALANCE_PENALTY_IS_BAD && qd.penalty > 1.001) {
++badServers;
}
if(thisMetric < bestMetric) {
if(i != bestAlt) {
@ -228,7 +233,11 @@ Future< REPLY_TYPE(Request) > loadBalance(
nextMetric = thisMetric;
nextTime = thisTime;
}
} else {
++badServers;
}
} else {
++badServers;
}
}
if( nextMetric > 1e8 ) {

View File

@ -265,7 +265,7 @@ public:
void send(const T& value) const {
if (queue->isRemoteEndpoint()) {
FlowTransport::transport().sendUnreliable(SerializeSource<T>(value), getEndpoint());
FlowTransport::transport().sendUnreliable(SerializeSource<T>(value), getEndpoint(), true);
}
else
queue->send(value);
@ -317,9 +317,9 @@ public:
if (disc.isReady()) {
return ErrorOr<REPLY_TYPE(X)>(request_maybe_delivered());
}
FlowTransport::transport().sendUnreliable(SerializeSource<T>(value), getEndpoint(taskID));
Reference<Peer> peer = FlowTransport::transport().sendUnreliable(SerializeSource<T>(value), getEndpoint(taskID), true);
auto& p = getReplyPromise(value);
return waitValueOrSignal(p.getFuture(), disc, getEndpoint(taskID), p);
return waitValueOrSignal(p.getFuture(), disc, getEndpoint(taskID), p, peer);
}
send(value);
auto& p = getReplyPromise(value);
@ -333,9 +333,9 @@ public:
if (disc.isReady()) {
return ErrorOr<REPLY_TYPE(X)>(request_maybe_delivered());
}
FlowTransport::transport().sendUnreliable(SerializeSource<T>(value), getEndpoint());
Reference<Peer> peer = FlowTransport::transport().sendUnreliable(SerializeSource<T>(value), getEndpoint(), true);
auto& p = getReplyPromise(value);
return waitValueOrSignal(p.getFuture(), disc, getEndpoint(), p);
return waitValueOrSignal(p.getFuture(), disc, getEndpoint(), p, peer);
}
else {
send(value);

View File

@ -152,9 +152,24 @@ ACTOR template <class T> Future<Void> incrementalBroadcast( Future<T> input, std
// Needed for the call to endpointNotFound()
#include "fdbrpc/FailureMonitor.h"
struct PeerHolder {
Reference<Peer> peer;
explicit PeerHolder(Reference<Peer> peer) : peer(peer) {
if(peer) {
peer->outstandingReplies++;
}
}
~PeerHolder() {
if(peer) {
peer->outstandingReplies--;
}
}
};
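PeerHolder gives waitValueOrSignal scope-based bookkeeping of peer->outstandingReplies, so connectionMonitor will not close a connection as idle or unreferenced while a reply is still pending, and the count drops even if the waiter exits via an exception. A stand-alone sketch (not FDB code) of the same RAII pattern:

#include <cassert>

struct Peer { int outstandingReplies = 0; };

struct PeerHolder {
    Peer* peer;
    explicit PeerHolder(Peer* p) : peer(p) { if (peer) peer->outstandingReplies++; }
    ~PeerHolder() { if (peer) peer->outstandingReplies--; } // runs on any exit path
};

int main() {
    Peer peer;
    {
        PeerHolder holder(&peer);            // reply in flight: count is 1
        assert(peer.outstandingReplies == 1);
    }                                        // holder destroyed: count back to 0
    assert(peer.outstandingReplies == 0);
}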
// Implements tryGetReply, getReplyUnlessFailedFor
ACTOR template <class X>
Future<ErrorOr<X>> waitValueOrSignal( Future<X> value, Future<Void> signal, Endpoint endpoint, ReplyPromise<X> holdme = ReplyPromise<X>() ) {
Future<ErrorOr<X>> waitValueOrSignal( Future<X> value, Future<Void> signal, Endpoint endpoint, ReplyPromise<X> holdme = ReplyPromise<X>(), Reference<Peer> peer = Reference<Peer>() ) {
state PeerHolder holder = PeerHolder(peer);
loop {
try {
choose {
@ -185,7 +200,7 @@ Future<ErrorOr<X>> waitValueOrSignal( Future<X> value, Future<Void> signal, Endp
}
ACTOR template <class T>
Future<T> sendCanceler( ReplyPromise<T> reply, PacketID send, Endpoint endpoint ) {
Future<T> sendCanceler( ReplyPromise<T> reply, ReliablePacket* send, Endpoint endpoint ) {
try {
T t = wait( reply.getFuture() );
FlowTransport::transport().cancelReliable(send);

View File

@ -35,7 +35,7 @@ void networkSender(Future<T> input, Endpoint endpoint) {
try {
T value = wait(input);
if (FLOW_KNOBS->USE_OBJECT_SERIALIZER) {
FlowTransport::transport().sendUnreliable(SerializeSource<ErrorOr<EnsureTable<T>>>(value), endpoint);
FlowTransport::transport().sendUnreliable(SerializeSource<ErrorOr<EnsureTable<T>>>(value), endpoint, false);
} else {
FlowTransport::transport().sendUnreliable(SerializeBoolAnd<T>(true, value), endpoint, false);
}
@ -43,7 +43,7 @@ void networkSender(Future<T> input, Endpoint endpoint) {
// if (err.code() == error_code_broken_promise) return;
ASSERT(err.code() != error_code_actor_cancelled);
if (FLOW_KNOBS->USE_OBJECT_SERIALIZER) {
FlowTransport::transport().sendUnreliable(SerializeSource<ErrorOr<EnsureTable<T>>>(err), endpoint);
FlowTransport::transport().sendUnreliable(SerializeSource<ErrorOr<EnsureTable<T>>>(err), endpoint, false);
} else {
FlowTransport::transport().sendUnreliable(SerializeBoolAnd<Error>(false, err), endpoint, false);
}

View File

@ -369,7 +369,13 @@ private:
g_simulator.lastConnectionFailure = now();
double a = deterministicRandom()->random01(), b = deterministicRandom()->random01();
TEST(true); // Simulated connection failure
TraceEvent("ConnectionFailure", dbgid).detail("MyAddr", process->address).detail("PeerAddr", peerProcess->address).detail("SendClosed", a > .33).detail("RecvClosed", a < .66).detail("Explicit", b < .3);
TraceEvent("ConnectionFailure", dbgid)
.detail("MyAddr", process->address)
.detail("PeerAddr", peerProcess->address)
.detail("PeerIsValid", peer.isValid())
.detail("SendClosed", a > .33)
.detail("RecvClosed", a < .66)
.detail("Explicit", b < .3);
if (a < .66 && peer) peer->closeInternal();
if (a > .33) closeInternal();
// At the moment, we occasionally notice the connection failed immediately. In principle, this could happen but only after a delay.
@ -381,7 +387,8 @@ private:
ACTOR static Future<Void> trackLeakedConnection( Sim2Conn* self ) {
wait( g_simulator.onProcess( self->process ) );
if (self->process->address.isPublic()) {
wait( delay( FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * 1.5 ) );
wait(delay(FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * 1.5 +
FLOW_KNOBS->CONNECTION_MONITOR_LOOP_TIME * 2.1 + FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT));
} else {
wait( delay( FLOW_KNOBS->CONNECTION_MONITOR_IDLE_TIMEOUT * 1.5 ) );
}

View File

@ -302,7 +302,8 @@ ACTOR Future<Void> leaderRegister(LeaderElectionRegInterface interf, Key key) {
//TODO: use notify to only send a heartbeat once per interval
availableLeaders.erase( LeaderInfo(req.prevChangeID) );
availableLeaders.insert( req.myInfo );
req.reply.send( currentNominee.present() && currentNominee.get().equalInternalId(req.myInfo) );
req.reply.send(
LeaderHeartbeatReply{ currentNominee.present() && currentNominee.get().equalInternalId(req.myInfo) });
}
when (ForwardRequest req = waitNext( interf.forward.getFuture() ) ) {
LeaderInfo newInfo;
@ -499,7 +500,7 @@ ACTOR Future<Void> leaderServer(LeaderElectionRegInterface interf, OnDemandStore
when ( LeaderHeartbeatRequest req = waitNext( interf.leaderHeartbeat.getFuture() ) ) {
Optional<LeaderInfo> forward = regs.getForward(req.key);
if( forward.present() )
req.reply.send( false );
req.reply.send(LeaderHeartbeatReply{ false });
else
regs.getInterface(req.key, id).leaderHeartbeat.send(req);
}

View File

@ -136,12 +136,29 @@ struct CandidacyRequest {
}
};
struct LeaderHeartbeatReply {
constexpr static FileIdentifier file_identifier = 11;
bool value = false;
LeaderHeartbeatReply() = default;
explicit LeaderHeartbeatReply(bool value) : value(value) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, value);
}
};
inline bool operator==(const LeaderHeartbeatReply& lhs, const LeaderHeartbeatReply& rhs) {
return lhs.value == rhs.value;
}
struct LeaderHeartbeatRequest {
constexpr static FileIdentifier file_identifier = 9495992;
Key key;
LeaderInfo myInfo;
UID prevChangeID;
ReplyPromise<bool> reply;
ReplyPromise<LeaderHeartbeatReply> reply;
LeaderHeartbeatRequest() {}
explicit LeaderHeartbeatRequest( Key key, LeaderInfo const& myInfo, UID prevChangeID ) : key(key), myInfo(myInfo), prevChangeID(prevChangeID) {}

View File

@ -3008,7 +3008,7 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
}
} catch(Error& e) {
if(logTeamEvents) {
TraceEvent("TeamTrackerStopping", self->distributorId).detail("Team", team->getDesc());
TraceEvent("TeamTrackerStopping", self->distributorId).detail("Team", team->getDesc()).detail("Priority", team->getPriority());
}
self->priority_teams[team->getPriority()]--;
if (team->isHealthy()) {
@ -4347,7 +4347,7 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
std::vector<Future<Void>> disablePops;
for (const auto & tlog : tlogs) {
disablePops.push_back(
transformErrors(throwErrorOr(tlog.disablePopRequest.tryGetReply(TLogDisablePopRequest(snapReq.snapUID))), operation_failed())
transformErrors(throwErrorOr(tlog.disablePopRequest.tryGetReply(TLogDisablePopRequest(snapReq.snapUID))), snap_disable_tlog_pop_failed())
);
}
wait(waitForAll(disablePops));
@ -4356,14 +4356,14 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
// snap local storage nodes
std::vector<WorkerInterface> storageWorkers = wait(getStorageWorkers(cx, db, true /* localOnly */));
std::vector<WorkerInterface> storageWorkers = wait(transformErrors(getStorageWorkers(cx, db, true /* localOnly */), snap_storage_failed()));
TraceEvent("SnapDataDistributor_GotStorageWorkers")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
std::vector<Future<Void>> storageSnapReqs;
for (const auto & worker : storageWorkers) {
storageSnapReqs.push_back(
transformErrors(throwErrorOr(worker.workerSnapReq.tryGetReply(WorkerSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("storage")))), operation_failed())
transformErrors(throwErrorOr(worker.workerSnapReq.tryGetReply(WorkerSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("storage")))), snap_storage_failed())
);
}
wait(waitForAll(storageSnapReqs));
@ -4375,7 +4375,7 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
std::vector<Future<Void>> tLogSnapReqs;
for (const auto & tlog : tlogs) {
tLogSnapReqs.push_back(
transformErrors(throwErrorOr(tlog.snapRequest.tryGetReply(TLogSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("tlog")))), operation_failed())
transformErrors(throwErrorOr(tlog.snapRequest.tryGetReply(TLogSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("tlog")))), snap_tlog_failed())
);
}
wait(waitForAll(tLogSnapReqs));
@ -4387,7 +4387,7 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
std::vector<Future<Void>> enablePops;
for (const auto & tlog : tlogs) {
enablePops.push_back(
transformErrors(throwErrorOr(tlog.enablePopRequest.tryGetReply(TLogEnablePopRequest(snapReq.snapUID))), operation_failed())
transformErrors(throwErrorOr(tlog.enablePopRequest.tryGetReply(TLogEnablePopRequest(snapReq.snapUID))), snap_enable_tlog_pop_failed())
);
}
wait(waitForAll(enablePops));
@ -4403,18 +4403,36 @@ ACTOR Future<Void> ddSnapCreateCore(DistributorSnapRequest snapReq, Reference<As
std::vector<Future<Void>> coordSnapReqs;
for (const auto & worker : coordWorkers) {
coordSnapReqs.push_back(
transformErrors(throwErrorOr(worker.workerSnapReq.tryGetReply(WorkerSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("coord")))), operation_failed())
transformErrors(throwErrorOr(worker.workerSnapReq.tryGetReply(WorkerSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("coord")))), snap_coord_failed())
);
}
wait(waitForAll(coordSnapReqs));
TraceEvent("SnapDataDistributor_AfterSnapCoords")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
} catch (Error& e) {
} catch (Error& err) {
state Error e = err;
TraceEvent("SnapDataDistributor_SnapReqExit")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID)
.error(e, true /*includeCancelled */);
if (e.code() == error_code_snap_storage_failed
|| e.code() == error_code_snap_tlog_failed
|| e.code() == error_code_operation_cancelled) {
// enable tlog pop on local tlog nodes
std::vector<TLogInterface> tlogs = db->get().logSystemConfig.allLocalLogs(false);
try {
std::vector<Future<Void>> enablePops;
for (const auto & tlog : tlogs) {
enablePops.push_back(
transformErrors(throwErrorOr(tlog.enablePopRequest.tryGetReply(TLogEnablePopRequest(snapReq.snapUID))), snap_enable_tlog_pop_failed())
);
}
wait(waitForAll(enablePops));
} catch (Error& error) {
TraceEvent(SevDebug, "IgnoreEnableTLogPopFailure");
}
}
throw e;
}
return Void();
@ -4435,7 +4453,7 @@ ACTOR Future<Void> ddSnapCreate(DistributorSnapRequest snapReq, Reference<AsyncV
TraceEvent("SnapDDCreateDBInfoChanged")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
snapReq.reply.sendError(operation_failed());
snapReq.reply.sendError(snap_with_recovery_unsupported());
}
when (wait(ddSnapCreateCore(snapReq, db))) {
TraceEvent("SnapDDCreateSuccess")

View File

@ -888,6 +888,8 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
state bool allHealthy = true;
state bool anyWithSource = false;
state std::vector<std::pair<Reference<IDataDistributionTeam>,bool>> bestTeams;
state double startTime = now();
state std::vector<UID> destIds;
try {
if(now() - self->lastInterval < 1.0) {
@ -964,7 +966,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
wait( delay( SERVER_KNOBS->BEST_TEAM_STUCK_DELAY, TaskPriority::DataDistributionLaunch ) );
}
state std::vector<UID> destIds;
destIds.clear();
state std::vector<UID> healthyIds;
state std::vector<UID> extraIds;
state std::vector<ShardsAffectedByTeamFailure::Team> destinationTeams;
@ -1075,7 +1077,10 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
// onFinished.send( rs );
if( !error.code() ) {
TraceEvent(relocateShardInterval.end(), distributorId).detail("Result","Success");
TraceEvent(relocateShardInterval.end(), distributorId).detail("Duration", now() - startTime).detail("Result","Success");
if(now() - startTime > 600) {
TraceEvent(SevWarnAlways, "RelocateShardTooLong").detail("Duration", now() - startTime).detail("Dest", describe(destIds)).detail("Src", describe(rd.src));
}
if(rd.keys.begin == keyServersPrefix) {
TraceEvent("MovedKeyServerKeys").detail("Dest", describe(destIds)).trackLatest("MovedKeyServers");
}
@ -1099,7 +1104,10 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
}
}
} catch (Error& e) {
TraceEvent(relocateShardInterval.end(), distributorId).error(e, true);
TraceEvent(relocateShardInterval.end(), distributorId).error(e, true).detail("Duration", now() - startTime);
if(now() - startTime > 600) {
TraceEvent(SevWarnAlways, "RelocateShardTooLong").error(e, true).detail("Duration", now() - startTime).detail("Dest", describe(destIds)).detail("Src", describe(rd.src));
}
if( !signalledTransferComplete )
dataTransferComplete.send( rd );

View File

@ -21,7 +21,6 @@ ExecCmdValueString::ExecCmdValueString(StringRef pCmdValueString) {
void ExecCmdValueString::setCmdValueString(StringRef pCmdValueString) {
// reset everything
binaryPath = StringRef();
keyValueMap.clear();
// set the new cmdValueString
cmdValueString = pCmdValueString;
@ -42,18 +41,10 @@ VectorRef<StringRef> ExecCmdValueString::getBinaryArgs() {
return binaryArgs;
}
StringRef ExecCmdValueString::getBinaryArgValue(StringRef key) {
StringRef res;
if (keyValueMap.find(key) != keyValueMap.end()) {
res = keyValueMap[key];
}
return res;
}
void ExecCmdValueString::parseCmdValue() {
StringRef param = this->cmdValueString;
// get the binary path
this->binaryPath = param.eat(LiteralStringRef(":"));
this->binaryPath = param.eat(LiteralStringRef(" "));
// no arguments provided
if (param == StringRef()) {
@ -62,11 +53,8 @@ void ExecCmdValueString::parseCmdValue() {
// extract the arguments
while (param != StringRef()) {
StringRef token = param.eat(LiteralStringRef(","));
StringRef token = param.eat(LiteralStringRef(" "));
this->binaryArgs.push_back(this->binaryArgs.arena(), token);
StringRef key = token.eat(LiteralStringRef("="));
keyValueMap.insert(std::make_pair(key, token));
}
return;
}
@ -153,29 +141,30 @@ ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> par
}
#endif
ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, std::string role) {
state StringRef uidStr = execArg->getBinaryArgValue(LiteralStringRef("uid"));
ACTOR Future<int> execHelper(ExecCmdValueString* execArg, UID snapUID, std::string folder, std::string role) {
state Standalone<StringRef> uidStr = snapUID.toString();
state int err = 0;
state Future<int> cmdErr;
state double maxWaitTime = SERVER_KNOBS->SNAP_CREATE_MAX_TIMEOUT;
if (!g_network->isSimulated()) {
// get bin path
auto snapBin = execArg->getBinaryPath();
auto dataFolder = "path=" + folder;
std::vector<std::string> paramList;
paramList.push_back(snapBin.toString());
// get user passed arguments
auto listArgs = execArg->getBinaryArgs();
for (auto elem : listArgs) {
paramList.push_back(elem.toString());
}
// get additional arguments
paramList.push_back(dataFolder);
paramList.push_back("--path");
paramList.push_back(folder);
const char* version = FDB_VT_VERSION;
std::string versionString = "version=";
versionString += version;
paramList.push_back(versionString);
paramList.push_back("--version");
paramList.push_back(version);
paramList.push_back("--role");
paramList.push_back(role);
paramList.push_back("--uid");
paramList.push_back(uidStr.toString());
cmdErr = spawnProcess(snapBin.toString(), paramList, maxWaitTime, false /*isSync*/, 0);
wait(success(cmdErr));
err = cmdErr.get();

View File

@ -27,7 +27,6 @@ public: // ctor & dtor
public: // interfaces
StringRef getBinaryPath();
VectorRef<StringRef> getBinaryArgs();
StringRef getBinaryArgValue(StringRef key);
void setCmdValueString(StringRef cmdValueString);
StringRef getCmdValueString(void);
@ -41,7 +40,6 @@ private: // data
Standalone<StringRef> cmdValueString;
Standalone<VectorRef<StringRef>> binaryArgs;
StringRef binaryPath;
std::map<StringRef, StringRef> keyValueMap;
};
// FIXME: move this function to a common location
@ -52,7 +50,7 @@ private: // data
ACTOR Future<int> spawnProcess(std::string binPath, std::vector<std::string> paramList, double maxWaitTime, bool isSync, double maxSimDelayTime);
// helper to run all the work related to running the exec command
ACTOR Future<int> execHelper(ExecCmdValueString* execArg, std::string folder, std::string role);
ACTOR Future<int> execHelper(ExecCmdValueString* execArg, UID snapUID, std::string folder, std::string role);
// returns true if the execUID op is in progress
bool isExecOpInProgress(UID execUID);

View File

@ -177,7 +177,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( MAX_TEAMS_PER_SERVER, 5*DESIRED_TEAMS_PER_SERVER );
init( DD_SHARD_SIZE_GRANULARITY, 5000000 );
init( DD_SHARD_SIZE_GRANULARITY_SIM, 500000 ); if( randomize && BUGGIFY ) DD_SHARD_SIZE_GRANULARITY_SIM = 0;
init( DD_MOVE_KEYS_PARALLELISM, 20 ); if( randomize && BUGGIFY ) DD_MOVE_KEYS_PARALLELISM = 1;
init( DD_MOVE_KEYS_PARALLELISM, 15 ); if( randomize && BUGGIFY ) DD_MOVE_KEYS_PARALLELISM = 1;
init( DD_MERGE_LIMIT, 2000 ); if( randomize && BUGGIFY ) DD_MERGE_LIMIT = 2;
init( DD_SHARD_METRICS_TIMEOUT, 60.0 ); if( randomize && BUGGIFY ) DD_SHARD_METRICS_TIMEOUT = 0.1;
init( DD_LOCATION_CACHE_SIZE, 2000000 ); if( randomize && BUGGIFY ) DD_LOCATION_CACHE_SIZE = 3;
@ -352,8 +352,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( RATEKEEPER_FAILURE_TIME, 1.0 );
init( REPLACE_INTERFACE_DELAY, 60.0 );
init( REPLACE_INTERFACE_CHECK_DELAY, 5.0 );
init( COORDINATOR_REGISTER_INTERVAL, 30.0 );
init( CLIENT_REGISTER_INTERVAL, 300.0 );
init( COORDINATOR_REGISTER_INTERVAL, 5.0 );
init( CLIENT_REGISTER_INTERVAL, 600.0 );
init( INCOMPATIBLE_PEERS_LOGGING_INTERVAL, 600 ); if( randomize && BUGGIFY ) INCOMPATIBLE_PEERS_LOGGING_INTERVAL = 60.0;
init( EXPECTED_MASTER_FITNESS, ProcessClass::UnsetFit );
@ -420,8 +420,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
init( MAX_TPS_HISTORY_SAMPLES, 600 );
init( NEEDED_TPS_HISTORY_SAMPLES, 200 );
init( TARGET_DURABILITY_LAG_VERSIONS, 200e6 );
init( TARGET_DURABILITY_LAG_VERSIONS_BATCH, 100e6 );
init( TARGET_DURABILITY_LAG_VERSIONS, 350e6 ); // Should be larger than STORAGE_DURABILITY_LAG_SOFT_MAX
init( TARGET_DURABILITY_LAG_VERSIONS_BATCH, 250e6 ); // Should be larger than STORAGE_DURABILITY_LAG_SOFT_MAX
init( DURABILITY_LAG_UNLIMITED_THRESHOLD, 50e6 );
init( INITIAL_DURABILITY_LAG_MULTIPLIER, 1.02 );
init( DURABILITY_LAG_REDUCTION_RATE, 0.9999 );

View File

@ -183,9 +183,11 @@ ACTOR Future<Void> tryBecomeLeaderInternal(ServerCoordinators coordinators, Valu
state vector<Future<Void>> true_heartbeats;
state vector<Future<Void>> false_heartbeats;
for(int i=0; i<coordinators.leaderElectionServers.size(); i++) {
Future<bool> hb = retryBrokenPromise( coordinators.leaderElectionServers[i].leaderHeartbeat, LeaderHeartbeatRequest( coordinators.clusterKey, myInfo, prevChangeID ), TaskPriority::CoordinationReply );
true_heartbeats.push_back( onEqual(hb, true) );
false_heartbeats.push_back( onEqual(hb, false) );
Future<LeaderHeartbeatReply> hb = retryBrokenPromise(
coordinators.leaderElectionServers[i].leaderHeartbeat,
LeaderHeartbeatRequest(coordinators.clusterKey, myInfo, prevChangeID), TaskPriority::CoordinationReply);
true_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ true }));
false_heartbeats.push_back(onEqual(hb, LeaderHeartbeatReply{ false }));
}
state Future<Void> rate = delay( SERVER_KNOBS->HEARTBEAT_FREQUENCY, TaskPriority::CoordinationReply ) || asyncPriorityInfo->onChange(); // SOMEDAY: Move to server side?

View File

@ -55,10 +55,25 @@ struct MasterInterface {
}
};
struct TLogRejoinReply {
constexpr static FileIdentifier file_identifier = 11;
// false means someone else registered, so we should re-register. true means this master is recovered, so don't
// send again to the same master.
bool masterIsRecovered;
TLogRejoinReply() = default;
explicit TLogRejoinReply(bool masterIsRecovered) : masterIsRecovered(masterIsRecovered) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, masterIsRecovered);
}
};
struct TLogRejoinRequest {
constexpr static FileIdentifier file_identifier = 15692200;
TLogInterface myInterface;
ReplyPromise<bool> reply; // false means someone else registered, so we should re-register. true means this master is recovered, so don't send again to the same master.
ReplyPromise<TLogRejoinReply> reply;
TLogRejoinRequest() { }
explicit TLogRejoinRequest(const TLogInterface &interf) : myInterface(interf) { }

View File

@ -128,7 +128,7 @@ ACTOR Future<Void> getRate(UID myID, Reference<AsyncVar<ServerDBInfo>> db, int64
when ( wait( leaseTimeout ) ) {
*outTransactionRate = 0;
*outBatchTransactionRate = 0;
//TraceEvent("MasterProxyRate", myID).detail("Rate", 0).detail("BatchRate", 0).detail("Lease", "Expired");
//TraceEvent("MasterProxyRate", myID).detail("Rate", 0.0).detail("BatchRate", 0.0).detail("Lease", "Expired");
leaseTimeout = Never();
}
}
@ -156,10 +156,7 @@ ACTOR Future<Void> queueTransactionStartRequests(
stats->txnBatchPriorityStartIn += req.transactionCount;
if (transactionQueue->empty()) {
if (now() - *lastGRVTime > *GRVBatchTime)
*lastGRVTime = now() - *GRVBatchTime;
forwardPromise(GRVTimer, delayJittered(*GRVBatchTime - (now() - *lastGRVTime), TaskPriority::ProxyGRVTimer));
forwardPromise(GRVTimer, delayJittered(std::max(0.0, *GRVBatchTime - (now() - *lastGRVTime)), TaskPriority::ProxyGRVTimer));
}
transactionQueue->push(std::make_pair(req, counter--));
@ -1108,7 +1105,9 @@ struct TransactionRateInfo {
TransactionRateInfo(double rate) : rate(rate), limit(0) {}
void reset(double elapsed) {
limit = std::min(0.0,limit) + std::min(rate * elapsed, SERVER_KNOBS->START_TRANSACTION_MAX_TRANSACTIONS_TO_START);
limit = std::min(0.0, limit) + rate * elapsed; // Adjust the limit based on the full elapsed interval in order to properly erase a deficit
limit = std::min(limit, rate * SERVER_KNOBS->START_TRANSACTION_BATCH_INTERVAL_MAX); // Don't allow the rate to exceed what would be allowed in the maximum batch interval
limit = std::min(limit, SERVER_KNOBS->START_TRANSACTION_MAX_TRANSACTIONS_TO_START);
}
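The rewritten reset() carries a deficit (a negative limit) forward in full while discarding any surplus, then caps the refill both by what the maximum batch interval would allow and by the absolute per-batch maximum. A stand-alone numeric sketch (not FDB code; knob values are illustrative):

#include <algorithm>
#include <cstdio>

int main() {
    const double rate = 1000.0;          // transactions per second (illustrative)
    const double batchIntervalMax = 0.5; // START_TRANSACTION_BATCH_INTERVAL_MAX (illustrative)
    const double maxToStart = 10000.0;   // START_TRANSACTION_MAX_TRANSACTIONS_TO_START (illustrative)

    double limit = -300.0;               // 300 transactions "in debt" from the last batch
    double elapsed = 0.4;                // seconds since the last GRV batch

    limit = std::min(0.0, limit) + rate * elapsed;           // -300 + 400 = 100: deficit erased in full
    limit = std::min(limit, rate * batchIntervalMax);        // capped at 500 for this interval
    limit = std::min(limit, maxToStart);                     // absolute per-batch cap
    printf("budget this batch: %.0f transactions\n", limit); // prints 100
}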
bool canStart(int64_t numAlreadyStarted) {
@ -1170,7 +1169,7 @@ ACTOR static Future<Void> transactionStarter(
waitNext(GRVTimer.getFuture());
// Select zero or more transactions to start
double t = now();
double elapsed = std::min<double>(now() - lastGRVTime, SERVER_KNOBS->START_TRANSACTION_BATCH_INTERVAL_MAX);
double elapsed = now() - lastGRVTime;
lastGRVTime = t;
if(elapsed == 0) elapsed = 1e-15; // resolve a possible indeterminant multiplication with infinite transaction rate
@ -1466,7 +1465,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
TraceEvent("SnapMasterProxy_WhiteListCheckFailed")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
throw transaction_not_permitted();
throw snap_path_not_whitelisted();
}
// db fully recovered check
if (commitData->db->get().recoveryState != RecoveryState::FULLY_RECOVERED) {
@ -1478,7 +1477,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
TraceEvent("SnapMasterProxy_ClusterNotFullyRecovered")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
throw cluster_not_fully_recovered();
throw snap_not_fully_recovered_unsupported();
}
auto result =
@ -1493,7 +1492,7 @@ ACTOR Future<Void> proxySnapCreate(ProxySnapRequest snapReq, ProxyCommitData* co
TraceEvent("SnapMasterProxy_LogAnitQuorumNotSupported")
.detail("SnapPayload", snapReq.snapPayload)
.detail("SnapUID", snapReq.snapUID);
throw txn_exec_log_anti_quorum();
throw snap_log_anti_quorum_unsupported();
}
// send a snap request to DD

View File

@ -139,8 +139,8 @@ Future<Void> checkMoveKeysLockReadOnly( Transaction* tr, MoveKeysLock lock ) {
return checkMoveKeysLock(tr, lock, false);
}
ACTOR Future<Optional<UID>> checkReadWrite( Future< ErrorOr<std::pair<Version,Version>> > fReply, UID uid, Version version ) {
ErrorOr<std::pair<Version,Version>> reply = wait( fReply );
ACTOR Future<Optional<UID>> checkReadWrite(Future<ErrorOr<GetShardStateReply>> fReply, UID uid, Version version) {
ErrorOr<GetShardStateReply> reply = wait(fReply);
if (!reply.present() || reply.get().first < version)
return Optional<UID>();
return Optional<UID>(uid);
@ -258,6 +258,14 @@ ACTOR Future<vector<vector<UID>>> additionalSources(Standalone<RangeResultRef> s
return result;
}
ACTOR Future<Void> logWarningAfter( const char * context, double duration, vector<UID> servers) {
state double startTime = now();
loop {
wait(delay(duration));
TraceEvent(SevWarnAlways, context).detail("Duration", now() - startTime).detail("Servers", describe(servers));
}
}
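
A standard-library analog of this watchdog pattern, offered as a sketch rather than Flow code: destruction stands in for actor cancellation, just as the warning actor above dies when the state Future<Void> holding it goes out of scope in startMoveKeys and finishMoveKeys below.

#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

class WarnAfter {
    std::mutex m;
    std::condition_variable cv;
    bool done = false;
    std::thread t;
public:
    WarnAfter(const char* context, std::chrono::seconds duration)
      : t([this, context, duration] {
            std::unique_lock<std::mutex> lk(m);
            // Log every `duration` until cancelled by the destructor.
            while (!cv.wait_for(lk, duration, [this] { return done; }))
                std::fprintf(stderr, "SevWarnAlways: %s\n", context);
        }) {}
    ~WarnAfter() {
        { std::lock_guard<std::mutex> lk(m); done = true; }
        cv.notify_one();
        t.join();
    }
};

int main() {
    WarnAfter w("StartMoveKeysTooLong", std::chrono::seconds(600));
    // ... the long-running move would happen here ...
}   // watchdog cancelled on scope exit, before it ever fires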
// keyServer: map from keys to destination servers
// serverKeys: two-dimension map: [servers][keys], value is the servers' state of having the keys: active(not-have), complete(already has), ""().
// Set keyServers[keys].dest = servers
@ -265,6 +273,7 @@ ACTOR Future<vector<vector<UID>>> additionalSources(Standalone<RangeResultRef> s
// Set serverKeys[dest][keys] = "" for the dest servers of each existing shard in keys (unless that destination is a member of servers OR if the source list is sufficiently degraded)
ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> servers, MoveKeysLock lock, FlowLock *startMoveKeysLock, UID relocationIntervalId ) {
state TraceInterval interval("RelocateShard_StartMoveKeys");
state Future<Void> warningLogger = logWarningAfter("StartMoveKeysTooLong", 600, servers);
//state TraceInterval waitInterval("");
wait( startMoveKeysLock->take( TaskPriority::DataDistributionLaunch ) );
@ -436,7 +445,8 @@ ACTOR Future<Void> startMoveKeys( Database occ, KeyRange keys, vector<UID> serve
ACTOR Future<Void> waitForShardReady( StorageServerInterface server, KeyRange keys, Version minVersion, GetShardStateRequest::waitMode mode ) {
loop {
try {
std::pair<Version,Version> rep = wait( server.getShardState.getReply( GetShardStateRequest(keys, mode), TaskPriority::MoveKeys ) );
GetShardStateReply rep =
wait(server.getShardState.getReply(GetShardStateRequest(keys, mode), TaskPriority::MoveKeys));
if (rep.first >= minVersion) {
return Void();
}
@ -502,6 +512,7 @@ ACTOR Future<Void> finishMoveKeys( Database occ, KeyRange keys, vector<UID> dest
{
state TraceInterval interval("RelocateShard_FinishMoveKeys");
state TraceInterval waitInterval("");
state Future<Void> warningLogger = logWarningAfter("FinishMoveKeysTooLong", 600, destinationTeam);
state Key begin = keys.begin;
state Key endKey;
state int retries = 0;

View File

@ -1119,11 +1119,11 @@ namespace oldTLog_4_6 {
req.myInterface = tli;
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
choose {
when ( bool success = wait( brokenPromiseToNever( self->dbInfo->get().master.tlogRejoin.getReply( req ) ) ) ) {
if (success)
lastMasterID = self->dbInfo->get().master.id();
}
when ( wait( self->dbInfo->onChange() ) ) { }
when(TLogRejoinReply rep =
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
if (rep.masterIsRecovered) lastMasterID = self->dbInfo->get().master.id();
}
when ( wait( self->dbInfo->onChange() ) ) { }
}
} else {
wait( self->dbInfo->onChange() );

View File

@ -1477,9 +1477,9 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
TLogRejoinRequest req(tli);
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
choose {
when ( bool success = wait( brokenPromiseToNever( self->dbInfo->get().master.tlogRejoin.getReply( req ) ) ) ) {
if (success)
lastMasterID = self->dbInfo->get().master.id();
when(TLogRejoinReply rep =
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
if (rep.masterIsRecovered) lastMasterID = self->dbInfo->get().master.id();
}
when ( wait( self->dbInfo->onChange() ) ) { }
}
@ -1556,8 +1556,7 @@ tLogSnapCreate(TLogSnapRequest snapReq, TLogData* self, Reference<LogData> logDa
}
ExecCmdValueString snapArg(snapReq.snapPayload);
try {
Standalone<StringRef> role = LiteralStringRef("role=").withSuffix(snapReq.role);
int err = wait(execHelper(&snapArg, self->dataFolder, role.toString()));
int err = wait(execHelper(&snapArg, snapReq.snapUID, self->dataFolder, snapReq.role.toString()));
std::string uidStr = snapReq.snapUID.toString();
TraceEvent("ExecTraceTLog")

View File

@ -93,7 +93,6 @@ struct StorageQueueInfo {
Smoother verySmoothDurableVersion, smoothLatestVersion;
Smoother smoothFreeSpace;
Smoother smoothTotalSpace;
double localRateLimit;
limitReason_t limitReason;
StorageQueueInfo(UID id, LocalityData locality) : valid(false), id(id), locality(locality), smoothDurableBytes(SERVER_KNOBS->SMOOTHING_AMOUNT),
smoothInputBytes(SERVER_KNOBS->SMOOTHING_AMOUNT), verySmoothDurableBytes(SERVER_KNOBS->SLOW_SMOOTHING_AMOUNT),
@ -147,7 +146,7 @@ struct RatekeeperLimits {
logTargetBytes(logTargetBytes),
logSpringBytes(logSpringBytes),
maxVersionDifference(maxVersionDifference),
durabilityLagTargetVersions(durabilityLagTargetVersions),
durabilityLagTargetVersions(durabilityLagTargetVersions + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS), // The read transaction life versions are expected to not be durable on the storage servers
durabilityLagLimit(std::numeric_limits<double>::infinity()),
lastDurabilityLag(0),
context(context)
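
The added term works out as simple addition; with illustrative numbers (not necessarily the shipped knob defaults):

#include <cstdint>

int main() {
    int64_t configuredTarget = 200'000'000; // durabilityLagTargetVersions as passed in
    int64_t readTxnLife      = 5'000'000;   // MAX_READ_TRANSACTION_LIFE_VERSIONS
    int64_t effectiveTarget  = configuredTarget + readTxnLife;
    return effectiveTarget == 205'000'000 ? 0 : 1; // ratekeeper now controls toward this sum
}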
@ -203,7 +202,6 @@ ACTOR Future<Void> trackStorageServerQueueInfo( RatekeeperData* self, StorageSer
myQueueInfo->value.valid = true;
myQueueInfo->value.prevReply = myQueueInfo->value.lastReply;
myQueueInfo->value.lastReply = reply.get();
myQueueInfo->value.localRateLimit = reply.get().localRateLimit;
if (myQueueInfo->value.prevReply.instanceID != reply.get().instanceID) {
myQueueInfo->value.smoothDurableBytes.reset(reply.get().bytesDurable);
myQueueInfo->value.verySmoothDurableBytes.reset(reply.get().bytesDurable);
@ -376,8 +374,6 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
int64_t worstStorageQueueStorageServer = 0;
int64_t limitingStorageQueueStorageServer = 0;
int64_t worstDurabilityLag = 0;
double worstStorageLocalLimit = 0;
double limitingStorageLocalLimit = 0;
std::multimap<double, StorageQueueInfo*> storageTpsLimitReverseIndex;
std::multimap<int64_t, StorageQueueInfo*> storageDurabilityLagReverseIndex;
@ -408,7 +404,6 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
int64_t storageQueue = ss.lastReply.bytesInput - ss.smoothDurableBytes.smoothTotal();
worstStorageQueueStorageServer = std::max(worstStorageQueueStorageServer, storageQueue);
worstStorageLocalLimit = std::min(worstStorageLocalLimit, ss.localRateLimit);
int64_t storageDurabilityLag = ss.smoothLatestVersion.smoothTotal() - ss.verySmoothDurableVersion.smoothTotal();
worstDurabilityLag = std::max(worstDurabilityLag, storageDurabilityLag);
@ -485,7 +480,6 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
}
limitingStorageQueueStorageServer = ss->second->lastReply.bytesInput - ss->second->smoothDurableBytes.smoothTotal();
limitingStorageLocalLimit = ss->second->lastReply.localRateLimit;
limits->tpsLimit = ss->first;
reasonID = storageTpsLimitReverseIndex.begin()->second->id; // Although we aren't controlling based on the worst SS, we still report it as the limiting process
limitReason = ssReasons[reasonID];
@ -679,14 +673,12 @@ void updateRate(RatekeeperData* self, RatekeeperLimits* limits) {
.detail("WorstFreeSpaceTLog", worstFreeSpaceTLog)
.detail("WorstStorageServerQueue", worstStorageQueueStorageServer)
.detail("LimitingStorageServerQueue", limitingStorageQueueStorageServer)
.detail("WorstStorageLocalLimit", worstStorageLocalLimit)
.detail("LimitingStorageLocalLimit", limitingStorageLocalLimit)
.detail("WorstTLogQueue", worstStorageQueueTLog)
.detail("TotalDiskUsageBytes", totalDiskUsageBytes)
.detail("WorstStorageServerVersionLag", worstVersionLag)
.detail("LimitingStorageServerVersionLag", limitingVersionLag)
.detail("WorstDurabilityLag", worstDurabilityLag)
.detail("LimitingDurabilityLag", limitingDurabilityLag)
.detail("WorstStorageServerDurabilityLag", worstDurabilityLag)
.detail("LimitingStorageServerDurabilityLag", limitingDurabilityLag)
.trackLatest(name.c_str());
}
}

View File

@ -103,9 +103,22 @@ struct ResolveTransactionBatchRequest {
}
};
struct ResolutionMetricsReply {
constexpr static FileIdentifier file_identifier = 3;
int64_t value;
ResolutionMetricsReply() = default;
explicit ResolutionMetricsReply(int64_t value) : value(value) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, value);
}
};
struct ResolutionMetricsRequest {
constexpr static FileIdentifier file_identifier = 11663527;
ReplyPromise<int64_t> reply;
ReplyPromise<ResolutionMetricsReply> reply;
template <class Archive>
void serialize(Archive& ar) {

View File

@ -388,6 +388,13 @@ static JsonBuilderObject machineStatusFetcher(WorkerEvents mMetrics, vector<Work
return machineMap;
}
JsonBuilderObject getLagObject(int64_t versions) {
JsonBuilderObject lag;
lag["versions"] = versions;
lag["seconds"] = versions / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
return lag;
}
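
Assuming VERSIONS_PER_SECOND is 1e6 (its usual released value, stated here as an assumption), the helper turns five million versions of lag into a pair of fields:

JsonBuilderObject lag = getLagObject(5000000);
// serializes as { "versions": 5000000, "seconds": 5.0 }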
struct MachineMemoryInfo {
double memoryUsage;
double numProcesses;
@ -474,17 +481,8 @@ struct RolesInfo {
obj["read_latency_bands"] = addLatencyBandInfo(readLatencyMetrics);
}
JsonBuilderObject dataLag;
dataLag["versions"] = versionLag;
dataLagSeconds = versionLag / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
dataLag["seconds"] = dataLagSeconds;
JsonBuilderObject durabilityLag;
durabilityLag["versions"] = version - durableVersion;
durabilityLag["seconds"] = (version - durableVersion) / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
obj["data_lag"] = dataLag;
obj["durability_lag"] = durabilityLag;
obj["data_lag"] = getLagObject(versionLag);
obj["durability_lag"] = getLagObject(version - durableVersion);
} catch (Error& e) {
if(e.code() != error_code_attribute_not_found)
@ -551,6 +549,11 @@ struct RolesInfo {
JsonBuilderObject& addRole(std::string const& role, InterfaceType& iface) {
return addRole(iface.address(), role, iface.id());
}
JsonBuilderObject& addCoordinatorRole(NetworkAddress addr) {
JsonBuilderObject obj;
obj["role"] = "coordinator";
return roles.insert(std::make_pair(addr, obj))->second;
}
JsonBuilderArray getStatusForAddress( NetworkAddress a ) {
JsonBuilderArray v;
auto it = roles.lower_bound(a);
@ -565,10 +568,11 @@ struct RolesInfo {
ACTOR static Future<JsonBuilderObject> processStatusFetcher(
Reference<AsyncVar<struct ServerDBInfo>> db, std::vector<WorkerDetails> workers, WorkerEvents pMetrics,
WorkerEvents mMetrics, WorkerEvents nMetrics, WorkerEvents errors, WorkerEvents traceFileOpenErrors,
WorkerEvents programStarts, std::map<std::string, std::vector<JsonBuilderObject>> processIssues,
WorkerEvents programStarts, std::map<std::string, std::vector<JsonBuilderObject>> processIssues,
vector<std::pair<StorageServerInterface, EventMap>> storageServers,
vector<std::pair<TLogInterface, EventMap>> tLogs, vector<std::pair<MasterProxyInterface, EventMap>> proxies,
Database cx, Optional<DatabaseConfiguration> configuration, Optional<Key> healthyZone, std::set<std::string>* incomplete_reasons) {
ServerCoordinators coordinators, Database cx, Optional<DatabaseConfiguration> configuration,
Optional<Key> healthyZone, std::set<std::string>* incomplete_reasons) {
state JsonBuilderObject processMap;
@ -649,6 +653,10 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
}
}
for(auto& coordinator : coordinators.ccf->getConnectionString().coordinators()) {
roles.addCoordinatorRole(coordinator);
}
state std::vector<std::pair<MasterProxyInterface, EventMap>>::iterator proxy;
for(proxy = proxies.begin(); proxy != proxies.end(); ++proxy) {
roles.addRole( "proxy", proxy->first, proxy->second );
@ -1150,8 +1158,9 @@ struct LoadConfigurationResult {
Optional<Key> healthyZone;
double healthyZoneSeconds;
bool rebalanceDDIgnored;
bool dataDistributionDisabled;
LoadConfigurationResult() : fullReplication(true), healthyZoneSeconds(0), rebalanceDDIgnored(false) {}
LoadConfigurationResult() : fullReplication(true), healthyZoneSeconds(0), rebalanceDDIgnored(false), dataDistributionDisabled(false) {}
};
ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfigurationResult>>> loadConfiguration(Database cx, JsonBuilderArray *messages, std::set<std::string> *status_incomplete_reasons){
@ -1193,12 +1202,13 @@ ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfi
}
state Future<Optional<Value>> healthyZoneValue = tr.get(healthyZoneKey);
state Future<Optional<Value>> rebalanceDDIgnored = tr.get(rebalanceDDIgnoreKey);
state Future<Optional<Value>> ddModeKey = tr.get(dataDistributionModeKey);
choose {
when(wait(waitForAll(replicasFutures) && success(healthyZoneValue) && success(rebalanceDDIgnored))) {
when(wait(waitForAll(replicasFutures) && success(healthyZoneValue) && success(rebalanceDDIgnored) && success(ddModeKey))) {
int unreplicated = 0;
for(int i = 0; i < result.get().regions.size(); i++) {
if( !replicasFutures[i].get().present() || decodeDatacenterReplicasValue(replicasFutures[i].get().get()) < result.get().storageTeamSize ) {
if( !replicasFutures[i].get().present() || decodeDatacenterReplicasValue(replicasFutures[i].get().get()) < result.get().storageTeamSize ) {
unreplicated++;
}
}
@ -1206,12 +1216,23 @@ ACTOR static Future<std::pair<Optional<DatabaseConfiguration>,Optional<LoadConfi
res.fullReplication = (!unreplicated || (result.get().usableRegions == 1 && unreplicated < result.get().regions.size()));
if(healthyZoneValue.get().present()) {
auto healthyZone = decodeHealthyZoneValue(healthyZoneValue.get().get());
if(healthyZone.second > tr.getReadVersion().get()) {
if(healthyZone.first == ignoreSSFailuresZoneString) {
res.healthyZone = healthyZone.first;
}
else if(healthyZone.second > tr.getReadVersion().get()) {
res.healthyZone = healthyZone.first;
res.healthyZoneSeconds = (healthyZone.second-tr.getReadVersion().get())/CLIENT_KNOBS->CORE_VERSIONSPERSECOND;
}
}
res.rebalanceDDIgnored = rebalanceDDIgnored.get().present();
if (ddModeKey.get().present()) {
BinaryReader rd(ddModeKey.get().get(), Unversioned());
int currentMode;
rd >> currentMode;
if (currentMode == 0) {
res.dataDistributionDisabled = true;
}
}
loadResult = res;
}
when(wait(getConfTimeout)) {
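
For reference, a hedged sketch of the write side this decoder pairs with; the assumption is that the mode is stored as a BinaryWriter-encoded int, mirroring the BinaryReader read above:

BinaryWriter wr(Unversioned());
int mode = 0;                      // 0 = data distribution disabled
wr << mode;
tr.set(dataDistributionModeKey, wr.toValue());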
@ -1611,8 +1632,15 @@ ACTOR static Future<JsonBuilderObject> workloadStatusFetcher(Reference<AsyncVar<
(*data_overlay)["least_operating_space_bytes_storage_server"] = std::max(worstFreeSpaceStorageServer, (int64_t)0);
(*qos).setKeyRawNumber("worst_queue_bytes_storage_server", ratekeeper.getValue("WorstStorageServerQueue"));
(*qos).setKeyRawNumber("limiting_queue_bytes_storage_server", ratekeeper.getValue("LimitingStorageServerQueue"));
// TODO: These can be removed in the next release after 6.2
(*qos).setKeyRawNumber("worst_version_lag_storage_server", ratekeeper.getValue("WorstStorageServerVersionLag"));
(*qos).setKeyRawNumber("limiting_version_lag_storage_server", ratekeeper.getValue("LimitingStorageServerVersionLag"));
(*qos)["worst_data_lag_storage_server"] = getLagObject(ratekeeper.getInt64("WorstStorageServerVersionLag"));
(*qos)["limiting_data_lag_storage_server"] = getLagObject(ratekeeper.getInt64("LimitingStorageServerVersionLag"));
(*qos)["worst_durability_lag_storage_server"] = getLagObject(ratekeeper.getInt64("WorstStorageServerDurabilityLag"));
(*qos)["limiting_durability_lag_storage_server"] = getLagObject(ratekeeper.getInt64("LimitingStorageServerDurabilityLag"));
}
if(tlogCount > 0) {
@ -2173,6 +2201,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
if (loadResult.get().rebalanceDDIgnored) {
statusObj["data_distribution_disabled_for_rebalance"] = true;
}
if (loadResult.get().dataDistributionDisabled) {
statusObj["data_distribution_disabled"] = true;
}
}
statusObj["machines"] = machineStatusFetcher(mMetrics, workers, configuration, &status_incomplete_reasons);
@ -2207,7 +2238,6 @@ ACTOR Future<StatusReply> clusterGetStatus(
futures2.push_back(layerStatusFetcher(cx, &messages, &status_incomplete_reasons));
futures2.push_back(lockedStatusFetcher(db, &messages, &status_incomplete_reasons));
futures2.push_back(clusterSummaryStatisticsFetcher(pMetrics, storageServerFuture, tLogFuture, &status_incomplete_reasons));
state std::vector<JsonBuilderObject> workerStatuses = wait(getAll(futures2));
int oldLogFaultTolerance = 100;
@ -2293,8 +2323,9 @@ ACTOR Future<StatusReply> clusterGetStatus(
JsonBuilderObject processStatus = wait(processStatusFetcher(db, workers, pMetrics, mMetrics, networkMetrics,
latestError, traceFileOpenErrors, programStarts,
processIssues, storageServers, tLogs, proxies, cx,
configuration, loadResult.present() ? loadResult.get().healthyZone : Optional<Key>(),
processIssues, storageServers, tLogs, proxies,
coordinators, cx, configuration,
loadResult.present() ? loadResult.get().healthyZone : Optional<Key>(),
&status_incomplete_reasons));
statusObj["processes"] = processStatus;
statusObj["clients"] = clientStatusFetcher(clientStatus);
@ -2304,11 +2335,7 @@ ACTOR Future<StatusReply> clusterGetStatus(
incompatibleConnectionsArray.push_back(it.toString());
}
statusObj["incompatible_connections"] = incompatibleConnectionsArray;
StatusObject datacenterLag;
datacenterLag["versions"] = datacenterVersionDifference;
datacenterLag["seconds"] = datacenterVersionDifference / (double)SERVER_KNOBS->VERSIONS_PER_SECOND;
statusObj["datacenter_lag"] = datacenterLag;
statusObj["datacenter_lag"] = getLagObject(datacenterVersionDifference);
int totalDegraded = 0;
for(auto& it : workers) {

View File

@ -216,6 +216,19 @@ struct TagMessagesRef {
}
};
struct TLogCommitReply {
constexpr static FileIdentifier file_identifier = 3;
Version version;
TLogCommitReply() = default;
explicit TLogCommitReply(Version version) : version(version) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version);
}
};
struct TLogCommitRequest {
constexpr static FileIdentifier file_identifier = 4022206;
Arena arena;
@ -223,7 +236,7 @@ struct TLogCommitRequest {
StringRef messages;// Each message prefixed by a 4-byte length
ReplyPromise<Version> reply;
ReplyPromise<TLogCommitReply> reply;
Optional<UID> debugID;
TLogCommitRequest() {}

View File

@ -1861,9 +1861,9 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
TLogRejoinRequest req(tli);
TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
choose {
when ( bool success = wait( brokenPromiseToNever( self->dbInfo->get().master.tlogRejoin.getReply( req ) ) ) ) {
if (success)
lastMasterID = self->dbInfo->get().master.id();
when(TLogRejoinReply rep =
wait(brokenPromiseToNever(self->dbInfo->get().master.tlogRejoin.getReply(req)))) {
if (rep.masterIsRecovered) lastMasterID = self->dbInfo->get().master.id();
}
when ( wait( self->dbInfo->onChange() ) ) { }
}
@ -1941,8 +1941,7 @@ tLogSnapCreate(TLogSnapRequest snapReq, TLogData* self, Reference<LogData> logDa
}
ExecCmdValueString snapArg(snapReq.snapPayload);
try {
Standalone<StringRef> role = LiteralStringRef("role=").withSuffix(snapReq.role);
int err = wait(execHelper(&snapArg, self->dataFolder, role.toString()));
int err = wait(execHelper(&snapArg, snapReq.snapUID, self->dataFolder, snapReq.role.toString()));
std::string uidStr = snapReq.snapUID.toString();
TraceEvent("ExecTraceTLog")

View File

@ -30,12 +30,12 @@
#include "fdbserver/RecoveryState.h"
#include "flow/actorcompiler.h" // This must be the last #include.
ACTOR Future<Version> minVersionWhenReady( Future<Void> f, std::vector<Future<Version>> replies) {
ACTOR Future<Version> minVersionWhenReady(Future<Void> f, std::vector<Future<TLogCommitReply>> replies) {
wait(f);
Version minVersion = std::numeric_limits<Version>::max();
for(auto& reply : replies) {
if(reply.isReady() && !reply.isError()) {
minVersion = std::min(minVersion, reply.get());
minVersion = std::min(minVersion, reply.get().version);
}
}
return minVersion;
@ -429,7 +429,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
virtual Future<Version> push( Version prevVersion, Version version, Version knownCommittedVersion, Version minKnownCommittedVersion, LogPushData& data, Optional<UID> debugID ) {
// FIXME: Randomize request order as in LegacyLogSystem?
vector<Future<Void>> quorumResults;
vector<Future<Version>> allReplies;
vector<Future<TLogCommitReply>> allReplies;
int location = 0;
for(auto& it : tLogs) {
if(it->isLocal && it->logServers.size()) {
@ -2271,7 +2271,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
ACTOR static Future<Void> trackRejoins( UID dbgid, std::vector<Reference<AsyncVar<OptionalInterface<TLogInterface>>>> logServers, FutureStream< struct TLogRejoinRequest > rejoinRequests ) {
state std::map<UID,ReplyPromise<bool>> lastReply;
state std::map<UID, ReplyPromise<TLogRejoinReply>> lastReply;
try {
loop {
@ -2287,7 +2287,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
TraceEvent("TLogJoinedMe", dbgid).detail("TLog", req.myInterface.id()).detail("Address", req.myInterface.commit.getEndpoint().getPrimaryAddress().toString());
if( !logServers[pos]->get().present() || req.myInterface.commit.getEndpoint() != logServers[pos]->get().interf().commit.getEndpoint())
logServers[pos]->setUnconditional( OptionalInterface<TLogInterface>(req.myInterface) );
lastReply[req.myInterface.id()].send(false);
lastReply[req.myInterface.id()].send(TLogRejoinReply{ false });
lastReply[req.myInterface.id()] = req.reply;
}
else {
@ -2296,8 +2296,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}
} catch (...) {
for( auto it = lastReply.begin(); it != lastReply.end(); ++it)
it->second.send(true);
for (auto it = lastReply.begin(); it != lastReply.end(); ++it) it->second.send(TLogRejoinReply{ true });
throw;
}
}

View File

@ -31,12 +31,22 @@
#include "fdbrpc/PerfMetric.h"
#include "fdbclient/NativeAPI.actor.h"
#include "flow/actorcompiler.h" // has to be last include
struct CheckReply {
constexpr static FileIdentifier file_identifier = 11;
bool value = false;
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, value);
}
};
struct WorkloadInterface {
constexpr static FileIdentifier file_identifier = 4454551;
RequestStream<ReplyPromise<Void>> setup;
RequestStream<ReplyPromise<Void>> start;
RequestStream<ReplyPromise<bool>> check;
RequestStream<ReplyPromise<CheckReply>> check;
RequestStream<ReplyPromise< std::vector<PerfMetric> > > metrics;
RequestStream<ReplyPromise<Void>> stop;
@ -70,7 +80,7 @@ struct WorkloadRequest {
VectorRef< VectorRef<KeyValueRef> > options;
int clientId; // the "id" of the client recieving the request (0 indexed)
int clientId; // the "id" of the client receiving the request (0 indexed)
int clientCount; // the total number of test clients participating in the workload
ReplyPromise< struct WorkloadInterface > reply;

View File

@ -574,6 +574,9 @@ static void printUsage( const char *name, bool devhelp ) {
" Delete the oldest log file when the total size of all log\n"
" files exceeds SIZE bytes. If set to 0, old log files will not\n"
" be deleted. The default value is 100MiB.\n");
printf(" --loggroup LOG_GROUP\n"
" Sets the LogGroup field with the specified value for all\n"
" events in the trace output (defaults to `default').\n");
printf(" --trace_format FORMAT\n"
" Select the format of the log files. xml (the default) and json\n"
" are supported.\n");

View File

@ -1018,7 +1018,7 @@ ACTOR Future<Void> resolutionBalancing(Reference<MasterData> self) {
wait(delay(SERVER_KNOBS->MIN_BALANCE_TIME, TaskPriority::ResolutionMetrics));
while(self->resolverChanges.get().size())
wait(self->resolverChanges.onChange());
state std::vector<Future<int64_t>> futures;
state std::vector<Future<ResolutionMetricsReply>> futures;
for (auto& p : self->resolvers)
futures.push_back(brokenPromiseToNever(p.metrics.getReply(ResolutionMetricsRequest(), TaskPriority::ResolutionMetrics)));
wait( waitForAll(futures) );
@ -1026,8 +1026,8 @@ ACTOR Future<Void> resolutionBalancing(Reference<MasterData> self) {
int64_t total = 0;
for (int i = 0; i < futures.size(); i++) {
total += futures[i].get();
metrics.insert(std::make_pair(futures[i].get(), i), NoMetric());
total += futures[i].get().value;
metrics.insert(std::make_pair(futures[i].get().value, i), NoMetric());
//TraceEvent("ResolverMetric").detail("I", i).detail("Metric", futures[i].get());
}
if( metrics.lastItem()->first - metrics.begin()->first > SERVER_KNOBS->MIN_BALANCE_DIFFERENCE ) {

View File

@ -934,7 +934,7 @@ ACTOR Future<Void> watchValue_impl( StorageServer* data, WatchValueRequest req )
g_traceBatch.addEvent("WatchValueDebug", req.debugID.get().first(), "watchValueQ.AfterRead"); //.detail("TaskID", g_network->getCurrentTask());
if( reply.value != req.value ) {
req.reply.send( latest );
req.reply.send(WatchValueReply{ latest });
return Void();
}
@ -1012,7 +1012,7 @@ ACTOR Future<Void> getShardState_impl( StorageServer* data, GetShardStateRequest
}
if( !onChange.size() ) {
req.reply.send(std::make_pair(data->version.get(), data->durableVersion.get()));
req.reply.send(GetShardStateReply{ data->version.get(), data->durableVersion.get() });
return Void();
}
@ -1936,9 +1936,18 @@ void splitMutation(StorageServer* data, KeyRangeMap<T>& map, MutationRef const&
ASSERT(false); // Unknown mutation type in splitMutations
}
ACTOR Future<Void> logFetchKeysWarning(AddingShard* shard) {
state double startTime = now();
loop {
wait(delay(600));
TraceEvent(SevWarnAlways, "FetchKeysTooLong").detail("Duration", now() - startTime).detail("Phase", shard->phase).detail("Begin", shard->keys.begin.printable()).detail("End", shard->keys.end.printable());
}
}
ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
state TraceInterval interval("FetchKeys");
state KeyRange keys = shard->keys;
state Future<Void> warningLogger = logFetchKeysWarning(shard);
state double startt = now();
state int fetchBlockBytes = BUGGIFY ? SERVER_KNOBS->BUGGIFY_BLOCK_BYTES : SERVER_KNOBS->FETCH_BLOCK_BYTES;
@ -3313,7 +3322,7 @@ ACTOR Future<Void> waitMetrics( StorageServerMetrics* self, WaitMetricsRequest r
when( StorageMetrics c = waitNext( change.getFuture() ) ) {
metrics += c;
// SOMEDAY: validation! The changes here are possibly partial changes (we recieve multiple messages per
// SOMEDAY: validation! The changes here are possibly partial changes (we receive multiple messages per
// update to our requested range). This means that the validation would have to occur after all
// the messages for one clear or set have been dispatched.
@ -3501,7 +3510,7 @@ ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterfac
when( GetValueRequest req = waitNext(ssi.getValue.getFuture()) ) {
// Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade before doing real work
if( req.debugID.present() )
g_traceBatch.addEvent("GetValueDebug", req.debugID.get().first(), "storageServer.recieved"); //.detail("TaskID", g_network->getCurrentTask());
g_traceBatch.addEvent("GetValueDebug", req.debugID.get().first(), "storageServer.received"); //.detail("TaskID", g_network->getCurrentTask());
if (SHORT_CIRCUT_ACTUAL_STORAGE && normalKeys.contains(req.key))
req.reply.send(GetValueReply());
@ -3524,7 +3533,7 @@ ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterfac
when (GetShardStateRequest req = waitNext(ssi.getShardState.getFuture()) ) {
if (req.mode == GetShardStateRequest::NO_WAIT ) {
if( self->isReadable( req.keys ) )
req.reply.send(std::make_pair(self->version.get(),self->durableVersion.get()));
req.reply.send(GetShardStateReply{ self->version.get(), self->durableVersion.get() });
else
req.reply.sendError(wrong_shard_server());
} else {
@ -3534,7 +3543,7 @@ ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterfac
when (StorageQueuingMetricsRequest req = waitNext(ssi.getQueuingMetrics.getFuture())) {
getQueuingMetrics(self, req);
}
when( ReplyPromise<Version> reply = waitNext(ssi.getVersion.getFuture()) ) {
when(ReplyPromise<VersionReply> reply = waitNext(ssi.getVersion.getFuture())) {
reply.send( self->version.get() );
}
when( ReplyPromise<KeyValueStoreType> reply = waitNext(ssi.getKeyValueStoreType.getFuture()) ) {

View File

@ -404,10 +404,10 @@ ACTOR Future<Void> runWorkloadAsync( Database cx, WorkloadInterface workIface, T
state unique_ptr<TestWorkload> delw(workload);
state Optional<ErrorOr<Void>> setupResult;
state Optional<ErrorOr<Void>> startResult;
state Optional<ErrorOr<bool>> checkResult;
state Optional<ErrorOr<CheckReply>> checkResult;
state ReplyPromise<Void> setupReq;
state ReplyPromise<Void> startReq;
state ReplyPromise<bool> checkReq;
state ReplyPromise<CheckReply> checkReq;
TraceEvent("TestBeginAsync", workIface.id()).detail("Workload", workload->description()).detail("DatabasePingDelay", databasePingDelay);
@ -452,12 +452,12 @@ ACTOR Future<Void> runWorkloadAsync( Database cx, WorkloadInterface workIface, T
}
sendResult( startReq, startResult );
}
when( ReplyPromise<bool> req = waitNext( workIface.check.getFuture() ) ) {
when(ReplyPromise<CheckReply> req = waitNext(workIface.check.getFuture())) {
checkReq = req;
if (!checkResult.present()) {
try {
bool check = wait( timeoutError( workload->check(cx), workload->getCheckTimeout() ) );
checkResult = (!startResult.present() || !startResult.get().isError()) && check;
checkResult = CheckReply{ (!startResult.present() || !startResult.get().isError()) && check };
} catch (Error& e) {
checkResult = operation_failed(); // was: checkResult = false;
if( e.code() == error_code_please_reboot || e.code() == error_code_please_reboot_delete) throw;
@ -693,16 +693,16 @@ ACTOR Future<DistributedTestResults> runWorkload( Database cx, std::vector< Test
wait( delay(3.0) );
}
state std::vector< Future<ErrorOr<bool>> > checks;
state std::vector<Future<ErrorOr<CheckReply>>> checks;
TraceEvent("CheckingResults");
printf("checking test (%s)...\n", printable(spec.title).c_str());
for(int i= 0; i < workloads.size(); i++)
checks.push_back( workloads[i].check.template getReplyUnlessFailedFor<bool>(waitForFailureTime, 0) );
checks.push_back(workloads[i].check.template getReplyUnlessFailedFor<CheckReply>(waitForFailureTime, 0));
wait( waitForAll( checks ) );
throwIfError(checks, "CheckFailedForWorkload" + printable(spec.title));
for(int i = 0; i < checks.size(); i++) {
if(checks[i].get().get())
if (checks[i].get().get().value)
success++;
else
failure++;

View File

@ -658,8 +658,7 @@ void endRole(const Role &role, UID id, std::string reason, bool ok, Error e) {
ACTOR Future<Void> workerSnapCreate(WorkerSnapRequest snapReq, StringRef snapFolder) {
state ExecCmdValueString snapArg(snapReq.snapPayload);
try {
Standalone<StringRef> role = LiteralStringRef("role=").withSuffix(snapReq.role);
int err = wait(execHelper(&snapArg, snapFolder.toString(), role.toString()));
int err = wait(execHelper(&snapArg, snapReq.snapUID, snapFolder.toString(), snapReq.role.toString()));
std::string uidStr = snapReq.snapUID.toString();
TraceEvent("ExecTraceWorker")
.detail("Uid", uidStr)
@ -819,10 +818,11 @@ ACTOR Future<Void> workerServer(
DUMPTOKEN(recruited.traceBatchDumpRequest);
}
state std::vector<Future<Void>> recoveries;
try {
std::vector<DiskStore> stores = getDiskStores( folder );
bool validateDataFiles = deleteFile(joinPath(folder, validationFilename));
std::vector<Future<Void>> recoveries;
for( int f = 0; f < stores.size(); f++ ) {
DiskStore s = stores[f];
// FIXME: Error handling
@ -1239,6 +1239,8 @@ ACTOR Future<Void> workerServer(
when( wait( handleErrors ) ) {}
}
} catch (Error& err) {
// Make sure actors are cancelled before "recovery" promises are destructed.
for (auto f : recoveries) f.cancel();
state Error e = err;
bool ok = e.code() == error_code_please_reboot || e.code() == error_code_actor_cancelled || e.code() == error_code_please_reboot_delete;
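
A standard-library analog of the hazard the new cancellation loop avoids: destroying the producer side of a promise while a consumer still waits yields a broken promise, which is what the recovery actors would otherwise see.

#include <future>
#include <iostream>

int main() {
    std::future<int> f;
    {
        std::promise<int> p;
        f = p.get_future();
    }   // p destroyed without set_value(), like "recoveries" leaving scope early
    try {
        f.get();
    } catch (const std::future_error& e) {
        std::cout << "broken promise: " << e.what() << "\n";
    }
}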
@ -1389,8 +1391,10 @@ ACTOR Future<Void> fdbd(
int64_t memoryProfileThreshold,
std::string whitelistBinPaths)
{
try {
state vector<Future<Void>> actors;
state Promise<Void> recoveredDiskFiles;
try {
ServerCoordinators coordinators( connFile );
if (g_network->isSimulated()) {
whitelistBinPaths = ",, random_path, /bin/snap_create.sh,,";
@ -1398,10 +1402,9 @@ ACTOR Future<Void> fdbd(
TraceEvent("StartingFDBD").detail("ZoneID", localities.zoneId()).detail("MachineId", localities.machineId()).detail("DiskPath", dataFolder).detail("CoordPath", coordFolder).detail("WhiteListBinPath", whitelistBinPaths);
// SOMEDAY: start the services on the machine in a staggered fashion in simulation?
state vector<Future<Void>> v;
// Endpoints should be registered first before any process trying to connect to it. So coordinationServer actor should be the first one executed before any other.
if ( coordFolder.size() )
v.push_back( fileNotFoundToNever( coordinationServer( coordFolder ) ) ); //SOMEDAY: remove the fileNotFound wrapper and make DiskQueue construction safe from errors setting up their files
actors.push_back( fileNotFoundToNever( coordinationServer( coordFolder ) ) ); //SOMEDAY: remove the fileNotFound wrapper and make DiskQueue construction safe from errors setting up their files
state UID processIDUid = wait(createAndLockProcessIdFile(dataFolder));
localities.set(LocalityData::keyProcessId, processIDUid.toString());
@ -1411,19 +1414,21 @@ ACTOR Future<Void> fdbd(
Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> cc(new AsyncVar<Optional<ClusterControllerFullInterface>>);
Reference<AsyncVar<Optional<ClusterInterface>>> ci(new AsyncVar<Optional<ClusterInterface>>);
Reference<AsyncVar<ClusterControllerPriorityInfo>> asyncPriorityInfo(new AsyncVar<ClusterControllerPriorityInfo>(getCCPriorityInfo(fitnessFilePath, processClass)));
Promise<Void> recoveredDiskFiles;
v.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo"));
v.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") );
v.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") );
v.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") );
v.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold, coordFolder, whitelistBinPaths), "WorkerServer", UID(), &normalWorkerErrors()) );
actors.push_back(reportErrors(monitorAndWriteCCPriorityInfo(fitnessFilePath, asyncPriorityInfo), "MonitorAndWriteCCPriorityInfo"));
actors.push_back( reportErrors( processClass == ProcessClass::TesterClass ? monitorLeader( connFile, cc ) : clusterController( connFile, cc , asyncPriorityInfo, recoveredDiskFiles.getFuture(), localities ), "ClusterController") );
actors.push_back( reportErrors(extractClusterInterface( cc, ci ), "ExtractClusterInterface") );
actors.push_back( reportErrors(failureMonitorClient( ci, true ), "FailureMonitorClient") );
actors.push_back( reportErrorsExcept(workerServer(connFile, cc, localities, asyncPriorityInfo, processClass, dataFolder, memoryLimit, metricsConnFile, metricsPrefix, recoveredDiskFiles, memoryProfileThreshold, coordFolder, whitelistBinPaths), "WorkerServer", UID(), &normalWorkerErrors()) );
state Future<Void> firstConnect = reportErrors( printOnFirstConnected(ci), "ClusterFirstConnectedError" );
wait( quorum(v,1) );
wait( quorum(actors,1) );
ASSERT(false); // None of these actors should terminate normally
throw internal_error();
} catch(Error &e) {
} catch (Error& e) {
// Make sure actors are cancelled before recoveredDiskFiles is destructed.
// Otherwise, these actors may get a broken promise error.
for (auto f : actors) f.cancel();
Error err = checkIOTimeout(e);
throw err;
}

View File

@ -38,7 +38,7 @@ static std::set<int> const& normalAttritionErrors() {
ACTOR Future<bool> ignoreSSFailuresForDuration(Database cx, double duration) {
// duration doesn't matter since this won't timeout
TraceEvent("IgnoreSSFailureStart");
bool _ = wait(setHealthyZone(cx, ignoreSSFailuresZoneString, 0));
bool _ = wait(setHealthyZone(cx, ignoreSSFailuresZoneString, 0));
TraceEvent("IgnoreSSFailureWait");
wait(delay(duration));
TraceEvent("IgnoreSSFailureClear");
@ -199,7 +199,8 @@ struct MachineAttritionWorkload : TestWorkload {
// }
} else if (BUGGIFY_WITH_PROB(0.005)) {
TEST(true); // Disable DD for all storage server failures
self->ignoreSSFailures = ignoreSSFailuresForDuration(cx, deterministicRandom()->random01() * 5);
self->ignoreSSFailures =
uncancellable(ignoreSSFailuresForDuration(cx, deterministicRandom()->random01() * 5));
}
TraceEvent("Assassination").detail("TargetMachine", targetMachine.toString())

View File

@ -211,7 +211,7 @@ public: // workload functions
wait(status);
break;
} catch (Error& e) {
if (e.code() == error_code_txn_exec_log_anti_quorum) {
if (e.code() == error_code_snap_log_anti_quorum_unsupported) {
snapFailed = true;
break;
}
@ -298,12 +298,12 @@ public: // workload functions
wait(status);
break;
} catch (Error& e) {
if (e.code() == error_code_cluster_not_fully_recovered ||
e.code() == error_code_txn_exec_log_anti_quorum) {
if (e.code() == error_code_snap_not_fully_recovered_unsupported ||
e.code() == error_code_snap_log_anti_quorum_unsupported) {
snapFailed = true;
break;
}
if (e.code() == error_code_transaction_not_permitted) {
if (e.code() == error_code_snap_path_not_whitelisted) {
testedFailure = true;
break;
}

View File

@ -37,7 +37,8 @@ class ASIOReactor {
public:
explicit ASIOReactor(Net2*);
void sleepAndReact(double timeout);
void sleep(double timeout);
void react();
void wake();

View File

@ -72,68 +72,3 @@ template <class T, uint32_t B>
struct ComposedIdentifierExternal<T, B, true> {
static constexpr FileIdentifier value = ComposedIdentifier<T, B>::file_identifier;
};
template <>
struct FileIdentifierFor<int> {
constexpr static FileIdentifier value = 1;
};
template <>
struct FileIdentifierFor<unsigned> {
constexpr static FileIdentifier value = 2;
};
template <>
struct FileIdentifierFor<long> {
constexpr static FileIdentifier value = 3;
};
template <>
struct FileIdentifierFor<unsigned long> {
constexpr static FileIdentifier value = 4;
};
template <>
struct FileIdentifierFor<long long> {
constexpr static FileIdentifier value = 5;
};
template <>
struct FileIdentifierFor<unsigned long long> {
constexpr static FileIdentifier value = 6;
};
template <>
struct FileIdentifierFor<short> {
constexpr static FileIdentifier value = 7;
};
template <>
struct FileIdentifierFor<unsigned short> {
constexpr static FileIdentifier value = 8;
};
template <>
struct FileIdentifierFor<signed char> {
constexpr static FileIdentifier value = 9;
};
template <>
struct FileIdentifierFor<unsigned char> {
constexpr static FileIdentifier value = 10;
};
template <>
struct FileIdentifierFor<bool> {
constexpr static FileIdentifier value = 11;
};
template <>
struct FileIdentifierFor<float> {
constexpr static FileIdentifier value = 7266212;
};
template <>
struct FileIdentifierFor<double> {
constexpr static FileIdentifier value = 9348150;
};
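
The consequence of deleting these specializations, sketched with a minimal detection trait: primitives no longer carry their own file identifier, so top-level messages must wrap them, as the unit tests later in this diff do with a small Int struct.

#include <type_traits>

template <class T, class = void>
struct HasOwnFileIdentifier : std::false_type {};
template <class T>
struct HasOwnFileIdentifier<T, std::void_t<decltype(T::file_identifier)>> : std::true_type {};

struct Int { constexpr static unsigned file_identifier = 12345; unsigned value; };

static_assert(!HasOwnFileIdentifier<int>::value, "primitives no longer qualify");
static_assert(HasOwnFileIdentifier<Int>::value, "wrapper structs supply their own");
int main() {}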

View File

@ -150,7 +150,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( METRIC_LIMIT_RESPONSE_FACTOR, 10 ); // The additional queue size at which to disable logging of another level (higher == less restrictive)
//Load Balancing
init( LOAD_BALANCE_ZONE_ID_LOCALITY_ENABLED, 1 );
init( LOAD_BALANCE_ZONE_ID_LOCALITY_ENABLED, 0 );
init( LOAD_BALANCE_DC_ID_LOCALITY_ENABLED, 1 );
init( LOAD_BALANCE_MAX_BACKOFF, 5.0 );
init( LOAD_BALANCE_START_BACKOFF, 0.01 );
@ -172,6 +172,8 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( FUTURE_VERSION_INITIAL_BACKOFF, 1.0 );
init( FUTURE_VERSION_MAX_BACKOFF, 8.0 );
init( FUTURE_VERSION_BACKOFF_GROWTH, 2.0 );
init( LOAD_BALANCE_MAX_BAD_OPTIONS, 1 ); //should be the same as MAX_MACHINES_FALLING_BEHIND
init( LOAD_BALANCE_PENALTY_IS_BAD, true );
}
static std::string toLower( std::string const& name ) {

View File

@ -194,6 +194,8 @@ public:
double FUTURE_VERSION_INITIAL_BACKOFF;
double FUTURE_VERSION_MAX_BACKOFF;
double FUTURE_VERSION_BACKOFF_GROWTH;
int LOAD_BALANCE_MAX_BAD_OPTIONS;
bool LOAD_BALANCE_PENALTY_IS_BAD;
FlowKnobs(bool randomize = false, bool isSimulated = false);
};

View File

@ -209,6 +209,8 @@ public:
Int64MetricHandle countASIOEvents;
Int64MetricHandle countSlowTaskSignals;
Int64MetricHandle priorityMetric;
DoubleMetricHandle countLaunchTime;
DoubleMetricHandle countReactTime;
BoolMetricHandle awakeMetric;
EventMetricHandle<SlowTask> slowTaskMetric;
@ -545,6 +547,8 @@ void Net2::initMetrics() {
priorityMetric.init(LiteralStringRef("Net2.Priority"));
awakeMetric.init(LiteralStringRef("Net2.Awake"));
slowTaskMetric.init(LiteralStringRef("Net2.SlowTask"));
countLaunchTime.init(LiteralStringRef("Net2.CountLaunchTime"));
countReactTime.init(LiteralStringRef("Net2.CountReactTime"));
}
void Net2::run() {
@ -580,7 +584,9 @@ void Net2::run() {
taskBegin = nnow;
trackMinPriority(TaskPriority::RunCycleFunction, taskBegin);
runFunc();
checkForSlowTask(tsc_begin, __rdtsc(), timer_monotonic() - taskBegin, TaskPriority::RunCycleFunction);
double taskEnd = timer_monotonic();
countLaunchTime += taskEnd - taskBegin;
checkForSlowTask(tsc_begin, __rdtsc(), taskEnd - taskBegin, TaskPriority::RunCycleFunction);
}
double sleepTime = 0;
@ -596,18 +602,26 @@ void Net2::run() {
if (!timers.empty()) {
sleepTime = timers.top().at - sleepStart; // + 500e-6?
}
trackMinPriority(TaskPriority::Zero, sleepStart);
if (sleepTime > 0) {
trackMinPriority(TaskPriority::Zero, sleepStart);
awakeMetric = false;
priorityMetric = 0;
reactor.sleep(sleepTime);
awakeMetric = true;
}
}
awakeMetric = false;
if( sleepTime > 0 )
priorityMetric = 0;
reactor.sleepAndReact(sleepTime);
awakeMetric = true;
tsc_begin = __rdtsc();
taskBegin = timer_monotonic();
trackMinPriority(TaskPriority::ASIOReactor, taskBegin);
reactor.react();
updateNow();
double now = this->currentTime;
countReactTime += now - taskBegin;
checkForSlowTask(tsc_begin, __rdtsc(), now - taskBegin, TaskPriority::ASIOReactor);
if ((now-nnow) > FLOW_KNOBS->SLOW_LOOP_CUTOFF && nondeterministicRandom()->random01() < (now-nnow)*FLOW_KNOBS->SLOW_LOOP_SAMPLING_RATE)
TraceEvent("SomewhatSlowRunLoopTop").detail("Elapsed", now - nnow);
@ -988,7 +1002,7 @@ ASIOReactor::ASIOReactor(Net2* net)
#endif
}
void ASIOReactor::sleepAndReact(double sleepTime) {
void ASIOReactor::sleep(double sleepTime) {
if (sleepTime > FLOW_KNOBS->BUSY_WAIT_THRESHOLD) {
if (FLOW_KNOBS->REACTOR_FLAGS & 4) {
#ifdef __linux
@ -1015,6 +1029,9 @@ void ASIOReactor::sleepAndReact(double sleepTime) {
if (!(FLOW_KNOBS->REACTOR_FLAGS & 8))
threadYield();
}
}
void ASIOReactor::react() {
while (ios.poll_one()) ++network->countASIOEvents; // Make this a task?
}

View File

@ -137,7 +137,9 @@ SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *sta
.detail("WriteProbes", netData.countWriteProbes - statState->networkState.countWriteProbes)
.detail("PacketsRead", netData.countPacketsReceived - statState->networkState.countPacketsReceived)
.detail("PacketsGenerated", netData.countPacketsGenerated - statState->networkState.countPacketsGenerated)
.detail("WouldBlock", netData.countWouldBlock - statState->networkState.countWouldBlock);
.detail("WouldBlock", netData.countWouldBlock - statState->networkState.countWouldBlock)
.detail("LaunchTime", netData.countLaunchTime - statState->networkState.countLaunchTime)
.detail("ReactTime", netData.countReactTime - statState->networkState.countReactTime);
for (int i = 0; i<NetworkMetrics::SLOW_EVENT_BINS; i++) {
if (int c = g_network->networkMetrics.countSlowEvents[i] - statState->networkMetricsState.countSlowEvents[i]) {

View File

@ -80,53 +80,49 @@ struct NetworkData {
int64_t countConnEstablished;
int64_t countConnClosedWithError;
int64_t countConnClosedWithoutError;
double countLaunchTime;
double countReactTime;
void init() {
auto getValue = [] (StringRef name) -> int64_t {
Reference<Int64Metric> r = Int64Metric::getOrCreateInstance(name);
int64_t v = 0;
if(r)
v = r->getValue();
return v;
};
bytesSent = getValue(LiteralStringRef("Net2.BytesSent"));
countPacketsReceived = getValue(LiteralStringRef("Net2.CountPacketsReceived"));
countPacketsGenerated = getValue(LiteralStringRef("Net2.CountPacketsGenerated"));
bytesReceived = getValue(LiteralStringRef("Net2.BytesReceived"));
countWriteProbes = getValue(LiteralStringRef("Net2.CountWriteProbes"));
countReadProbes = getValue(LiteralStringRef("Net2.CountReadProbes"));
countReads = getValue(LiteralStringRef("Net2.CountReads"));
countWouldBlock = getValue(LiteralStringRef("Net2.CountWouldBlock"));
countWrites = getValue(LiteralStringRef("Net2.CountWrites"));
countRunLoop = getValue(LiteralStringRef("Net2.CountRunLoop"));
countCantSleep = getValue(LiteralStringRef("Net2.CountCantSleep"));
countWontSleep = getValue(LiteralStringRef("Net2.CountWontSleep"));
countTimers = getValue(LiteralStringRef("Net2.CountTimers"));
countTasks = getValue(LiteralStringRef("Net2.CountTasks"));
countYields = getValue(LiteralStringRef("Net2.CountYields"));
countYieldBigStack = getValue(LiteralStringRef("Net2.CountYieldBigStack"));
countYieldCalls = getValue(LiteralStringRef("Net2.CountYieldCalls"));
countASIOEvents = getValue(LiteralStringRef("Net2.CountASIOEvents"));
countYieldCallsTrue = getValue(LiteralStringRef("Net2.CountYieldCallsTrue"));
countSlowTaskSignals = getValue(LiteralStringRef("Net2.CountSlowTaskSignals"));
countConnEstablished = getValue(LiteralStringRef("Net2.CountConnEstablished"));
countConnClosedWithError = getValue(LiteralStringRef("Net2.CountConnClosedWithError"));
countConnClosedWithoutError = getValue(LiteralStringRef("Net2.CountConnClosedWithoutError"));
countFileLogicalWrites = getValue(LiteralStringRef("AsyncFile.CountLogicalWrites"));
countFileLogicalReads = getValue(LiteralStringRef("AsyncFile.CountLogicalReads"));
countAIOSubmit = getValue(LiteralStringRef("AsyncFile.CountAIOSubmit"));
countAIOCollect = getValue(LiteralStringRef("AsyncFile.CountAIOCollect"));
countFileCacheWrites = getValue(LiteralStringRef("AsyncFile.CountCacheWrites"));
countFileCacheReads = getValue(LiteralStringRef("AsyncFile.CountCacheReads"));
countFileCacheWritesBlocked = getValue(LiteralStringRef("AsyncFile.CountCacheWritesBlocked"));
countFileCacheReadsBlocked = getValue(LiteralStringRef("AsyncFile.CountCacheReadsBlocked"));
countFileCachePageReadsMerged = getValue(LiteralStringRef("AsyncFile.CountCachePageReadsMerged"));
countFileCacheFinds = getValue(LiteralStringRef("AsyncFile.CountCacheFinds"));
countFileCacheReadBytes = getValue(LiteralStringRef("AsyncFile.CountCacheReadBytes"));
countFilePageCacheHits = getValue(LiteralStringRef("AsyncFile.CountCachePageReadsHit"));
countFilePageCacheMisses = getValue(LiteralStringRef("AsyncFile.CountCachePageReadsMissed"));
countFilePageCacheEvictions = getValue(LiteralStringRef("EvictablePageCache.CacheEvictions"));
bytesSent = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.BytesSent"));
countPacketsReceived = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountPacketsReceived"));
countPacketsGenerated = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountPacketsGenerated"));
bytesReceived = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.BytesReceived"));
countWriteProbes = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountWriteProbes"));
countReadProbes = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountReadProbes"));
countReads = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountReads"));
countWouldBlock = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountWouldBlock"));
countWrites = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountWrites"));
countRunLoop = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountRunLoop"));
countCantSleep = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountCantSleep"));
countWontSleep = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountWontSleep"));
countTimers = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountTimers"));
countTasks = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountTasks"));
countYields = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountYields"));
countYieldBigStack = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountYieldBigStack"));
countYieldCalls = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountYieldCalls"));
countASIOEvents = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountASIOEvents"));
countYieldCallsTrue = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountYieldCallsTrue"));
countSlowTaskSignals = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountSlowTaskSignals"));
countConnEstablished = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountConnEstablished"));
countConnClosedWithError = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountConnClosedWithError"));
countConnClosedWithoutError = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountConnClosedWithoutError"));
countLaunchTime = DoubleMetric::getValueOrDefault(LiteralStringRef("Net2.CountLaunchTime"));
countReactTime = DoubleMetric::getValueOrDefault(LiteralStringRef("Net2.CountReactTime"));
countFileLogicalWrites = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountLogicalWrites"));
countFileLogicalReads = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountLogicalReads"));
countAIOSubmit = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountAIOSubmit"));
countAIOCollect = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountAIOCollect"));
countFileCacheWrites = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCacheWrites"));
countFileCacheReads = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCacheReads"));
countFileCacheWritesBlocked = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCacheWritesBlocked"));
countFileCacheReadsBlocked = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCacheReadsBlocked"));
countFileCachePageReadsMerged = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCachePageReadsMerged"));
countFileCacheFinds = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCacheFinds"));
countFileCacheReadBytes = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCacheReadBytes"));
countFilePageCacheHits = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCachePageReadsHit"));
countFilePageCacheMisses = Int64Metric::getValueOrDefault(LiteralStringRef("AsyncFile.CountCachePageReadsMissed"));
countFilePageCacheEvictions = Int64Metric::getValueOrDefault(LiteralStringRef("EvictablePageCache.CacheEvictions"));
}
};

View File

@ -269,6 +269,14 @@ struct MetricUtil {
return m;
}
static ValueType getValueOrDefault(StringRef const& name, StringRef const& id = StringRef(), ValueType defaultValue = ValueType()) {
Reference<T> r = getOrCreateInstance(name, id);
if(r) {
return r->getValue();
}
return defaultValue;
}
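
Usage follows directly from the definition; an unregistered metric yields ValueType(), so callers can skip the null check:

int64_t asioEvents = Int64Metric::getValueOrDefault(LiteralStringRef("Net2.CountASIOEvents"));
double reactTime = DoubleMetric::getValueOrDefault(LiteralStringRef("Net2.CountReactTime"));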
// Lookup the T metric by name and return its value (or nullptr if it doesn't exist)
static T * lookupMetric(MetricNameRef const &name) {
auto it = T::metricMap().find(name);
@ -1319,6 +1327,7 @@ public:
};
typedef ContinuousMetric<int64_t> Int64Metric;
typedef ContinuousMetric<double> DoubleMetric;
typedef Int64Metric VersionMetric;
typedef ContinuousMetric<bool> BoolMetric;
typedef ContinuousMetric<Standalone<StringRef>> StringMetric;
@ -1406,6 +1415,7 @@ typedef MetricHandle<Int64Metric> Int64MetricHandle;
typedef MetricHandle<VersionMetric> VersionMetricHandle;
typedef MetricHandle<BoolMetric> BoolMetricHandle;
typedef MetricHandle<StringMetric> StringMetricHandle;
typedef MetricHandle<DoubleMetric> DoubleMetricHandle;
template <typename E>
using EventMetricHandle = MetricHandle<EventMetric<E>>;

View File

@ -23,6 +23,7 @@
const StringRef BaseEventMetric::metricType = LiteralStringRef("Event");
template<> const StringRef Int64Metric::metricType = LiteralStringRef("Int64");
template<> const StringRef DoubleMetric::metricType = LiteralStringRef("Double");
template<> const StringRef BoolMetric::metricType = LiteralStringRef("Bool");
template<> const StringRef StringMetric::metricType = LiteralStringRef("String");

View File

@ -65,9 +65,6 @@ ERROR( lookup_failed, 1041, "DNS lookup failed" )
ERROR( proxy_memory_limit_exceeded, 1042, "Proxy commit memory limit exceeded" )
ERROR( shutdown_in_progress, 1043, "Operation no longer supported due to shutdown" )
ERROR( serialization_failed, 1044, "Failed to deserialize an object" )
ERROR( transaction_not_permitted, 1045, "Operation not permitted")
ERROR( cluster_not_fully_recovered, 1046, "Cluster not fully recovered")
ERROR( txn_exec_log_anti_quorum, 1047, "Execute Transaction not supported when log anti quorum is configured")
ERROR( connection_unreferenced, 1048, "No peer references for connection" )
ERROR( connection_idle, 1049, "Connection closed after idle timeout" )
ERROR( disk_adapter_reset, 1050, "The disk queue adapter reset" )
@ -206,6 +203,17 @@ ERROR( key_not_found, 2400, "Expected key is missing")
ERROR( json_malformed, 2401, "JSON string was malformed")
ERROR( json_eof_expected, 2402, "JSON string did not terminate where expected")
// 2500 - disk snapshot based backup errors
ERROR( snap_disable_tlog_pop_failed, 2500, "Disk Snapshot error")
ERROR( snap_storage_failed, 2501, "Failed to snapshot storage nodes")
ERROR( snap_tlog_failed, 2502, "Failed to snapshot TLog nodes")
ERROR( snap_coord_failed, 2503, "Failed to snapshot coordinator nodes")
ERROR( snap_enable_tlog_pop_failed, 2504, "Disk Snapshot error")
ERROR( snap_path_not_whitelisted, 2505, "Snapshot create binary path not whitelisted")
ERROR( snap_not_fully_recovered_unsupported, 2506, "Unsupported when the cluster is not fully recovered")
ERROR( snap_log_anti_quorum_unsupported, 2507, "Unsupported when log anti quorum is configured")
ERROR( snap_with_recovery_unsupported, 2508, "Cluster recovery during snapshot operation not supported")
// 4xxx Internal errors (those that should be generated only by bugs) are decimal 4xxx
ERROR( unknown_error, 4000, "An unknown error occurred" ) // C++ exception not of type Error
ERROR( internal_error, 4100, "An internal error occurred" )

View File

@ -1140,12 +1140,19 @@ inline FileIdentifier read_file_identifier(const uint8_t* in) {
return result;
}
namespace detail {
template <class T>
struct YesFileIdentifier {
constexpr static FileIdentifier file_identifier = FileIdentifierFor<T>::value;
};
struct NoFileIdentifier {};
}; // namespace detail
// members of unions must be tables in flatbuffers, so you can use this to
// introduce the indirection only when necessary.
template <class T>
struct EnsureTable {
static_assert(HasFileIdentifier<T>::value);
constexpr static FileIdentifier file_identifier = FileIdentifierFor<T>::value;
struct EnsureTable
: std::conditional_t<HasFileIdentifier<T>::value, detail::YesFileIdentifier<T>, detail::NoFileIdentifier> {
EnsureTable() = default;
EnsureTable(const T& t) : t(t) {}
template <class Archive>
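
A standalone sketch of the conditional-base pattern EnsureTable now uses, with a hand-rolled boolean flag in place of HasFileIdentifier: the member exists only when the chosen base provides it, instead of being asserted to always exist.

#include <cstdint>
#include <type_traits>

using FileIdentifier = uint32_t;

template <FileIdentifier Id>
struct YesId { constexpr static FileIdentifier file_identifier = Id; };
struct NoId {};

template <class T, bool hasId, FileIdentifier Id = 0>
struct Ensure : std::conditional_t<hasId, YesId<Id>, NoId> {
    T t;
};

static_assert(Ensure<int, true, 42>::file_identifier == 42);
int main() {
    Ensure<int, false> e; // compiles fine; it simply has no file_identifier
    (void)e;
}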

View File

@ -249,10 +249,24 @@ void enableBuggify(bool enabled, BuggifyType type) {
buggifyActivated[int(type)] = enabled;
}
namespace {
// Simple message for flatbuffers unittests
struct Int {
constexpr static FileIdentifier file_identifier = 12345;
uint32_t value;
Int() = default;
Int(uint32_t value) : value(value) {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, value);
}
};
} // namespace
TEST_CASE("/flow/FlatBuffers/ErrorOr") {
{
ErrorOr<int> in(worker_removed());
ErrorOr<int> out;
ErrorOr<Int> in(worker_removed());
ErrorOr<Int> out;
ObjectWriter writer(Unversioned());
writer.serialize(in);
Standalone<StringRef> copy = writer.toStringRef();
@ -262,23 +276,23 @@ TEST_CASE("/flow/FlatBuffers/ErrorOr") {
ASSERT(out.getError().code() == in.getError().code());
}
{
ErrorOr<uint32_t> in(deterministicRandom()->randomUInt32());
ErrorOr<uint32_t> out;
ErrorOr<Int> in(deterministicRandom()->randomUInt32());
ErrorOr<Int> out;
ObjectWriter writer(Unversioned());
writer.serialize(in);
Standalone<StringRef> copy = writer.toStringRef();
ArenaObjectReader reader(copy.arena(), copy, Unversioned());
reader.deserialize(out);
ASSERT(!out.isError());
ASSERT(out.get() == in.get());
ASSERT(out.get().value == in.get().value);
}
return Void();
}
TEST_CASE("/flow/FlatBuffers/Optional") {
{
Optional<int> in;
Optional<int> out;
Optional<Int> in;
Optional<Int> out;
ObjectWriter writer(Unversioned());
writer.serialize(in);
Standalone<StringRef> copy = writer.toStringRef();
@ -287,15 +301,15 @@ TEST_CASE("/flow/FlatBuffers/Optional") {
ASSERT(!out.present());
}
{
Optional<uint32_t> in(deterministicRandom()->randomUInt32());
Optional<uint32_t> out;
Optional<Int> in(deterministicRandom()->randomUInt32());
Optional<Int> out;
ObjectWriter writer(Unversioned());
writer.serialize(in);
Standalone<StringRef> copy = writer.toStringRef();
ArenaObjectReader reader(copy.arena(), copy, Unversioned());
reader.deserialize(out);
ASSERT(out.present());
ASSERT(out.get() == in.get());
ASSERT(out.get().value == in.get().value);
}
return Void();
}

View File

@ -32,6 +32,7 @@
enum class TaskPriority {
Max = 1000000,
ASIOReactor = 20001,
RunCycleFunction = 20000,
FlushTrace = 10500,
WriteSocket = 10000,

View File

@ -31,9 +31,15 @@ if [ "$1" = configure ]; then
fi
fi
# It would be better to use 'systemctl start foundationdb.service'.
# Since it does not work on Ubuntu 14.04, use this workaround as of now.
/etc/init.d/foundationdb start
# Start the service with systemd if it is available.
if pidof systemd > /dev/null; then
# Use deb-systemd-invoke if available to respect policy-rc.d.
systemctl=$(command -v deb-systemd-invoke || command -v systemctl)
systemctl --system daemon-reload > /dev/null || true
systemctl start foundationdb.service
else
/etc/init.d/foundationdb start
fi
if [ "$2" = "" ]; then
update-rc.d foundationdb defaults

View File

@ -32,7 +32,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
<Product Name='$(var.Title)'
Id='{E2FB8839-9C35-4E40-AFB1-7409961781F7}'
Id='{7AD1AE5E-FD5B-42F3-A638-A81A963B1CE4}'
UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
Version='$(var.Version)'
Manufacturer='$(var.Manufacturer)'