Merge branch 'release-6.2' into features/toml11-docker

commit 5b57f03eac
@@ -89,3 +89,4 @@ flow/coveragetool/obj
 temp/
 /compile_commands.json
 /.ccls-cache
+.clangd/
@@ -18,7 +18,7 @@
 # limitations under the License.
 cmake_minimum_required(VERSION 3.12)
 project(foundationdb
-  VERSION 6.2.28
+  VERSION 6.2.29
   DESCRIPTION "FoundationDB is a scalable, fault-tolerant, ordered key-value store with full ACID transactions."
   HOMEPAGE_URL "http://www.foundationdb.org/"
   LANGUAGES C CXX ASM)
@@ -1,17 +1,27 @@
 FROM centos:6
 
+# Clean yum cache, disable default Base repo and enable Vault
+RUN yum clean all &&\
+    sed -i -e 's/gpgcheck=1/enabled=0/g' /etc/yum.repos.d/CentOS-Base.repo &&\
+    sed -i -e 's/enabled=0/enabled=1/g' /etc/yum.repos.d/CentOS-Vault.repo &&\
+    sed -i -n '/6.1/q;p' /etc/yum.repos.d/CentOS-Vault.repo &&\
+    sed -i -e "s/6\.0/$(cut -d\ -f3 /etc/redhat-release)/g" /etc/yum.repos.d/CentOS-Vault.repo &&\
+    yum install -y yum-utils &&\
+    yum-config-manager --enable rhel-server-rhscl-7-rpms &&\
+    yum -y install centos-release-scl-rh epel-release \
+        http://opensource.wandisco.com/centos/6/git/x86_64/wandisco-git-release-6-1.noarch.rpm &&\
+    sed -i -e 's/#baseurl=/baseurl=/g' -e 's/mirror.centos.org/vault.centos.org/g' \
+        -e 's/mirrorlist=/#mirrorlist=/g' /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo &&\
+    yum clean all
+
 # Install dependencies for developer tools, bindings,\
 # documentation, actorcompiler, and packaging tools\
-RUN yum install -y yum-utils &&\
-    yum-config-manager --enable rhel-server-rhscl-7-rpms &&\
-    yum -y install centos-release-scl epel-release \
-        http://opensource.wandisco.com/centos/6/git/x86_64/wandisco-git-release-6-1.noarch.rpm &&\
-    yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \
-        devtoolset-8-gcc-8.3.1 devtoolset-8-gcc-c++-8.3.1 \
-        devtoolset-8-libubsan-devel devtoolset-8-libasan-devel devtoolset-8-valgrind-devel \
-        rh-python36-python-devel rh-ruby24 golang python27 rpm-build \
-        mono-core debbuild python-pip dos2unix valgrind-devel ccache \
-        distcc wget git lz4 lz4-devel lz4-static &&\
+RUN yum -y install devtoolset-8-8.1-1.el6 java-1.8.0-openjdk-devel \
+        devtoolset-8-gcc-8.3.1 devtoolset-8-gcc-c++-8.3.1 \
+        devtoolset-8-libubsan-devel devtoolset-8-libasan-devel devtoolset-8-valgrind-devel \
+        rh-python36-python-devel rh-ruby24 golang python27 rpm-build \
+        mono-core debbuild python-pip dos2unix valgrind-devel ccache \
+        distcc wget libxslt git lz4 lz4-devel lz4-static &&\
     pip install boto3==1.1.1
 
 USER root
@@ -41,6 +51,8 @@ RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.1
 
 # install Ninja
 RUN cd /tmp && curl -L https://github.com/ninja-build/ninja/archive/v1.9.0.zip -o ninja.zip &&\
+    echo "8e2e654a418373f10c22e4cc9bdbe9baeca8527ace8d572e0b421e9d9b85b7ef ninja.zip" > /tmp/ninja-sha.txt &&\
+    sha256sum -c /tmp/ninja-sha.txt &&\
     unzip ninja.zip && cd ninja-1.9.0 && scl enable devtoolset-8 -- ./configure.py --bootstrap && cp ninja /usr/bin &&\
    cd .. && rm -rf ninja-1.9.0 ninja.zip
 
@@ -64,13 +76,48 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
     echo "d573d2f15cdda883714f7e0bc87b814a8d4a53a82edde558f08f940e905541ee rocksdb.tar.gz" > rocksdb-sha.txt &&\
     sha256sum -c rocksdb-sha.txt && tar xf rocksdb.tar.gz && rm -rf rocksdb.tar.gz rocksdb-sha.txt
 
+RUN cd /opt/ && curl -L https://github.com/manticoresoftware/manticoresearch/raw/master/misc/junit/ctest2junit.xsl -o ctest2junit.xsl
+
+# Setting this environment variable switches from OpenSSL to BoringSSL
+#ENV OPENSSL_ROOT_DIR=/opt/boringssl
+
+# install BoringSSL: TODO: They don't seem to have releases(?) I picked today's master SHA.
+RUN cd /opt &&\
+    git clone https://boringssl.googlesource.com/boringssl &&\
+    cd boringssl &&\
+    git checkout e796cc65025982ed1fb9ef41b3f74e8115092816 &&\
+    mkdir build
+
+# ninja doesn't respect CXXFLAGS, and the boringssl CMakeLists doesn't expose an option to define __STDC_FORMAT_MACROS
+# also, enable -fPIC.
+# this is moderately uglier than creating a patchfile, but easier to maintain.
+RUN cd /opt/boringssl &&\
+    for f in crypto/fipsmodule/rand/fork_detect_test.cc \
+             include/openssl/bn.h \
+             ssl/test/bssl_shim.cc ; do \
+        perl -p -i -e 's/#include <inttypes.h>/#define __STDC_FORMAT_MACROS 1\n#include <inttypes.h>/g;' $f ; \
+    done &&\
+    perl -p -i -e 's/-Werror/-Werror -fPIC/' CMakeLists.txt &&\
+    git diff
+
+RUN cd /opt/boringssl/build &&\
+    scl enable devtoolset-8 rh-python36 rh-ruby24 -- cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. &&\
+    scl enable devtoolset-8 rh-python36 rh-ruby24 -- ninja &&\
+    ./ssl/ssl_test &&\
+    mkdir -p ../lib && cp crypto/libcrypto.a ssl/libssl.a ../lib
+
 # Localize time zone
 ARG TIMEZONEINFO=America/Los_Angeles
 RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime
 
-LABEL version=0.1.20
-ENV DOCKER_IMAGEVER=0.1.20
+LABEL version=0.1.22
+ENV DOCKER_IMAGEVER=0.1.22
 ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
 ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
 ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
 
 ENV CCACHE_NOHASHDIR=true
 ENV CCACHE_UMASK=0000
 ENV CCACHE_SLOPPINESS="file_macro,time_macros,include_file_mtime,include_file_ctime,file_stat_matches"
 
 CMD scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash
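A note on the ``__STDC_FORMAT_MACROS`` patching in the build-image hunk above: on the old glibc headers shipped with centos:6, the ``PRIu64``-family macros in ``<inttypes.h>`` are only visible to C++ when that macro is defined before the header is included, which is exactly what the perl one-liner injects into the three BoringSSL sources. A minimal standalone illustration of the failure mode (my own example, not part of the image)::

    // Without the define, PRIu64 is undeclared in C++ on pre-C++11 glibc headers.
    #define __STDC_FORMAT_MACROS 1
    #include <inttypes.h>
    #include <cstdio>

    int main() {
        uint64_t x = 42;
        std::printf("%" PRIu64 "\n", x);
        return 0;
    }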
@@ -1,4 +1,5 @@
-FROM foundationdb/foundationdb-build:0.1.19
+ARG IMAGE_TAG=0.1.21
+FROM foundationdb/foundationdb-build:${IMAGE_TAG}
 
 USER root
 
@@ -50,8 +51,8 @@ RUN cp -iv /usr/local/bin/clang++ /usr/local/bin/clang++.deref &&\
     ldconfig &&\
     rm -rf /mnt/artifacts
 
-LABEL version=0.11.11
-ENV DOCKER_IMAGEVER=0.11.11
+LABEL version=0.11.13
+ENV DOCKER_IMAGEVER=0.11.13
 
 ENV CLANGCC=/usr/local/bin/clang.de8a65ef
 ENV CLANGCXX=/usr/local/bin/clang++.de8a65ef

@@ -63,8 +64,5 @@ ENV CC=/usr/local/bin/clang.de8a65ef
 ENV CXX=/usr/local/bin/clang++.de8a65ef
 ENV USE_LD=LLD
 ENV USE_LIBCXX=1
-ENV CCACHE_NOHASHDIR=true
-ENV CCACHE_UMASK=0000
-ENV CCACHE_SLOPPINESS="file_macro,time_macros,include_file_mtime,include_file_ctime,file_stat_matches"
 
 CMD scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash
@@ -2,7 +2,7 @@ version: "3"
 
 services:
   common: &common
-    image: foundationdb/foundationdb-build:0.1.20
+    image: foundationdb/foundationdb-build:0.1.22
 
   build-setup: &build-setup
     <<: *common
@@ -233,7 +233,6 @@ else()
       -Wno-unused-function
       -Wno-unused-local-typedef
       -Wno-unused-parameter
-      -Wno-unused-value
     )
   if (USE_CCACHE)
     add_compile_options(
@@ -229,7 +229,7 @@ Furthermore, this version can only be called with a ``Database``, making it impo
 Note that by default, the operation will be retried an infinite number of times and the transaction will never time out. It is therefore recommended that the client choose a default transaction retry limit or timeout value that is suitable for their application. This can be set either at the transaction level using the ``SetRetryLimit`` or ``SetTimeout`` transaction options or at the database level with the ``SetTransactionRetryLimit`` or ``SetTransactionTimeout`` database options. For example, one can set a one minute timeout on each transaction and a default retry limit of 100 by calling::
 
     db.Options().SetTransactionTimeout(60000) // 60,000 ms = 1 minute
-    db.Options().SetRetryLimit(100)
+    db.Options().SetTransactionRetryLimit(100)
 
 Making some sample classes
 --------------------------
@@ -157,7 +157,7 @@ If instead you pass a :class:`Transaction` for the :class:`TransactionContext` p
 Note that by default, the operation will be retried an infinite number of times and the transaction will never time out. It is therefore recommended that the client choose a default transaction retry limit or timeout value that is suitable for their application. This can be set either at the transaction level using the ``setRetryLimit`` or ``setTimeout`` transaction options or at the database level with the ``setTransactionRetryLimit`` or ``setTransactionTimeout`` database options. For example, one can set a one minute timeout on each transaction and a default retry limit of 100 by calling::
 
     db.options().setTransactionTimeout(60000); // 60,000 ms = 1 minute
-    db.options().setRetryLimit(100);
+    db.options().setTransactionRetryLimit(100);
 
 Making some sample classes
 --------------------------
@@ -444,7 +444,7 @@ Here's the code for the scheduling tutorial:
         fdb = FDB.selectAPIVersion(620);
         db = fdb.open();
         db.options().setTransactionTimeout(60000); // 60,000 ms = 1 minute
-        db.options().setRetryLimit(100);
+        db.options().setTransactionRetryLimit(100);
     }
 
     // Generate 1,620 classes like '9:00 chem for dummies'
 
@@ -126,7 +126,7 @@ If instead you pass a :class:`Transaction` for the ``db_or_tr`` parameter, the t
 Note that by default, the operation will be retried an infinite number of times and the transaction will never time out. It is therefore recommended that the client choose a default transaction retry limit or timeout value that is suitable for their application. This can be set either at the transaction level using the ``set_retry_limit`` or ``set_timeout`` transaction options or at the database level with the ``set_transaction_retry_limit`` or ``set_transaction_timeout`` database options. For example, one can set a one minute timeout on each transaction and a default retry limit of 100 by calling::
 
     @db.options.set_transaction_timeout(60000) # 60,000 ms = 1 minute
-    @db.options.set_retry_limit(100)
+    @db.options.set_transaction_retry_limit(100)
 
 Making some sample classes
 --------------------------
@@ -10,38 +10,38 @@ macOS
 
 The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
 
-* `FoundationDB-6.2.27.pkg <https://www.foundationdb.org/downloads/6.2.27/macOS/installers/FoundationDB-6.2.27.pkg>`_
+* `FoundationDB-6.2.28.pkg <https://www.foundationdb.org/downloads/6.2.28/macOS/installers/FoundationDB-6.2.28.pkg>`_
 
 Ubuntu
 ------
 
 The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
 
-* `foundationdb-clients-6.2.27-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.27/ubuntu/installers/foundationdb-clients_6.2.27-1_amd64.deb>`_
-* `foundationdb-server-6.2.27-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.27/ubuntu/installers/foundationdb-server_6.2.27-1_amd64.deb>`_ (depends on the clients package)
+* `foundationdb-clients-6.2.28-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.28/ubuntu/installers/foundationdb-clients_6.2.28-1_amd64.deb>`_
+* `foundationdb-server-6.2.28-1_amd64.deb <https://www.foundationdb.org/downloads/6.2.28/ubuntu/installers/foundationdb-server_6.2.28-1_amd64.deb>`_ (depends on the clients package)
 
 RHEL/CentOS EL6
 ---------------
 
 The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
 
-* `foundationdb-clients-6.2.27-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.27/rhel6/installers/foundationdb-clients-6.2.27-1.el6.x86_64.rpm>`_
-* `foundationdb-server-6.2.27-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.27/rhel6/installers/foundationdb-server-6.2.27-1.el6.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.2.28-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.28/rhel6/installers/foundationdb-clients-6.2.28-1.el6.x86_64.rpm>`_
+* `foundationdb-server-6.2.28-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.28/rhel6/installers/foundationdb-server-6.2.28-1.el6.x86_64.rpm>`_ (depends on the clients package)
 
 RHEL/CentOS EL7
 ---------------
 
 The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
 
-* `foundationdb-clients-6.2.27-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.27/rhel7/installers/foundationdb-clients-6.2.27-1.el7.x86_64.rpm>`_
-* `foundationdb-server-6.2.27-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.27/rhel7/installers/foundationdb-server-6.2.27-1.el7.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.2.28-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.28/rhel7/installers/foundationdb-clients-6.2.28-1.el7.x86_64.rpm>`_
+* `foundationdb-server-6.2.28-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.2.28/rhel7/installers/foundationdb-server-6.2.28-1.el7.x86_64.rpm>`_ (depends on the clients package)
 
 Windows
 -------
 
 The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
 
-* `foundationdb-6.2.27-x64.msi <https://www.foundationdb.org/downloads/6.2.27/windows/installers/foundationdb-6.2.27-x64.msi>`_
+* `foundationdb-6.2.28-x64.msi <https://www.foundationdb.org/downloads/6.2.28/windows/installers/foundationdb-6.2.28-x64.msi>`_
 
 API Language Bindings
 =====================
 
@@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
 
 If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
 
-* `foundationdb-6.2.27.tar.gz <https://www.foundationdb.org/downloads/6.2.27/bindings/python/foundationdb-6.2.27.tar.gz>`_
+* `foundationdb-6.2.28.tar.gz <https://www.foundationdb.org/downloads/6.2.28/bindings/python/foundationdb-6.2.28.tar.gz>`_
 
 Ruby 1.9.3/2.0.0+
 -----------------
 
-* `fdb-6.2.27.gem <https://www.foundationdb.org/downloads/6.2.27/bindings/ruby/fdb-6.2.27.gem>`_
+* `fdb-6.2.28.gem <https://www.foundationdb.org/downloads/6.2.28/bindings/ruby/fdb-6.2.28.gem>`_
 
 Java 8+
 -------
 
-* `fdb-java-6.2.27.jar <https://www.foundationdb.org/downloads/6.2.27/bindings/java/fdb-java-6.2.27.jar>`_
-* `fdb-java-6.2.27-javadoc.jar <https://www.foundationdb.org/downloads/6.2.27/bindings/java/fdb-java-6.2.27-javadoc.jar>`_
+* `fdb-java-6.2.28.jar <https://www.foundationdb.org/downloads/6.2.28/bindings/java/fdb-java-6.2.28.jar>`_
+* `fdb-java-6.2.28-javadoc.jar <https://www.foundationdb.org/downloads/6.2.28/bindings/java/fdb-java-6.2.28-javadoc.jar>`_
 
 Go 1.11+
 --------
 
@@ -4,12 +4,17 @@
 Release Notes
 #############
 
+6.2.29
+======
+* Fix invalid memory access on the data distributor when snapshotting large clusters. `(PR #4076) <https://github.com/apple/foundationdb/pull/4076>`_
+* Add a human-readable DateTime to trace events. `(PR #4087) <https://github.com/apple/foundationdb/pull/4087>`_
+* The proxy now rejects a transaction batch that exceeds the MVCC window. `(PR #4113) <https://github.com/apple/foundationdb/pull/4113>`_
+
 6.2.28
 ======
 * Log detailed team collection information when the median available space ratio of all teams is too low. `(PR #3912) <https://github.com/apple/foundationdb/pull/3912>`_
 * Bug fix: the blob client did not support authentication key sizes over 64 bytes. `(PR #3964) <https://github.com/apple/foundationdb/pull/3964>`_
 
 6.2.27
 ======
 * For clusters with a large number of shards, avoid slow tasks in the data distributor by adding yields to the shard map destruction. `(PR #3834) <https://github.com/apple/foundationdb/pull/3834>`_
 
@@ -37,18 +37,23 @@ typedef StringRef ValueRef;
 typedef int64_t Generation;
 
 enum {
-    tagLocalitySpecial = -1,
+    tagLocalitySpecial = -1, // tag with this locality means it is invalidTag (id=0), txsTag (id=1), or cacheTag (id=2)
     tagLocalityLogRouter = -2,
-    tagLocalityRemoteLog = -3,
+    tagLocalityRemoteLog = -3, // tag created by log router for remote tLogs
     tagLocalityUpgraded = -4,
     tagLocalitySatellite = -5,
-    tagLocalityLogRouterMapped = -6,
+    tagLocalityLogRouterMapped = -6, // The pseudo tag used by log routers to pop the real LogRouter tag (i.e., -2)
     tagLocalityTxs = -7,
     tagLocalityInvalid = -99
-}; //The TLog and LogRouter require these number to be as compact as possible
+}; // The TLog and LogRouter require these numbers to be as compact as possible
 
 #pragma pack(push, 1)
 struct Tag {
+    // if locality > 0,
+    //    locality decides which DC id the tLog is in;
+    //    id decides which SS owns the tag; id <-> SS mapping is in the system keyspace: serverTagKeys.
+    // if locality < 0, locality decides the type of tLog set: satellite, LR, or remote tLog, etc.
+    //    id decides which tLog in the tLog type will be used.
     int8_t locality;
     uint16_t id;
 
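The comments added to the ``tagLocality`` enum and ``Tag`` struct above pin down the convention: a non-negative ``locality`` names a DC and ``id`` picks the storage server's tag within it, while a negative ``locality`` selects a tLog class (log router, remote log, satellite, ...). A toy illustration of that convention (stand-in struct with illustrative values, not the fdbclient type)::

    #include <cstdint>
    #include <cstdio>

    struct Tag {          // stand-in for fdbclient's packed Tag
        int8_t locality;  // >= 0: DC id; < 0: tLog class (e.g. -2 = log router)
        uint16_t id;      // which SS tag / which tLog within the class
    };

    int main() {
        Tag storageTag{1, 7};    // a storage server tag (locality picks the DC)
        Tag logRouterTag{-2, 3}; // tagLocalityLogRouter, router #3
        std::printf("%d:%d %d:%d\n", storageTag.locality, storageTag.id,
                    logRouterTag.locality, logRouterTag.id);
        return 0;
    }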
@@ -138,6 +143,10 @@ static std::string describe( Reference<T> const& item ) {
     return item->toString();
 }
 
+static std::string describe(UID const& item) {
+    return item.shortString();
+}
+
 template <class T>
 static std::string describe( T const& item ) {
     return item.toString();
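The new ``describe(UID const&)`` overload is selected ahead of the generic template for exact matches, so UIDs render through ``shortString()`` while other types keep using ``toString()``. A self-contained sketch of that dispatch (toy ``UID``, not FoundationDB's)::

    #include <iostream>
    #include <string>

    struct UID { // toy stand-in
        std::string shortString() const { return "79fa2c1d"; }
        std::string toString() const { return "79fa2c1d52aa90b1"; }
    };

    static std::string describe(UID const& item) { return item.shortString(); }

    template <class T>
    static std::string describe(T const& item) { return item.toString(); }

    int main() {
        UID id;
        std::cout << describe(id) << "\n"; // the non-template overload wins: "79fa2c1d"
        return 0;
    }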
@@ -1230,6 +1230,12 @@ ACTOR Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLoca
     }
 }
 
+// Get the SS locations for each shard in the 'keys' key-range;
+// Returned vector size is the number of shards in the input keys key-range.
+// Returned vector element is <ShardRange, storage server location info> pairs, where
+// ShardRange is the whole shard key-range, not a part of the given key range.
+// Example: If the function is queried with key range (b, d), the returned list of pairs could be something like:
+// [([a, b1), locationInfo), ([b1, c), locationInfo), ([c, d1), locationInfo)].
 template <class F>
 Future< vector< pair<KeyRange,Reference<LocationInfo>> > > getKeyRangeLocations( Database const& cx, KeyRange const& keys, int limit, bool reverse, F StorageServerInterface::*member, TransactionInfo const& info ) {
     ASSERT (!keys.empty());
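The key point in the comment block above is that shard boundaries are returned whole: the first returned range may start before the queried range and the last may end after it. A toy check of that property (plain std types, not the FDB client API)::

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    using ToyRange = std::pair<std::string, std::string>; // [begin, end)

    int main() {
        // Shard map fragment [a,b1) [b1,c) [c,d1): a query for [b,d)
        // overlaps all three shards and receives each one whole.
        std::vector<ToyRange> returned = { {"a", "b1"}, {"b1", "c"}, {"c", "d1"} };
        assert(returned.front().first < std::string("b"));  // extends left of the query
        assert(returned.back().second > std::string("d"));  // extends right of the query
        return 0;
    }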
@@ -1896,7 +1902,6 @@ ACTOR Future<Standalone<RangeResultRef>> getRange( Database cx, Reference<Transa
     }
 
-    ++cx->transactionPhysicalReads;
     ++cx->transactionGetRangeRequests;
     state GetKeyValuesReply rep;
     try {
         if (CLIENT_BUGGIFY) {
@@ -220,18 +220,36 @@ ACTOR Future<Void> pingLatencyLogger(TransportData* self) {
         if(!peer) {
             TraceEvent(SevWarnAlways, "MissingNetworkAddress").suppressFor(10.0).detail("PeerAddr", lastAddress);
         }
+        if (peer->lastLoggedTime <= 0.0) {
+            peer->lastLoggedTime = peer->lastConnectTime;
+        }
+
         if(peer && peer->pingLatencies.getPopulationSize() >= 10) {
             TraceEvent("PingLatency")
-                .detail("PeerAddr", lastAddress)
-                .detail("MinLatency", peer->pingLatencies.min())
-                .detail("MaxLatency", peer->pingLatencies.max())
-                .detail("MeanLatency", peer->pingLatencies.mean())
-                .detail("MedianLatency", peer->pingLatencies.median())
-                .detail("P90Latency", peer->pingLatencies.percentile(0.90))
-                .detail("Count", peer->pingLatencies.getPopulationSize())
-                .detail("BytesReceived", peer->bytesReceived - peer->lastLoggedBytesReceived)
-                .detail("BytesSent", peer->bytesSent - peer->lastLoggedBytesSent);
+                .detail("Elapsed", now() - peer->lastLoggedTime)
+                .detail("PeerAddr", lastAddress)
+                .detail("MinLatency", peer->pingLatencies.min())
+                .detail("MaxLatency", peer->pingLatencies.max())
+                .detail("MeanLatency", peer->pingLatencies.mean())
+                .detail("MedianLatency", peer->pingLatencies.median())
+                .detail("P90Latency", peer->pingLatencies.percentile(0.90))
+                .detail("Count", peer->pingLatencies.getPopulationSize())
+                .detail("BytesReceived", peer->bytesReceived - peer->lastLoggedBytesReceived)
+                .detail("BytesSent", peer->bytesSent - peer->lastLoggedBytesSent)
+                .detail("ConnectOutgoingCount", peer->connectOutgoingCount)
+                .detail("ConnectIncomingCount", peer->connectIncomingCount)
+                .detail("ConnectFailedCount", peer->connectFailedCount)
+                .detail("ConnectMinLatency", peer->connectLatencies.min())
+                .detail("ConnectMaxLatency", peer->connectLatencies.max())
+                .detail("ConnectMeanLatency", peer->connectLatencies.mean())
+                .detail("ConnectMedianLatency", peer->connectLatencies.median())
+                .detail("ConnectP90Latency", peer->connectLatencies.percentile(0.90));
+            peer->lastLoggedTime = now();
+            peer->connectOutgoingCount = 0;
+            peer->connectIncomingCount = 0;
+            peer->connectFailedCount = 0;
             peer->pingLatencies.clear();
+            peer->connectLatencies.clear();
             peer->lastLoggedBytesReceived = peer->bytesReceived;
             peer->lastLoggedBytesSent = peer->bytesSent;
             wait(delay(FLOW_KNOBS->PING_LOGGING_INTERVAL));
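The enlarged ``PingLatency`` event follows a sample-and-reset pattern: ping and connect latencies accumulate in ``ContinuousSample`` objects, a summary is traced once at least ten samples exist, and the counters and samples are cleared for the next interval. A rough standalone analogue of the pattern (a sorted ``std::vector``, not Flow's ``ContinuousSample``)::

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct LatencySample {
        std::vector<double> samples;
        void addSample(double s) { samples.push_back(s); }
        size_t getPopulationSize() const { return samples.size(); }
        double percentile(double p) { // crude nearest-rank percentile
            std::sort(samples.begin(), samples.end());
            size_t idx = std::min(samples.size() - 1, (size_t)(p * samples.size()));
            return samples[idx];
        }
        void clear() { samples.clear(); }
    };

    int main() {
        LatencySample ping;
        for (int i = 1; i <= 10; i++) ping.addSample(0.001 * i);
        if (ping.getPopulationSize() >= 10) { // same threshold as the logger above
            std::printf("P90Latency=%f Count=%zu\n", ping.percentile(0.90),
                        ping.getPopulationSize());
            ping.clear(); // start a fresh interval, as the logger does after tracing
        }
        return 0;
    }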
@@ -476,7 +494,7 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
                     std::max(0.0, self->lastConnectTime + self->reconnectionDelay -
                     now()))); // Don't connect() to the same peer more than once per 2 sec
                 self->lastConnectTime = now();
-
+                ++self->connectOutgoingCount;
                 TraceEvent("ConnectingTo", conn ? conn->getDebugID() : UID()).suppressFor(1.0).detail("PeerAddr", self->destination);
 
                 try {
@@ -484,6 +502,7 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
                     when( Reference<IConnection> _conn = wait( INetworkConnections::net()->connect(self->destination) ) ) {
                         conn = _conn;
                         wait(conn->connectHandshake());
+                        self->connectLatencies.addSample(now() - self->lastConnectTime);
                         if (FlowTransport::isClient()) {
                             IFailureMonitor::failureMonitor().setStatus(self->destination, FailureStatus(false));
                         }
@@ -505,6 +524,7 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
                     }
                 }
             } catch( Error &e ) {
+                ++self->connectFailedCount;
                 if(e.code() != error_code_connection_failed) {
                     throw;
                 }
@@ -648,6 +668,7 @@ void Peer::discardUnreliablePackets() {
 void Peer::onIncomingConnection( Reference<Peer> self, Reference<IConnection> conn, Future<Void> reader ) {
     // In case two processes are trying to connect to each other simultaneously, the process with the larger canonical NetworkAddress
     // gets to keep its outgoing connection.
+    ++self->connectIncomingCount;
     if ( !destination.isPublic() && !outgoingConnectionIdle ) throw address_in_use();
     NetworkAddress compatibleAddr = transport->localAddresses.address;
     if(transport->localAddresses.secondaryAddress.present() && transport->localAddresses.secondaryAddress.get().isTLS() == destination.isTLS()) {
@@ -127,15 +127,23 @@ struct Peer : public ReferenceCounted<Peer> {
     double lastDataPacketSentTime;
     int outstandingReplies;
     ContinuousSample<double> pingLatencies;
+    double lastLoggedTime;
     int64_t lastLoggedBytesReceived;
     int64_t lastLoggedBytesSent;
 
+    // Cleared every time stats are logged for this peer.
+    int connectOutgoingCount;
+    int connectIncomingCount;
+    int connectFailedCount;
+    ContinuousSample<double> connectLatencies;
+
     explicit Peer(TransportData* transport, NetworkAddress const& destination)
       : transport(transport), destination(destination), outgoingConnectionIdle(true), lastConnectTime(0.0),
         reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), compatible(true), outstandingReplies(0),
         incompatibleProtocolVersionNewer(false), peerReferences(-1), bytesReceived(0), lastDataPacketSentTime(now()),
         pingLatencies(destination.isPublic() ? FLOW_KNOBS->PING_SAMPLE_AMOUNT : 1), lastLoggedBytesReceived(0),
-        bytesSent(0), lastLoggedBytesSent(0) {}
+        bytesSent(0), lastLoggedBytesSent(0), lastLoggedTime(0.0), connectOutgoingCount(0), connectIncomingCount(0),
+        connectFailedCount(0), connectLatencies(destination.isPublic() ? FLOW_KNOBS->NETWORK_CONNECT_SAMPLE_AMOUNT : 1) {}
 
     void send(PacketBuffer* pb, ReliablePacket* rp, bool firstUnsent);
 
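One C++ detail to keep in mind when extending an initializer list like ``Peer``'s: members are initialized in declaration order, not in the order they appear in the list, so writing ``lastLoggedTime(0.0)`` after ``lastLoggedBytesSent(0)`` is harmless here (all initializers are constants) but would matter if one member's initializer read another. A minimal illustration, unrelated to FDB types::

    #include <cassert>

    struct S {
        int a;
        int b;
        // b's initializer is written first, but a is initialized first
        // (declaration order); compilers flag such lists with -Wreorder.
        S() : b(a + 1), a(1) {}
    };

    int main() {
        S s;
        assert(s.a == 1 && s.b == 2); // would NOT hold if b were declared before a
        return 0;
    }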
@@ -171,14 +171,11 @@ void addLaggingRequest(Future<Optional<Reply>> reply, Promise<Void> requestFinis
 // failMon's information for load balancing and avoiding failed servers
 // If ALL the servers are failed and the list of servers is not fresh, throws an exception to let the caller refresh the list of servers
 ACTOR template <class Interface, class Request, class Multi>
-Future< REPLY_TYPE(Request) > loadBalance(
-    Reference<MultiInterface<Multi>> alternatives,
-    RequestStream<Request> Interface::* channel,
-    Request request = Request(),
-    TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
-    bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
-    QueueModel* model = NULL)
-{
+Future<REPLY_TYPE(Request)> loadBalance(
+    Reference<MultiInterface<Multi>> alternatives, RequestStream<Request> Interface::*channel,
+    Request request = Request(), TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
+    bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
+    QueueModel* model = NULL) {
     state Future<Optional<REPLY_TYPE(Request)>> firstRequest;
     state Optional<uint64_t> firstRequestEndpoint;
     state Future<Optional<REPLY_TYPE(Request)>> secondRequest;
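The ``atMostOnce`` flag preserved in the reflowed signature changes failure handling: rather than transparently retrying another replica, ``loadBalance`` surfaces ``request_maybe_delivered()`` so callers issuing non-idempotent requests can decide what to do. A self-contained sketch of that contract (toy exception-based code, not Flow)::

    #include <cstdio>
    #include <functional>
    #include <stdexcept>

    struct request_maybe_delivered : std::runtime_error {
        request_maybe_delivered() : std::runtime_error("request_maybe_delivered") {}
    };

    int loadBalanceToy(std::function<int()> send, bool atMostOnce) {
        for (;;) {
            try {
                return send();
            } catch (const std::runtime_error&) {
                if (atMostOnce) throw request_maybe_delivered(); // don't risk a duplicate
                // otherwise fall through and retry on the next alternative
            }
        }
    }

    int main() {
        int calls = 0;
        auto flaky = [&]() -> int {
            if (++calls < 2) throw std::runtime_error("transient failure");
            return 42;
        };
        int result = loadBalanceToy(flaky, false); // retried transparently
        std::printf("got %d after %d calls\n", result, calls);
        return 0;
    }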
@@ -76,7 +76,9 @@ void CounterCollection::logToTraceEvent(TraceEvent &te) const {
     }
 }
 
-ACTOR Future<Void> traceCounters(std::string traceEventName, UID traceEventID, double interval, CounterCollection* counters, std::string trackLatestName) {
+ACTOR Future<Void> traceCounters(std::string traceEventName, UID traceEventID, double interval,
+                                 CounterCollection* counters, std::string trackLatestName,
+                                 std::function<void(TraceEvent&)> decorator) {
     wait(delay(0)); // Give an opportunity for all members used in special counters to be initialized
 
     for (ICounter* c : counters->counters)

@@ -89,6 +91,7 @@ ACTOR Future<Void> traceCounters(std::string traceEventName, UID traceEventID, d
         te.detail("Elapsed", now() - last_interval);
 
         counters->logToTraceEvent(te);
+        decorator(te);
 
         if (!trackLatestName.empty()) {
             te.trackLatest(trackLatestName);
@@ -132,7 +132,9 @@ struct SpecialCounter : ICounter, FastAllocated<SpecialCounter<F>>, NonCopyable
 template <class F>
 static void specialCounter(CounterCollection& collection, std::string const& name, F && f) { new SpecialCounter<F>(collection, name, std::move(f)); }
 
-Future<Void> traceCounters(std::string const& traceEventName, UID const& traceEventID, double const& interval, CounterCollection* const& counters, std::string const& trackLatestName = std::string());
+Future<Void> traceCounters(std::string const& traceEventName, UID const& traceEventID, double const& interval,
+                           CounterCollection* const& counters, std::string const& trackLatestName = std::string(),
+                           std::function<void(TraceEvent&)> const& decorator = [](TraceEvent& te) {});
 
 class LatencyBands {
 public:
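The new ``decorator`` parameter lets a caller append extra fields to the periodic counters event without touching ``traceCounters`` itself; the default ``[](TraceEvent& te) {}`` keeps existing call sites unchanged. A self-contained analogue of the hook (toy ``TraceEvent``, not FDB's)::

    #include <functional>
    #include <iostream>
    #include <string>

    struct TraceEvent { // toy stand-in
        TraceEvent& detail(const std::string& k, double v) {
            std::cout << k << "=" << v << " ";
            return *this;
        }
    };

    void traceCountersToy(const std::string& name,
                          std::function<void(TraceEvent&)> decorator = [](TraceEvent&) {}) {
        std::cout << name << ": ";
        TraceEvent te;
        te.detail("Elapsed", 5.0); // fields the tracer always emits
        decorator(te);             // caller-supplied extras
        std::cout << "\n";
    }

    int main() {
        traceCountersToy("ProxyMetrics"); // default no-op decorator
        traceCountersToy("StorageMetrics",
                         [](TraceEvent& te) { te.detail("BytesDurable", 1234.0); });
        return 0;
    }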
@@ -102,8 +102,6 @@ bool onlyBeforeSimulatorInit() {
 
-const UID TOKEN_ENDPOINT_NOT_FOUND(-1, -1);
-
 ISimulator* g_pSimulator = 0;
 thread_local ISimulator::ProcessInfo* ISimulator::currentProcess = 0;
 int openCount = 0;
 
 struct SimClogging {
@@ -23,6 +23,7 @@
 #pragma once
 
 #include "flow/flow.h"
+#include "flow/Histogram.h"
 #include "fdbrpc/FailureMonitor.h"
 #include "fdbrpc/Locality.h"
 #include "fdbrpc/IAsyncFile.h"
@@ -54,6 +55,7 @@ public:
     LocalityData locality;
     ProcessClass startingClass;
     TDMetricCollection tdmetrics;
+    HistogramRegistry histograms;
     std::map<NetworkAddress, Reference<IListener>> listenerMap;
     bool failed;
     bool excluded;
@@ -146,8 +146,10 @@ public:
     vector<Reference<TCMachineInfo>> machines;
     vector<Standalone<StringRef>> machineIDs;
     vector<Reference<TCTeamInfo>> serverTeams;
+    UID id;
 
-    explicit TCMachineTeamInfo(vector<Reference<TCMachineInfo>> const& machines) : machines(machines) {
+    explicit TCMachineTeamInfo(vector<Reference<TCMachineInfo>> const& machines)
+      : machines(machines), id(deterministicRandom()->randomUniqueID()) {
         machineIDs.reserve(machines.size());
         for (int i = 0; i < machines.size(); i++) {
             machineIDs.push_back(machines[i]->machineID);
@@ -180,6 +182,7 @@ class TCTeamInfo : public ReferenceCounted<TCTeamInfo>, public IDataDistribution
 private:
     vector< Reference<TCServerInfo> > servers;
     vector<UID> serverIDs;
+    UID id;
 
 public:
     Reference<TCMachineTeamInfo> machineTeam;
@@ -189,7 +192,8 @@ public:
     int priority;
 
     explicit TCTeamInfo(vector<Reference<TCServerInfo>> const& servers)
-      : servers(servers), healthy(true), priority(SERVER_KNOBS->PRIORITY_TEAM_HEALTHY), wrongConfiguration(false) {
+      : servers(servers), healthy(true), priority(SERVER_KNOBS->PRIORITY_TEAM_HEALTHY), wrongConfiguration(false),
+        id(deterministicRandom()->randomUniqueID()) {
         if (servers.empty()) {
             TraceEvent(SevInfo, "ConstructTCTeamFromEmptyServers");
         }
@@ -199,6 +203,8 @@ public:
         }
     }
 
+    std::string getTeamID() override { return id.shortString(); }
+
     virtual vector<StorageServerInterface> getLastKnownServerInterfaces() {
         vector<StorageServerInterface> v;
         v.reserve(servers.size());
@@ -631,6 +637,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
     int highestUtilizationTeam;
 
     AsyncTrigger printDetailedTeamsInfo;
+    PromiseStream<GetMetricsRequest> getShardMetrics;
 
     void resetLocalitySet() {
         storageServerSet = Reference<LocalitySet>(new LocalityMap<UID>());
@@ -662,7 +669,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
                      DatabaseConfiguration configuration, std::vector<Optional<Key>> includedDCs,
                      Optional<std::vector<Optional<Key>>> otherTrackedDCs, Future<Void> readyToStart,
                      Reference<AsyncVar<bool>> zeroHealthyTeams, bool primary,
-                     Reference<AsyncVar<bool>> processingUnhealthy)
+                     Reference<AsyncVar<bool>> processingUnhealthy, PromiseStream<GetMetricsRequest> getShardMetrics)
       : cx(cx), distributorId(distributorId), lock(lock), output(output),
         shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), doBuildTeams(true), lastBuildTeamsFailed(false),
         teamBuilder(Void()), badTeamRemover(Void()), redundantMachineTeamRemover(Void()),
@@ -675,8 +682,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
         initializationDoneActor(logOnCompletion(readyToStart && initialFailureReactionDelay, this)),
         optimalTeamCount(0), recruitingStream(0), restartRecruiting(SERVER_KNOBS->DEBOUNCE_RECRUITING_DELAY),
         unhealthyServers(0), includedDCs(includedDCs), otherTrackedDCs(otherTrackedDCs),
-        zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary), medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO),
-        lastMedianAvailableSpaceUpdate(0), processingUnhealthy(processingUnhealthy), lowestUtilizationTeam(0), highestUtilizationTeam(0) {
+        zeroHealthyTeams(zeroHealthyTeams), zeroOptimalTeams(true), primary(primary),
+        medianAvailableSpace(SERVER_KNOBS->MIN_AVAILABLE_SPACE_RATIO), lastMedianAvailableSpaceUpdate(0),
+        processingUnhealthy(processingUnhealthy), lowestUtilizationTeam(0), highestUtilizationTeam(0),
+        getShardMetrics(getShardMetrics) {
         if(!primary || configuration.usableRegions == 1) {
             TraceEvent("DDTrackerStarting", distributorId)
                 .detail( "State", "Inactive" )
@@ -1352,7 +1361,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
                 .detail("TeamIndex", i++)
                 .detail("Healthy", team->isHealthy())
                 .detail("TeamSize", team->size())
-                .detail("MemberIDs", team->getServerIDsStr());
+                .detail("MemberIDs", team->getServerIDsStr())
+                .detail("TeamID", team->getTeamID());
         }
     }
 
@@ -2094,7 +2104,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
             .detail("Primary", primary)
             .detail("AddedTeams", 0)
             .detail("TeamsToBuild", 0)
-            .detail("CurrentTeams", teams.size())
+            .detail("CurrentServerTeams", teams.size())
             .detail("DesiredTeams", desiredServerTeams)
             .detail("MaxTeams", maxServerTeams)
             .detail("StorageTeamSize", configuration.storageTeamSize)
@@ -2143,11 +2153,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
             }
         }
         uniqueMachines = machines.size();
-        TraceEvent("BuildTeams")
-            .detail("ServerCount", self->server_info.size())
-            .detail("UniqueMachines", uniqueMachines)
-            .detail("Primary", self->primary)
-            .detail("StorageTeamSize", self->configuration.storageTeamSize);
+        TraceEvent("BuildTeams", self->distributorId)
+            .detail("ServerCount", self->server_info.size())
+            .detail("UniqueMachines", uniqueMachines)
+            .detail("Primary", self->primary)
+            .detail("StorageTeamSize", self->configuration.storageTeamSize);
 
         // If there are too few machines to even build teams or there are too few represented datacenters, build no new teams
         if( uniqueMachines >= self->configuration.storageTeamSize ) {
@@ -2174,11 +2184,11 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
                 .detail("TeamsToBuild", teamsToBuild)
                 .detail("DesiredTeams", desiredTeams)
                 .detail("MaxTeams", maxTeams)
-                .detail("BadTeams", self->badTeams.size())
+                .detail("BadServerTeams", self->badTeams.size())
                 .detail("UniqueMachines", uniqueMachines)
                 .detail("TeamSize", self->configuration.storageTeamSize)
                 .detail("Servers", serverCount)
-                .detail("CurrentTrackedTeams", self->teams.size())
+                .detail("CurrentTrackedServerTeams", self->teams.size())
                 .detail("HealthyTeamCount", teamCount)
                 .detail("TotalTeamCount", totalTeamCount)
                 .detail("MachineTeamCount", self->machineTeams.size())
@@ -2195,9 +2205,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
             int addedTeams = self->addTeamsBestOf(teamsToBuild, desiredTeams, maxTeams);
 
             if (addedTeams <= 0 && self->teams.size() == 0) {
-                TraceEvent(SevWarn, "NoTeamAfterBuildTeam")
-                    .detail("TeamNum", self->teams.size())
-                    .detail("Debug", "Check information below");
+                TraceEvent(SevWarn, "NoTeamAfterBuildTeam", self->distributorId)
+                    .detail("ServerTeamNum", self->teams.size())
+                    .detail("Debug", "Check information below");
                 // Debug: set true for traceAllInfo() to print out more information
                 self->traceAllInfo();
             }
@@ -2215,7 +2225,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
                 .detail("Primary", self->primary)
                 .detail("AddedTeams", 0)
                 .detail("TeamsToBuild", teamsToBuild)
-                .detail("CurrentTeams", self->teams.size())
+                .detail("CurrentServerTeams", self->teams.size())
                 .detail("DesiredTeams", desiredTeams)
                 .detail("MaxTeams", maxTeams)
                 .detail("StorageTeamSize", self->configuration.storageTeamSize)
@@ -2254,9 +2264,9 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
         }
 
         TraceEvent(SevWarn, "NoHealthyTeams", distributorId)
-            .detail("CurrentTeamCount", teams.size())
-            .detail("ServerCount", server_info.size())
-            .detail("NonFailedServerCount", desiredServerSet.size());
+            .detail("CurrentServerTeamCount", teams.size())
+            .detail("ServerCount", server_info.size())
+            .detail("NonFailedServerCount", desiredServerSet.size());
     }
 
     bool shouldHandleServer(const StorageServerInterface &newServer) {
@@ -2284,7 +2294,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
     }
 
     bool removeTeam( Reference<TCTeamInfo> team ) {
-        TraceEvent("RemovedTeam", distributorId).detail("Team", team->getDesc());
+        TraceEvent("RemovedServerTeam", distributorId).detail("Team", team->getDesc());
         bool found = false;
         for(int t=0; t<teams.size(); t++) {
             if( teams[t] == team ) {
@@ -2478,9 +2488,10 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
         int removedCount = 0;
         for (int t = 0; t < teams.size(); t++) {
             if ( std::count( teams[t]->getServerIDs().begin(), teams[t]->getServerIDs().end(), removedServer ) ) {
-                TraceEvent("TeamRemoved")
+                TraceEvent("ServerTeamRemoved")
                     .detail("Primary", primary)
-                    .detail("TeamServerIDs", teams[t]->getServerIDsStr());
+                    .detail("TeamServerIDs", teams[t]->getServerIDsStr())
+                    .detail("TeamID", teams[t]->getTeamID());
                 // removeTeam also needs to remove the team from the machine team info.
                 removeTeam(teams[t]);
                 t--;
@@ -2547,8 +2558,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
         restartTeamBuilder.trigger();
 
         TraceEvent("DataDistributionTeamCollectionUpdate", distributorId)
-            .detail("Teams", teams.size())
-            .detail("BadTeams", badTeams.size())
+            .detail("ServerTeams", teams.size())
+            .detail("BadServerTeams", badTeams.size())
             .detail("Servers", allServers.size())
             .detail("Machines", machine_info.size())
             .detail("MachineTeams", machineTeams.size())
@@ -2772,7 +2783,7 @@ ACTOR Future<Void> removeBadTeams(DDTeamCollection* self) {
     wait(self->initialFailureReactionDelay);
     wait(waitUntilHealthy(self));
     wait(self->addSubsetComplete.getFuture());
-    TraceEvent("DDRemovingBadTeams", self->distributorId).detail("Primary", self->primary);
+    TraceEvent("DDRemovingBadServerTeams", self->distributorId).detail("Primary", self->primary);
     for(auto it : self->badTeams) {
         it->tracker.cancel();
     }
@@ -2842,9 +2853,9 @@ ACTOR Future<Void> machineTeamRemover(DDTeamCollection* self) {
             // Check if a server will have 0 team after the team is removed
             for (auto& s : team->getServers()) {
                 if (s->teams.size() == 0) {
-                    TraceEvent(SevError, "TeamRemoverTooAggressive")
+                    TraceEvent(SevError, "MachineTeamRemoverTooAggressive", self->distributorId)
                         .detail("Server", s->id)
-                        .detail("Team", team->getServerIDsStr());
+                        .detail("ServerTeam", team->getDesc());
                     self->traceAllInfo(true);
                 }
             }
@@ -2867,6 +2878,7 @@ ACTOR Future<Void> machineTeamRemover(DDTeamCollection* self) {
             }
 
             TraceEvent("MachineTeamRemover", self->distributorId)
+                .detail("MachineTeamIDToRemove", mt->id.shortString())
                 .detail("MachineTeamToRemove", mt->getMachineIDsStr())
                 .detail("NumProcessTeamsOnTheMachineTeam", minNumProcessTeams)
                 .detail("CurrentMachineTeams", self->machineTeams.size())
@@ -2882,7 +2894,7 @@ ACTOR Future<Void> machineTeamRemover(DDTeamCollection* self) {
         } else {
             if (numMachineTeamRemoved > 0) {
                 // Only trace the information when we remove a machine team
-                TraceEvent("TeamRemoverDone")
+                TraceEvent("MachineTeamRemoverDone", self->distributorId)
                     .detail("HealthyMachines", healthyMachineCount)
                     // .detail("CurrentHealthyMachineTeams", currentHealthyMTCount)
                     .detail("CurrentMachineTeams", self->machineTeams.size())
@@ -2946,6 +2958,7 @@ ACTOR Future<Void> serverTeamRemover(DDTeamCollection* self) {
 
             TraceEvent("ServerTeamRemover", self->distributorId)
                 .detail("ServerTeamToRemove", st->getServerIDsStr())
+                .detail("ServerTeamID", st->getTeamID())
                 .detail("NumProcessTeamsOnTheServerTeam", maxNumProcessTeams)
                 .detail("CurrentServerTeams", self->teams.size())
                 .detail("DesiredServerTeams", desiredServerTeams);
@@ -2965,6 +2978,35 @@ ACTOR Future<Void> serverTeamRemover(DDTeamCollection* self) {
     }
 }
 
+ACTOR Future<Void> zeroServerLeftLogger_impl(DDTeamCollection* self, Reference<TCTeamInfo> team) {
+    wait(delay(SERVER_KNOBS->DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY));
+    state vector<KeyRange> shards = self->shardsAffectedByTeamFailure->getShardsFor(
+        ShardsAffectedByTeamFailure::Team(team->getServerIDs(), self->primary));
+    state std::vector<Future<StorageMetrics>> sizes;
+    sizes.reserve(shards.size());
+
+    for (auto const& shard : shards) {
+        sizes.emplace_back(brokenPromiseToNever(self->getShardMetrics.getReply(GetMetricsRequest(shard))));
+        TraceEvent(SevWarnAlways, "DDShardLost", self->distributorId)
+            .detail("ServerTeamID", team->getTeamID())
+            .detail("ShardBegin", shard.begin)
+            .detail("ShardEnd", shard.end);
+    }
+
+    wait(waitForAll(sizes));
+
+    int64_t bytesLost = 0;
+    for (auto const& size : sizes) {
+        bytesLost += size.get().bytes;
+    }
+
+    TraceEvent(SevWarnAlways, "DDZeroServerLeftInTeam", self->distributorId)
+        .detail("Team", team->getDesc())
+        .detail("TotalBytesLost", bytesLost);
+
+    return Void();
+}
+
 // Track a team and issue RelocateShards when the level of degradation changes
 // A badTeam can be unhealthy or just a redundantTeam removed by machineTeamRemover() or serverTeamRemover()
 ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> team, bool badTeam, bool redundantTeam) {
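``zeroServerLeftLogger_impl`` above is a fan-out/fan-in: it kicks off one metrics request per shard, waits for all of them, then sums the bytes that would be lost. A standalone analogue of that shape using ``std::async`` in place of Flow futures (toy values, not FDB code)::

    #include <cstdint>
    #include <cstdio>
    #include <future>
    #include <vector>

    int main() {
        std::vector<int64_t> shardSizes = {100, 250, 42}; // stand-ins for StorageMetrics
        std::vector<std::future<int64_t>> sizes;
        sizes.reserve(shardSizes.size());
        for (int64_t s : shardSizes) // one "getShardMetrics" per shard
            sizes.push_back(std::async(std::launch::async, [s] { return s; }));

        int64_t bytesLost = 0;
        for (auto& f : sizes) bytesLost += f.get(); // waitForAll, then accumulate
        std::printf("TotalBytesLost=%lld\n", (long long)bytesLost);
        return 0;
    }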
@@ -2979,18 +3021,22 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
     state bool lastZeroHealthy = self->zeroHealthyTeams->get();
     state bool firstCheck = true;
 
+    state Future<Void> zeroServerLeftLogger;
+
     if(logTeamEvents) {
-        TraceEvent("TeamTrackerStarting", self->distributorId).detail("Reason", "Initial wait complete (sc)").detail("Team", team->getDesc());
+        TraceEvent("ServerTeamTrackerStarting", self->distributorId)
+            .detail("Reason", "Initial wait complete (sc)")
+            .detail("ServerTeam", team->getDesc());
     }
     self->priority_teams[team->getPriority()]++;
 
     try {
         loop {
             if(logTeamEvents) {
-                TraceEvent("TeamHealthChangeDetected", self->distributorId)
-                    .detail("Team", team->getDesc())
-                    .detail("Primary", self->primary)
-                    .detail("IsReady", self->initialFailureReactionDelay.isReady());
+                TraceEvent("ServerTeamHealthChangeDetected", self->distributorId)
+                    .detail("ServerTeam", team->getDesc())
+                    .detail("Primary", self->primary)
+                    .detail("IsReady", self->initialFailureReactionDelay.isReady());
                 self->traceTeamCollectionInfo();
             }
             // Check if the number of degraded machines has changed
@@ -3053,10 +3099,13 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
             if (serversLeft != lastServersLeft || anyUndesired != lastAnyUndesired ||
                 anyWrongConfiguration != lastWrongConfiguration || recheck) { // NOTE: do not check wrongSize
                 if(logTeamEvents) {
-                    TraceEvent("TeamHealthChanged", self->distributorId)
-                        .detail("Team", team->getDesc()).detail("ServersLeft", serversLeft)
-                        .detail("LastServersLeft", lastServersLeft).detail("ContainsUndesiredServer", anyUndesired)
-                        .detail("HealthyTeamsCount", self->healthyTeamCount).detail("IsWrongConfiguration", anyWrongConfiguration);
+                    TraceEvent("ServerTeamHealthChanged", self->distributorId)
+                        .detail("ServerTeam", team->getDesc())
+                        .detail("ServersLeft", serversLeft)
+                        .detail("LastServersLeft", lastServersLeft)
+                        .detail("ContainsUndesiredServer", anyUndesired)
+                        .detail("HealthyTeamsCount", self->healthyTeamCount)
+                        .detail("IsWrongConfiguration", anyWrongConfiguration);
                 }
 
                 team->setWrongConfiguration( anyWrongConfiguration );
@@ -3078,18 +3127,18 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
                     self->zeroHealthyTeams->set(self->healthyTeamCount == 0);
 
                     if( self->healthyTeamCount == 0 ) {
-                        TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId)
-                            .detail("SignallingTeam", team->getDesc())
-                            .detail("Primary", self->primary);
+                        TraceEvent(SevWarn, "ZeroServerTeamsHealthySignalling", self->distributorId)
+                            .detail("SignallingTeam", team->getDesc())
+                            .detail("Primary", self->primary);
                     }
 
                     if(logTeamEvents) {
-                        TraceEvent("TeamHealthDifference", self->distributorId)
-                            .detail("Team", team->getDesc())
-                            .detail("LastOptimal", lastOptimal)
-                            .detail("LastHealthy", lastHealthy)
-                            .detail("Optimal", optimal)
-                            .detail("OptimalTeamCount", self->optimalTeamCount);
+                        TraceEvent("ServerTeamHealthDifference", self->distributorId)
+                            .detail("ServerTeam", team->getDesc())
+                            .detail("LastOptimal", lastOptimal)
+                            .detail("LastHealthy", lastHealthy)
+                            .detail("Optimal", optimal)
+                            .detail("OptimalTeamCount", self->optimalTeamCount);
                     }
                 }
 
@@ -3126,12 +3175,24 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
                 if(lastPriority != team->getPriority()) {
                     self->priority_teams[lastPriority]--;
                     self->priority_teams[team->getPriority()]++;
+                    if (lastPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT &&
+                        team->getPriority() < SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) {
+                        zeroServerLeftLogger = Void();
+                    }
+                    if (logTeamEvents) {
+                        int dataLoss = team->getPriority() == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT;
+                        Severity severity = dataLoss ? SevWarnAlways : SevInfo;
+                        TraceEvent(severity, "ServerTeamPriorityChange", self->distributorId)
+                            .detail("Priority", team->getPriority())
+                            .detail("Info", team->getDesc())
+                            .detail("ZeroHealthyServerTeams", self->zeroHealthyTeams->get());
+                        if (team->getPriority() == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) {
+                            // 0 servers left in this team, data might be lost.
+                            zeroServerLeftLogger = zeroServerLeftLogger_impl(self, team);
+                        }
+                    }
                 }
 
-                if(logTeamEvents) {
-                    TraceEvent("TeamPriorityChange", self->distributorId).detail("Priority", team->getPriority())
-                        .detail("Info", team->getDesc()).detail("ZeroHealthyTeams", self->zeroHealthyTeams->get());
-                }
-
                 lastZeroHealthy = self->zeroHealthyTeams->get(); //set this again in case it changed from this teams health changing
                 if( self->initialFailureReactionDelay.isReady() && !self->zeroHealthyTeams->get() ) {
@@ -3185,17 +3246,19 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
                         self->output.send(rs);
                         if(deterministicRandom()->random01() < 0.01) {
                             TraceEvent("SendRelocateToDDQx100", self->distributorId)
-                                .detail("Team", team->getDesc())
-                                .detail("KeyBegin", rs.keys.begin)
-                                .detail("KeyEnd", rs.keys.end)
-                                .detail("Priority", rs.priority)
-                                .detail("TeamFailedMachines", team->size() - serversLeft)
-                                .detail("TeamOKMachines", serversLeft);
+                                .detail("ServerTeam", team->getDesc())
+                                .detail("KeyBegin", rs.keys.begin)
+                                .detail("KeyEnd", rs.keys.end)
+                                .detail("Priority", rs.priority)
+                                .detail("ServerTeamFailedMachines", team->size() - serversLeft)
+                                .detail("ServerTeamOKMachines", serversLeft);
                         }
                     }
                 } else {
                     if(logTeamEvents) {
-                        TraceEvent("TeamHealthNotReady", self->distributorId).detail("HealthyTeamCount", self->healthyTeamCount);
+                        TraceEvent("ServerTeamHealthNotReady", self->distributorId)
+                            .detail("HealthyServerTeamCount", self->healthyTeamCount)
+                            .detail("ServerTeamID", team->getTeamID());
                     }
                 }
             }
@@ -3206,7 +3269,9 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
         }
     } catch(Error& e) {
         if(logTeamEvents) {
-            TraceEvent("TeamTrackerStopping", self->distributorId).detail("Team", team->getDesc()).detail("Priority", team->getPriority());
+            TraceEvent("ServerTeamTrackerStopping", self->distributorId)
+                .detail("ServerTeam", team->getDesc())
+                .detail("Priority", team->getPriority());
         }
         self->priority_teams[team->getPriority()]--;
         if (team->isHealthy()) {

@@ -3214,7 +3279,8 @@ ACTOR Future<Void> teamTracker(DDTeamCollection* self, Reference<TCTeamInfo> tea
         ASSERT( self->healthyTeamCount >= 0 );
 
         if( self->healthyTeamCount == 0 ) {
-            TraceEvent(SevWarn, "ZeroTeamsHealthySignalling", self->distributorId).detail("SignallingTeam", team->getDesc());
+            TraceEvent(SevWarn, "ZeroServerTeamsHealthySignalling", self->distributorId)
+                .detail("SignallingServerTeam", team->getDesc());
             self->zeroHealthyTeams->set(true);
         }
     }
@@ -4352,7 +4418,9 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
     state DatabaseConfiguration configuration;
     state Reference<InitialDataDistribution> initData;
     state MoveKeysLock lock;
+    state bool trackerCancelled;
     loop {
+        trackerCancelled = false;
         try {
             loop {
                 TraceEvent("DDInitTakingMoveKeysLock", self->ddId);
@@ -4513,18 +4581,24 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
             }
 
             actors.push_back( pollMoveKeysLock(cx, lock) );
-            actors.push_back(
-                reportErrorsExcept(dataDistributionTracker(initData, cx, output, shardsAffectedByTeamFailure,
-                                                           getShardMetrics, getAverageShardBytes.getFuture(),
-                                                           readyToStart, anyZeroHealthyTeams, self->ddId, &shards),
-                                   "DDTracker", self->ddId, &normalDDQueueErrors()));
+            actors.push_back(reportErrorsExcept(
+                dataDistributionTracker(initData, cx, output, shardsAffectedByTeamFailure, getShardMetrics,
+                                        getAverageShardBytes.getFuture(), readyToStart, anyZeroHealthyTeams, self->ddId,
+                                        &shards, &trackerCancelled),
+                "DDTracker", self->ddId, &normalDDQueueErrors()));
             actors.push_back( reportErrorsExcept( dataDistributionQueue( cx, output, input.getFuture(), getShardMetrics, processingUnhealthy, tcis, shardsAffectedByTeamFailure, lock, getAverageShardBytes, self->ddId, storageTeamSize, configuration.storageTeamSize, &lastLimited ), "DDQueue", self->ddId, &normalDDQueueErrors() ) );
 
             vector<DDTeamCollection*> teamCollectionsPtrs;
-            Reference<DDTeamCollection> primaryTeamCollection( new DDTeamCollection(cx, self->ddId, lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId, configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>(), readyToStart.getFuture(), zeroHealthyTeams[0], true, processingUnhealthy) );
+            Reference<DDTeamCollection> primaryTeamCollection(new DDTeamCollection(
+                cx, self->ddId, lock, output, shardsAffectedByTeamFailure, configuration, primaryDcId,
+                configuration.usableRegions > 1 ? remoteDcIds : std::vector<Optional<Key>>(), readyToStart.getFuture(),
+                zeroHealthyTeams[0], true, processingUnhealthy, getShardMetrics));
             teamCollectionsPtrs.push_back(primaryTeamCollection.getPtr());
             if (configuration.usableRegions > 1) {
-                Reference<DDTeamCollection> remoteTeamCollection( new DDTeamCollection(cx, self->ddId, lock, output, shardsAffectedByTeamFailure, configuration, remoteDcIds, Optional<std::vector<Optional<Key>>>(), readyToStart.getFuture() && remoteRecovered(self->dbInfo), zeroHealthyTeams[1], false, processingUnhealthy) );
+                Reference<DDTeamCollection> remoteTeamCollection(new DDTeamCollection(
+                    cx, self->ddId, lock, output, shardsAffectedByTeamFailure, configuration, remoteDcIds,
+                    Optional<std::vector<Optional<Key>>>(), readyToStart.getFuture() && remoteRecovered(self->dbInfo),
+                    zeroHealthyTeams[1], false, processingUnhealthy, getShardMetrics));
                 teamCollectionsPtrs.push_back(remoteTeamCollection.getPtr());
                 remoteTeamCollection->teamCollections = teamCollectionsPtrs;
                 actors.push_back( reportErrorsExcept( dataDistributionTeamCollection( remoteTeamCollection, initData, tcis[1], self->dbInfo ), "DDTeamCollectionSecondary", self->ddId, &normalDDQueueErrors() ) );
@@ -4540,6 +4614,7 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self)
         }
         catch( Error &e ) {
             state Error err = e;
+            trackerCancelled = true;
             wait(shards.clearAsync());
             if (err.code() != error_code_movekeys_conflict) throw err;
             bool ddEnabled = wait( isDataDistributionEnabled(cx) );
@@ -4764,20 +4839,11 @@ DDTeamCollection* testTeamCollection(int teamSize, Reference<IReplicationPolicy>
     conf.storageTeamSize = teamSize;
     conf.storagePolicy = policy;
 
-    DDTeamCollection* collection = new DDTeamCollection(
-        database,
-        UID(0, 0),
-        MoveKeysLock(),
-        PromiseStream<RelocateShard>(),
-        Reference<ShardsAffectedByTeamFailure>(new ShardsAffectedByTeamFailure()),
-        conf,
-        {},
-        {},
-        Future<Void>(Void()),
-        Reference<AsyncVar<bool>>( new AsyncVar<bool>(true) ),
-        true,
-        Reference<AsyncVar<bool>>( new AsyncVar<bool>(false) )
-    );
+    DDTeamCollection* collection =
+        new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
+                             Reference<ShardsAffectedByTeamFailure>(new ShardsAffectedByTeamFailure()), conf, {}, {},
+                             Future<Void>(Void()), Reference<AsyncVar<bool>>(new AsyncVar<bool>(true)), true,
+                             Reference<AsyncVar<bool>>(new AsyncVar<bool>(false)), PromiseStream<GetMetricsRequest>());
 
     for (int id = 1; id <= processCount; ++id) {
         UID uid(id, 0);

@@ -4805,9 +4871,8 @@ DDTeamCollection* testMachineTeamCollection(int teamSize, Reference<IReplication
     DDTeamCollection* collection =
         new DDTeamCollection(database, UID(0, 0), MoveKeysLock(), PromiseStream<RelocateShard>(),
                              Reference<ShardsAffectedByTeamFailure>(new ShardsAffectedByTeamFailure()), conf, {}, {},
-                             Future<Void>(Void()),
-                             Reference<AsyncVar<bool>>(new AsyncVar<bool>(true)), true,
-                             Reference<AsyncVar<bool>>(new AsyncVar<bool>(false)));
+                             Future<Void>(Void()), Reference<AsyncVar<bool>>(new AsyncVar<bool>(true)), true,
+                             Reference<AsyncVar<bool>>(new AsyncVar<bool>(false)), PromiseStream<GetMetricsRequest>());
 
     for (int id = 1; id <= processCount; id++) {
         UID uid(id, 0);
@@ -59,10 +59,12 @@ struct IDataDistributionTeam {
     virtual bool isWrongConfiguration() = 0;
     virtual void setWrongConfiguration(bool) = 0;
     virtual void addServers(const vector<UID> &servers) = 0;
+    virtual std::string getTeamID() = 0;
 
     std::string getDesc() {
         const auto& servers = getLastKnownServerInterfaces();
-        std::string s = format("Size %d; ", servers.size());
+        std::string s = format("TeamID:%s", getTeamID().c_str());
+        s += format("Size %d; ", servers.size());
         for(int i=0; i<servers.size(); i++) {
             if (i) s += ", ";
             s += servers[i].address().toString() + " " + servers[i].id().shortString();
@ -186,7 +188,7 @@ struct InitialDataDistribution : ReferenceCounted<InitialDataDistribution> {
|
|||
struct ShardMetrics {
|
||||
StorageMetrics metrics;
|
||||
double lastLowBandwidthStartTime;
|
||||
int shardCount;
|
||||
int shardCount; // number of smaller shards whose metrics are aggregated in the ShardMetrics
|
||||
|
||||
bool operator==(ShardMetrics const& rhs) const {
|
||||
return metrics == rhs.metrics && lastLowBandwidthStartTime == rhs.lastLowBandwidthStartTime &&
|
||||
|
@ -209,7 +211,8 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
|
|||
PromiseStream<GetMetricsRequest> getShardMetrics,
|
||||
FutureStream<Promise<int64_t>> getAverageShardBytes,
|
||||
Promise<Void> readyToStart, Reference<AsyncVar<bool>> zeroHealthyTeams,
|
||||
UID distributorId, KeyRangeMap<ShardTrackedData>* shards);
|
||||
UID distributorId, KeyRangeMap<ShardTrackedData>* shards,
|
||||
bool const* trackerCancelled);
|
||||
|
||||
ACTOR Future<Void> dataDistributionQueue(
|
||||
Database cx, PromiseStream<RelocateShard> output, FutureStream<RelocateShard> input,
|
||||
|
|
|
@ -18,8 +18,9 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <numeric>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#include "flow/ActorCollection.h"
|
||||
#include "flow/Util.h"
|
||||
|
@ -83,10 +84,10 @@ struct RelocateData {
|
|||
|
||||
class ParallelTCInfo : public ReferenceCounted<ParallelTCInfo>, public IDataDistributionTeam {
|
||||
public:
|
||||
vector<Reference<IDataDistributionTeam>> teams;
|
||||
vector<UID> tempServerIDs;
|
||||
std::vector<Reference<IDataDistributionTeam>> teams;
|
||||
std::vector<UID> tempServerIDs;
|
||||
|
||||
ParallelTCInfo() { }
|
||||
ParallelTCInfo() {}
|
||||
|
||||
void addTeam(Reference<IDataDistributionTeam> team) {
|
||||
teams.push_back(team);
|
||||
|
@ -105,11 +106,11 @@ public:
|
|||
}
|
||||
|
||||
template<class T>
|
||||
vector<T> collect(std::function < vector<T>(Reference<IDataDistributionTeam>)> func) {
|
||||
vector<T> result;
|
||||
std::vector<T> collect(std::function<std::vector<T>(Reference<IDataDistributionTeam>)> func) {
|
||||
std::vector<T> result;
|
||||
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
vector<T> newItems = func(*it);
|
||||
std::vector<T> newItems = func(*it);
|
||||
result.insert(result.end(), newItems.begin(), newItems.end());
|
||||
}
|
||||
return result;
|
||||
|
@ -130,7 +131,7 @@ public:
|
|||
});
|
||||
}
|
||||
|
||||
virtual vector<StorageServerInterface> getLastKnownServerInterfaces() {
|
||||
virtual std::vector<StorageServerInterface> getLastKnownServerInterfaces() {
|
||||
return collect<StorageServerInterface>([](Reference<IDataDistributionTeam> team) {
|
||||
return team->getLastKnownServerInterfaces();
|
||||
});
|
||||
|
@ -144,10 +145,10 @@ public:
|
|||
return totalSize;
|
||||
}
|
||||
|
||||
virtual vector<UID> const& getServerIDs() {
|
||||
virtual std::vector<UID> const& getServerIDs() {
|
||||
tempServerIDs.clear();
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
vector<UID> const& childIDs = (*it)->getServerIDs();
|
||||
std::vector<UID> const& childIDs = (*it)->getServerIDs();
|
||||
tempServerIDs.insert(tempServerIDs.end(), childIDs.begin(), childIDs.end());
|
||||
}
|
||||
return tempServerIDs;
|
||||
|
@ -194,7 +195,7 @@ public:
|
|||
}
|
||||
|
||||
virtual Future<Void> updateStorageMetrics() {
|
||||
vector<Future<Void>> futures;
|
||||
std::vector<Future<Void>> futures;
|
||||
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
futures.push_back((*it)->updateStorageMetrics());
|
||||
|
@ -250,10 +251,19 @@ public:
|
|||
ASSERT(!teams.empty());
|
||||
teams[0]->addServers(servers);
|
||||
}
|
||||
|
||||
std::string getTeamID() override {
|
||||
std::string id;
|
||||
for (int i = 0; i < teams.size(); i++) {
|
||||
auto const& team = teams[i];
|
||||
id += (i == teams.size() - 1) ? team->getTeamID() : format("%s, ", team->getTeamID().c_str());
|
||||
}
|
||||
return id;
|
||||
}
|
||||
};
|
||||
|
||||
struct Busyness {
|
||||
vector<int> ledger;
|
||||
std::vector<int> ledger;
|
||||
|
||||
Busyness() : ledger( 10, 0 ) {}
|
||||
|
||||
|
@ -553,7 +563,7 @@ struct DDQueueData {
|
|||
|
||||
if(keyServersEntries.size() < SERVER_KNOBS->DD_QUEUE_MAX_KEY_SERVERS) {
|
||||
for( int shard = 0; shard < keyServersEntries.size(); shard++ ) {
|
||||
vector<UID> src, dest;
|
||||
std::vector<UID> src, dest;
|
||||
decodeKeyServersValue( keyServersEntries[shard].value, src, dest );
|
||||
ASSERT( src.size() );
|
||||
for( int i = 0; i < src.size(); i++ ) {
|
||||
|
@ -852,7 +862,7 @@ struct DDQueueData {
|
|||
startedHere++;
|
||||
|
||||
// update both inFlightActors and inFlight key range maps, cancelling deleted RelocateShards
|
||||
vector<KeyRange> ranges;
|
||||
std::vector<KeyRange> ranges;
|
||||
inFlightActors.getRangesAffectedByInsertion( rd.keys, ranges );
|
||||
inFlightActors.cancel( KeyRangeRef( ranges.front().begin, ranges.back().end ) );
|
||||
inFlight.insert( rd.keys, rd );
|
||||
|
@ -1036,6 +1046,9 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
|
|||
} else {
|
||||
TraceEvent(relocateShardInterval.severity, "RelocateShardHasDestination", distributorId)
|
||||
.detail("PairId", relocateShardInterval.pairID)
|
||||
.detail("KeyBegin", rd.keys.begin)
|
||||
.detail("KeyEnd", rd.keys.end)
|
||||
.detail("SourceServers", describe(rd.src))
|
||||
.detail("DestinationTeam", describe(destIds))
|
||||
.detail("ExtraIds", describe(extraIds));
|
||||
}
|
||||
|
@ -1421,7 +1434,7 @@ ACTOR Future<Void> dataDistributionQueue(
|
|||
state RelocateData launchData;
|
||||
state Future<Void> recordMetrics = delay(SERVER_KNOBS->DD_QUEUE_LOGGING_INTERVAL);
|
||||
|
||||
state vector<Future<Void>> balancingFutures;
|
||||
state std::vector<Future<Void>> balancingFutures;
|
||||
|
||||
state ActorCollectionNoErrors actors;
|
||||
state PromiseStream<KeyRange> rangesComplete;
|
||||
|
|
|
@ -76,14 +76,43 @@ struct DataDistributionTracker {
|
|||
Promise<Void> readyToStart;
|
||||
Reference<AsyncVar<bool>> anyZeroHealthyTeams;
|
||||
|
||||
// The reference to trackerCancelled must be extracted by actors,
// because by the time trackerCancelled == true this memory can no longer
// be accessed
bool const& trackerCancelled;

// This class extracts the trackerCancelled reference from a DataDistributionTracker object.
// Because some actors spawned by the dataDistributionTracker outlive the DataDistributionTracker
// object, we must guard against memory errors by using a SafeAccessor functor to access
// the DataDistributionTracker object.
class SafeAccessor {
bool const& trackerCancelled;
DataDistributionTracker& tracker;

public:
SafeAccessor(DataDistributionTracker* tracker)
: trackerCancelled(tracker->trackerCancelled), tracker(*tracker) {
ASSERT(!trackerCancelled);
}

DataDistributionTracker* operator()() {
if (trackerCancelled) {
TEST(true); // Trying to access DataDistributionTracker after tracker has been cancelled
throw dd_tracker_cancelled();
}
return &tracker;
}
};
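Long-lived tracker actors now take a SafeAccessor by value and go through self() on every access, so a cancelled tracker surfaces as a dd_tracker_cancelled error instead of a use-after-free. A hypothetical actor illustrating the calling convention (modeled on trackShardBytes and shardTracker below; a sketch, not part of the commit):

// Sketch only: every dereference re-checks trackerCancelled via operator().
ACTOR Future<Void> exampleTrackedWork(DataDistributionTracker::SafeAccessor self, KeyRange keys) {
    loop {
        Transaction tr(self()->cx); // throws dd_tracker_cancelled if the tracker is gone
        wait(delay(1.0, TaskPriority::DataDistribution));
    }
}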

DataDistributionTracker(Database cx, UID distributorId, Promise<Void> const& readyToStart,
PromiseStream<RelocateShard> const& output,
Reference<ShardsAffectedByTeamFailure> shardsAffectedByTeamFailure,
Reference<AsyncVar<bool>> anyZeroHealthyTeams, KeyRangeMap<ShardTrackedData>& shards)
Reference<AsyncVar<bool>> anyZeroHealthyTeams, KeyRangeMap<ShardTrackedData>& shards,
bool const& trackerCancelled)
: cx(cx), distributorId(distributorId), dbSizeEstimate(new AsyncVar<int64_t>()), systemSizeEstimate(0),
maxShardSize(new AsyncVar<Optional<int64_t>>()), sizeChanges(false), readyToStart(readyToStart), output(output),
shardsAffectedByTeamFailure(shardsAffectedByTeamFailure), anyZeroHealthyTeams(anyZeroHealthyTeams),
shards(shards) {}
shards(shards), trackerCancelled(trackerCancelled) {}

~DataDistributionTracker()
{

@@ -134,11 +163,8 @@ int64_t getMaxShardSize( double dbSizeEstimate ) {
(int64_t)SERVER_KNOBS->MAX_SHARD_BYTES);
}

ACTOR Future<Void> trackShardBytes(
DataDistributionTracker* self,
KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize)
{
ACTOR Future<Void> trackShardBytes(DataDistributionTracker::SafeAccessor self, KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize) {
state BandwidthStatus bandwidthStatus = shardSize->get().present() ? getBandwidthStatus( shardSize->get().get().metrics ) : BandwidthStatusNormal;
state double lastLowBandwidthStartTime = shardSize->get().present() ? shardSize->get().get().lastLowBandwidthStartTime : now();
state int shardCount = shardSize->get().present() ? shardSize->get().get().shardCount : 1;

@@ -188,7 +214,8 @@ ACTOR Future<Void> trackShardBytes(
bounds.permittedError.iosPerKSecond = bounds.permittedError.infinity;

loop {
Transaction tr(self->cx);
Transaction tr(self()->cx);
// metrics.second is the number of key-ranges (i.e., shards) in the 'keys' key-range
std::pair<Optional<StorageMetrics>, int> metrics = wait( tr.waitStorageMetrics( keys, bounds.min, bounds.max, bounds.permittedError, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT, shardCount ) );
if(metrics.first.present()) {
BandwidthStatus newBandwidthStatus = getBandwidthStatus( metrics.first.get() );

@@ -211,9 +238,11 @@ ACTOR Future<Void> trackShardBytes(
.detail("TrackerID", trackerID);*/

if( shardSize->get().present() ) {
self->dbSizeEstimate->set( self->dbSizeEstimate->get() + metrics.first.get().bytes - shardSize->get().get().metrics.bytes );
self()->dbSizeEstimate->set(self()->dbSizeEstimate->get() + metrics.first.get().bytes -
shardSize->get().get().metrics.bytes);
if(keys.begin >= systemKeys.begin) {
self->systemSizeEstimate += metrics.first.get().bytes - shardSize->get().get().metrics.bytes;
self()->systemSizeEstimate +=
metrics.first.get().bytes - shardSize->get().get().metrics.bytes;
}
}

@@ -230,8 +259,9 @@ ACTOR Future<Void> trackShardBytes(
}
}
} catch( Error &e ) {
if (e.code() != error_code_actor_cancelled)
self->output.sendError(e); // Propagate failure to dataDistributionTracker
if (e.code() != error_code_actor_cancelled && e.code() != error_code_dd_tracker_cancelled) {
self()->output.sendError(e); // Propagate failure to dataDistributionTracker
}
throw e;
}
}

@@ -486,6 +516,8 @@ Future<Void> shardMerger(
shardsMerged++;

auto shardBounds = getShardSizeBounds( merged, maxShardSize );
// If we only recently got the current shard's metrics (i.e., less than DD_LOW_BANDWIDTH_DELAY ago), they
// may not be stable yet, so we cannot continue merging in this direction.
if( endingStats.bytes >= shardBounds.min.bytes ||
getBandwidthStatus( endingStats ) != BandwidthStatusLow ||
now() - lastLowBandwidthStartTime < SERVER_KNOBS->DD_LOW_BANDWIDTH_DELAY ||

@@ -516,13 +548,21 @@ Future<Void> shardMerger(
// restarting the shard tracker will dereference values in the shard map, so make a copy
KeyRange mergeRange = merged;

// OldKeys: Shards in the key range are merged as one shard defined by NewKeys;
// NewKeys: New key range after shards are merged;
// EndingSize: The new merged shard size in bytes;
// BatchedMerges: The number of shards merged. Each shard is defined in self->shards;
// LastLowBandwidthStartTime: When a shard's bandwidth status became BandwidthStatusLow. If that happened
// less than DD_LOW_BANDWIDTH_DELAY ago, the merging logic stops at the shard;
// ShardCount: The number of non-splittable shards that are merged. Each shard defined in self->shards may
// contain more than one non-splittable shard.
TraceEvent("RelocateShardMergeMetrics", self->distributorId)
.detail("OldKeys", keys)
.detail("NewKeys", mergeRange)
.detail("EndingSize", endingStats.bytes)
.detail("BatchedMerges", shardsMerged)
.detail("LastLowBandwidthStartTime", lastLowBandwidthStartTime)
.detail("ShardCount", shardCount);
.detail("OldKeys", keys)
.detail("NewKeys", mergeRange)
.detail("EndingSize", endingStats.bytes)
.detail("BatchedMerges", shardsMerged)
.detail("LastLowBandwidthStartTime", lastLowBandwidthStartTime)
.detail("ShardCount", shardCount);

if(mergeRange.begin < systemKeys.begin) {
self->systemSizeEstimate -= systemBytes;

@@ -584,18 +624,14 @@ ACTOR Future<Void> shardEvaluator(
return Void();
}

ACTOR Future<Void> shardTracker(
DataDistributionTracker* self,
KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize)
{
wait( yieldedFuture(self->readyToStart.getFuture()) );
ACTOR Future<Void> shardTracker(DataDistributionTracker::SafeAccessor self, KeyRange keys,
Reference<AsyncVar<Optional<ShardMetrics>>> shardSize) {
wait(yieldedFuture(self()->readyToStart.getFuture()));

if( !shardSize->get().present() )
wait( shardSize->onChange() );

if( !self->maxShardSize->get().present() )
wait( yieldedFuture(self->maxShardSize->onChange()) );
if (!self()->maxShardSize->get().present()) wait(yieldedFuture(self()->maxShardSize->onChange()));

// Since maxShardSize will become present for all shards at once, avoid slow tasks with a short delay
wait( delay( 0, TaskPriority::DataDistribution ) );

@@ -603,26 +639,27 @@ ACTOR Future<Void> shardTracker(
// Survives multiple calls to shardEvaluator and keeps merges from happening too quickly.
state Reference<HasBeenTrueFor> wantsToMerge( new HasBeenTrueFor( shardSize->get() ) );

/*TraceEvent("ShardTracker", self->distributorId)
.detail("Begin", keys.begin)
.detail("End", keys.end)
.detail("TrackerID", trackerID)
.detail("MaxBytes", self->maxShardSize->get().get())
.detail("ShardSize", shardSize->get().get().bytes)
.detail("BytesPerKSec", shardSize->get().get().bytesPerKSecond);*/
/*TraceEvent("ShardTracker", self()->distributorId)
.detail("Begin", keys.begin)
.detail("End", keys.end)
.detail("TrackerID", trackerID)
.detail("MaxBytes", self()->maxShardSize->get().get())
.detail("ShardSize", shardSize->get().get().bytes)
.detail("BytesPerKSec", shardSize->get().get().bytesPerKSecond);*/

try {
loop {
// Use the current known size to check for (and start) splits and merges.
wait( shardEvaluator( self, keys, shardSize, wantsToMerge ) );
wait(shardEvaluator(self(), keys, shardSize, wantsToMerge));

// We could have a lot of actors being released from the previous wait at the same time. Immediately calling
// delay(0) mitigates the resulting SlowTask
wait( delay(0, TaskPriority::DataDistribution) );
}
} catch (Error& e) {
if (e.code() != error_code_actor_cancelled)
self->output.sendError(e); // Propagate failure to dataDistributionTracker
if (e.code() != error_code_actor_cancelled && e.code() != error_code_dd_tracker_cancelled) {
self()->output.sendError(e); // Propagate failure to dataDistributionTracker
}
throw e;
}
}

@@ -653,8 +690,8 @@ void restartShardTrackers(DataDistributionTracker* self, KeyRangeRef keys, Optio

ShardTrackedData data;
data.stats = shardSize;
data.trackShard = shardTracker( self, ranges[i], shardSize );
data.trackBytes = trackShardBytes( self, ranges[i], shardSize );
data.trackShard = shardTracker(DataDistributionTracker::SafeAccessor(self), ranges[i], shardSize);
data.trackBytes = trackShardBytes(DataDistributionTracker::SafeAccessor(self), ranges[i], shardSize);
self->shards.insert( ranges[i], data );
}
}

@@ -728,9 +765,10 @@ ACTOR Future<Void> dataDistributionTracker(Reference<InitialDataDistribution> in
PromiseStream<GetMetricsRequest> getShardMetrics,
FutureStream<Promise<int64_t>> getAverageShardBytes,
Promise<Void> readyToStart, Reference<AsyncVar<bool>> anyZeroHealthyTeams,
UID distributorId, KeyRangeMap<ShardTrackedData>* shards) {
UID distributorId, KeyRangeMap<ShardTrackedData>* shards,
bool const* trackerCancelled) {
state DataDistributionTracker self(cx, distributorId, readyToStart, output, shardsAffectedByTeamFailure,
anyZeroHealthyTeams, *shards);
anyZeroHealthyTeams, *shards, *trackerCancelled);
state Future<Void> loggingTrigger = Void();
try {
wait( trackInitialShards( &self, initData ) );

@@ -221,6 +221,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( DD_ENABLE_VERBOSE_TRACING, false ); if( randomize && BUGGIFY ) DD_ENABLE_VERBOSE_TRACING = true;
init( DD_TEAMS_INFO_PRINT_INTERVAL, 60 ); if( randomize && BUGGIFY ) DD_TEAMS_INFO_PRINT_INTERVAL = 10;
init( DD_TEAMS_INFO_PRINT_YIELD_COUNT, 100 ); if( randomize && BUGGIFY ) DD_TEAMS_INFO_PRINT_YIELD_COUNT = deterministicRandom()->random01() * 1000 + 1;
init( DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY, 120 ); if( randomize && BUGGIFY ) DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY = 5;

// TeamRemover
init( TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER, false ); if( randomize && BUGGIFY ) TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER = deterministicRandom()->random01() < 0.1 ? true : false; // false by default. disable the consistency check when it's true

@@ -343,6 +344,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( MAX_PROXY_COMPUTE, 2.0 );
init( PROXY_COMPUTE_BUCKETS, 20000 );
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
init( PROXY_REJECT_BATCH_QUEUED_TOO_LONG, true );

init( RESET_MASTER_BATCHES, 200 );
init( RESET_RESOLVER_BATCHES, 200 );

@@ -508,6 +510,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( BEHIND_CHECK_COUNT, 2 );
init( BEHIND_CHECK_VERSIONS, 5 * VERSIONS_PER_SECOND );
init( WAIT_METRICS_WRONG_SHARD_CHANCE, isSimulated ? 1.0 : 0.1 );
init( REPORT_DD_METRICS, true );
init( DD_METRICS_REPORT_INTERVAL, 30.0 );
init( FETCH_KEYS_TOO_LONG_TIME_CRITERIA, 300.0 );

//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;

@@ -536,6 +541,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
init( MAX_STATUS_REQUESTS_PER_SECOND, 256.0 );
init( CONFIGURATION_ROWS_TO_FETCH, 20000 );
init( DISABLE_DUPLICATE_LOG_WARNING, false );
init( HISTOGRAM_REPORT_INTERVAL, 300.0 );

// IPager
init( PAGER_RESERVED_PAGES, 1 );

@@ -184,6 +184,7 @@ public:
bool DD_ENABLE_VERBOSE_TRACING;
int DD_TEAMS_INFO_PRINT_INTERVAL;
int DD_TEAMS_INFO_PRINT_YIELD_COUNT;
int DD_TEAM_ZERO_SERVER_LEFT_LOG_DELAY;

// TeamRemover to remove redundant teams
bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor

@@ -288,6 +289,7 @@ public:
double MAX_PROXY_COMPUTE;
int PROXY_COMPUTE_BUCKETS;
double PROXY_COMPUTE_GROWTH_RATE;
bool PROXY_REJECT_BATCH_QUEUED_TOO_LONG;

int RESET_MASTER_BATCHES;
int RESET_RESOLVER_BATCHES;

@@ -451,6 +453,9 @@ public:
int BEHIND_CHECK_COUNT;
int64_t BEHIND_CHECK_VERSIONS;
double WAIT_METRICS_WRONG_SHARD_CHANCE;
bool REPORT_DD_METRICS;
double DD_METRICS_REPORT_INTERVAL;
double FETCH_KEYS_TOO_LONG_TIME_CRITERIA;

//Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;

@@ -479,6 +484,7 @@ public:
double MAX_STATUS_REQUESTS_PER_SECOND;
int CONFIGURATION_ROWS_TO_FETCH;
bool DISABLE_DUPLICATE_LOG_WARNING;
double HISTOGRAM_REPORT_INTERVAL;

// IPager
int PAGER_RESERVED_PAGES;

@@ -30,6 +30,8 @@
#include "fdbserver/ApplyMetadataMutation.h"
#include "fdbserver/RecoveryState.h"
#include "fdbclient/Atomic.h"
#include "flow/Arena.h"
#include "flow/Histogram.h"
#include "flow/TDMetric.actor.h"
#include "flow/actorcompiler.h" // This must be the last #include.

@@ -75,20 +77,26 @@ struct LogRouterData {

UID dbgid;
Reference<AsyncVar<Reference<ILogSystem>>> logSystem;
NotifiedVersion version;
NotifiedVersion minPopped;
Optional<UID> primaryPeekLocation;
NotifiedVersion version; // The largest version at which the log router has peeked mutations
// from satellite tLog or primary tLogs.
NotifiedVersion minPopped; // The minimum version among all tags that has been popped by remote tLogs.
Version startVersion;
Version minKnownCommittedVersion;
Version minKnownCommittedVersion; // The minimum durable version among all LRs.
// A LR's durable version is the maximum version of mutations that have been
// popped by remote tLog.
Version poppedVersion;
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
Tag routerTag;
bool allowPops;
LogSet logSet;
bool foundEpochEnd;
double waitForVersionTime = 0;
double maxWaitForVersionTime = 0;
double getMoreTime = 0;
double maxGetMoreTime = 0;
bool foundEpochEnd; // Cluster is not fully recovered yet. LR has to handle recovery
double waitForVersionTime = 0; // The total amount of time LR waits for remote tLog to peek and pop its data.
double maxWaitForVersionTime = 0; // The max one-instance wait time when LR must wait for remote tLog to pop data.
double getMoreTime = 0; // The total amount of time LR waits for satellite tLog's data to become available.
double maxGetMoreTime = 0; // The max wait time LR spent in a pull-data-request to satellite tLog.
int64_t generation = -1;
Reference<Histogram> peekLatencyDist;

struct PeekTrackerData {
std::map<int, Promise<std::pair<Version, bool>>> sequence_version;

@@ -98,7 +106,9 @@ struct LogRouterData {
std::map<UID, PeekTrackerData> peekTracker;

CounterCollection cc;
Counter getMoreCount, getMoreBlockedCount;
Counter getMoreCount; // Incremented each time the LR tries to pull data from the satellite tLog.
Counter getMoreBlockedCount; // Incremented when data is not yet available as the LR tries to pull it from the satellite tLog.
Future<Void> logger;
Reference<EventCacheHolder> eventCacheHolder;

@@ -119,9 +129,14 @@ struct LogRouterData {
return newTagData;
}

LogRouterData(UID dbgid, const InitializeLogRouterRequest& req) : dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()),
version(req.startVersion-1), minPopped(0), startVersion(req.startVersion), allowPops(false), minKnownCommittedVersion(0), poppedVersion(0), foundEpochEnd(false),
cc("LogRouter", dbgid.toString()), getMoreCount("GetMoreCount", cc), getMoreBlockedCount("GetMoreBlockedCount", cc) {
LogRouterData(UID dbgid, const InitializeLogRouterRequest& req)
: dbgid(dbgid), routerTag(req.routerTag), logSystem(new AsyncVar<Reference<ILogSystem>>()),
version(req.startVersion - 1), minPopped(0), generation(req.recoveryCount), startVersion(req.startVersion),
allowPops(false), minKnownCommittedVersion(0), poppedVersion(0), foundEpochEnd(false),
cc("LogRouter", dbgid.toString()), getMoreCount("GetMoreCount", cc),
getMoreBlockedCount("GetMoreBlockedCount", cc),
peekLatencyDist(Histogram::getHistogram(LiteralStringRef("LogRouter"), LiteralStringRef("PeekTLogLatency"),
Histogram::Unit::microseconds)) {
//setup just enough of a logSet to be able to call getPushLocations
logSet.logServers.resize(req.tLogLocalities.size());
logSet.tLogPolicy = req.tLogPolicy;

@@ -138,8 +153,10 @@ struct LogRouterData {

eventCacheHolder = Reference<EventCacheHolder>( new EventCacheHolder(dbgid.shortString() + ".PeekLocation") );

specialCounter(cc, "Version", [this](){ return this->version.get(); });
// FetchedVersions: How many versions of mutations are buffered at the LR and have not been popped by remote tLogs
specialCounter(cc, "Version", [this]() { return this->version.get(); });
specialCounter(cc, "MinPopped", [this](){ return this->minPopped.get(); });
// TODO: Add minPopped locality and minPoppedId, similar to the tLog metrics
specialCounter(cc, "FetchedVersions", [this](){ return std::max<Version>(0, std::min<Version>(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS, this->version.get() - this->minPopped.get())); });
specialCounter(cc, "MinKnownCommittedVersion", [this](){ return this->minKnownCommittedVersion; });
specialCounter(cc, "PoppedVersion", [this](){ return this->poppedVersion; });

@@ -148,7 +165,12 @@ struct LogRouterData {
specialCounter(cc, "WaitForVersionMaxMS", [this](){ double val = this->maxWaitForVersionTime; this->maxWaitForVersionTime = 0; return 1000*val; });
specialCounter(cc, "GetMoreMS", [this](){ double val = this->getMoreTime; this->getMoreTime = 0; return 1000*val; });
specialCounter(cc, "GetMoreMaxMS", [this](){ double val = this->maxGetMoreTime; this->maxGetMoreTime = 0; return 1000*val; });
logger = traceCounters("LogRouterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "LogRouterMetrics");
specialCounter(cc, "Generation", [this]() { return this->generation; });
logger = traceCounters("LogRouterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc,
"LogRouterMetrics", [this](TraceEvent& te) {
te.detail("PrimaryPeekLocation", this->primaryPeekLocation);
te.detail("RouterTag", this->routerTag.toString());
});
}
};

@@ -207,8 +229,15 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
// Since one set of log routers is created per generation of transaction logs, the gap caused by epoch end will be within MAX_VERSIONS_IN_FLIGHT of the log routers start version.
state double startTime = now();
if(self->version.get() < self->startVersion) {
// The log router must wait for remote tLogs to process data whose versions are less than self->startVersion
// before it can pull more data (i.e., data after self->startVersion) from the satellite tLog;
// this prevents the LR from running out of memory by pulling too much data from the satellite tLog at once.
// Note: each commit writes data to both the primary tLog and the satellite tLog, so the satellite tLog can be
// viewed as a part of the primary tLogs.
if(ver > self->startVersion) {
self->version.set(self->startVersion);
// Wait for remote tLog to peek and pop from LR,
// so that LR's minPopped version can increase to self->startVersion
wait(self->minPopped.whenAtLeast(self->version.get()));
}
self->waitForVersionTime += now() - startTime;

@@ -216,6 +245,9 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
return Void();
}
if(!self->foundEpochEnd) {
// Similar to the proxy, which does not keep more than MAX_READ_TRANSACTION_LIFE_VERSIONS transactions outstanding,
// the log router does not keep more than MAX_READ_TRANSACTION_LIFE_VERSIONS transactions outstanding, because
// remote SSes cannot roll back to more than MAX_READ_TRANSACTION_LIFE_VERSIONS ago.
wait(self->minPopped.whenAtLeast(std::min(self->version.get(), ver - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)));
} else {
while(self->minPopped.get() + SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS < ver) {

@@ -235,6 +267,7 @@ ACTOR Future<Void> waitForVersion( LogRouterData *self, Version ver ) {
return Void();
}

// The log router pulls data from the satellite tLog
ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
state Future<Void> dbInfoChange = Void();
state Reference<ILogSystem::IPeekCursor> r;

@@ -256,14 +289,17 @@ ACTOR Future<Void> pullAsyncData( LogRouterData *self ) {
state double startTime = now();
choose {
when(wait( getMoreF ) ) {
self->getMoreTime += now() - startTime;
self->maxGetMoreTime = std::max(self->maxGetMoreTime, now() - startTime);
double peekTime = now() - startTime;
self->peekLatencyDist->sampleSeconds(peekTime);
self->getMoreTime += peekTime;
self->maxGetMoreTime = std::max(self->maxGetMoreTime, peekTime);
break;
}
when( wait( dbInfoChange ) ) { //FIXME: does this actually happen?
if(r) tagPopped = std::max(tagPopped, r->popped());
if( self->logSystem->get() ) {
r = self->logSystem->get()->peekLogRouter( self->dbgid, tagAt, self->routerTag );
self->primaryPeekLocation = r->getPrimaryPeekLocation();
TraceEvent("LogRouterPeekLocation", self->dbgid).detail("LogID", r->getPrimaryPeekLocation()).trackLatest(self->eventCacheHolder->trackingKey);
} else {
r = Reference<ILogSystem::IPeekCursor>();
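The peekLatencyDist wiring above follows the flow/Histogram.h pattern: fetch a named histogram once, then feed it one sample per measured interval. A minimal sketch of that pattern, assuming only the getHistogram/sampleSeconds calls that appear in this diff:

// Sketch of the Histogram usage pattern introduced by this commit.
Reference<Histogram> latencyDist = Histogram::getHistogram(
    LiteralStringRef("LogRouter"), LiteralStringRef("PeekTLogLatency"), Histogram::Unit::microseconds);

double start = now();
// ... perform the peek ...
latencyDist->sampleSeconds(now() - start); // converted and bucketed by the histogram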

@@ -564,6 +600,7 @@ ACTOR Future<Void> logRouterCore(
addActor.send( logRouterPeekMessages( &logRouterData, req ) );
}
when( TLogPopRequest req = waitNext( interf.popMessages.getFuture() ) ) {
// Request from remote tLog to pop data from LR
addActor.send( logRouterPop( &logRouterData, req ) );
}
when (wait(error)) {}

@@ -138,8 +138,20 @@ ACTOR Future<Void> resetChecker( ILogSystem::ServerPeekCursor* self, NetworkAddr
self->unknownReplies = 0;
self->fastReplies = 0;
wait(delay(SERVER_KNOBS->PEEK_STATS_INTERVAL));
TraceEvent("SlowPeekStats").detail("PeerAddress", addr).detail("SlowReplies", self->slowReplies).detail("FastReplies", self->fastReplies).detail("UnknownReplies", self->unknownReplies);
if(self->slowReplies >= SERVER_KNOBS->PEEK_STATS_SLOW_AMOUNT && self->slowReplies/double(self->slowReplies+self->fastReplies) >= SERVER_KNOBS->PEEK_STATS_SLOW_RATIO) {
TraceEvent("SlowPeekStats", self->randomID)
.detail("PeerAddress", addr)
.detail("SlowReplies", self->slowReplies)
.detail("FastReplies", self->fastReplies)
.detail("UnknownReplies", self->unknownReplies);

if (self->slowReplies >= SERVER_KNOBS->PEEK_STATS_SLOW_AMOUNT &&
self->slowReplies / double(self->slowReplies + self->fastReplies) >= SERVER_KNOBS->PEEK_STATS_SLOW_RATIO) {

TraceEvent("ConnectionResetSlowPeek", self->randomID)
.detail("PeerAddress", addr)
.detail("SlowReplies", self->slowReplies)
.detail("FastReplies", self->fastReplies)
.detail("UnknownReplies", self->unknownReplies);
FlowTransport::transport().resetConnection(addr);
self->lastReset = now();
}

@@ -63,6 +63,14 @@ struct ProxyStats {
Counter conflictRanges;
Counter keyServerLocationIn, keyServerLocationOut, keyServerLocationErrors;
Version lastCommitVersionAssigned;
double transactionRateAllowed, batchTransactionRateAllowed;
double transactionLimit, batchTransactionLimit;
// How much of the GRV request queue was processed in one attempt to hand out read versions.
double percentageOfDefaultGRVQueueProcessed;
double percentageOfBatchGRVQueueProcessed;

LatencySample defaultTxnGRVTimeInQueue;
LatencySample batchTxnGRVTimeInQueue;

LatencySample commitLatencySample;
LatencySample grvLatencySample;

@@ -72,24 +80,56 @@ struct ProxyStats {

Future<Void> logger;

explicit ProxyStats(UID id, Version* pVersion, NotifiedVersion* pCommittedVersion, int64_t *commitBatchesMemBytesCountPtr)
: cc("ProxyStats", id.toString()), txnRequestIn("TxnRequestIn", cc), txnRequestOut("TxnRequestOut", cc), txnRequestErrors("TxnRequestErrors", cc),
txnStartIn("TxnStartIn", cc), txnStartOut("TxnStartOut", cc), txnStartBatch("TxnStartBatch", cc), txnSystemPriorityStartIn("TxnSystemPriorityStartIn", cc), txnSystemPriorityStartOut("TxnSystemPriorityStartOut", cc), txnBatchPriorityStartIn("TxnBatchPriorityStartIn", cc), txnBatchPriorityStartOut("TxnBatchPriorityStartOut", cc),
txnDefaultPriorityStartIn("TxnDefaultPriorityStartIn", cc), txnDefaultPriorityStartOut("TxnDefaultPriorityStartOut", cc), txnCommitIn("TxnCommitIn", cc), txnCommitVersionAssigned("TxnCommitVersionAssigned", cc), txnCommitResolving("TxnCommitResolving", cc), txnCommitResolved("TxnCommitResolved", cc), txnCommitOut("TxnCommitOut", cc),
txnCommitOutSuccess("TxnCommitOutSuccess", cc), txnCommitErrors("TxnCommitErrors", cc), txnConflicts("TxnConflicts", cc), commitBatchIn("CommitBatchIn", cc), commitBatchOut("CommitBatchOut", cc), mutationBytes("MutationBytes", cc), mutations("Mutations", cc), conflictRanges("ConflictRanges", cc), keyServerLocationIn("KeyServerLocationIn", cc), keyServerLocationOut("KeyServerLocationOut", cc), keyServerLocationErrors("KeyServerLocationErrors", cc),
lastCommitVersionAssigned(0), commitLatencySample("CommitLatencyMetrics", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_SAMPLE_SIZE), grvLatencySample("GRVLatencyMetrics", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
commitLatencyBands("CommitLatencyBands", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY), grvLatencyBands("GRVLatencyBands", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY)
{
explicit ProxyStats(UID id, Version* pVersion, NotifiedVersion* pCommittedVersion,
int64_t* commitBatchesMemBytesCountPtr)
: cc("ProxyStats", id.toString()), txnRequestIn("TxnRequestIn", cc), txnRequestOut("TxnRequestOut", cc),
txnRequestErrors("TxnRequestErrors", cc), txnStartIn("TxnStartIn", cc), txnStartOut("TxnStartOut", cc),
txnStartBatch("TxnStartBatch", cc), txnSystemPriorityStartIn("TxnSystemPriorityStartIn", cc),
txnSystemPriorityStartOut("TxnSystemPriorityStartOut", cc),
txnBatchPriorityStartIn("TxnBatchPriorityStartIn", cc),
txnBatchPriorityStartOut("TxnBatchPriorityStartOut", cc),
txnDefaultPriorityStartIn("TxnDefaultPriorityStartIn", cc),
txnDefaultPriorityStartOut("TxnDefaultPriorityStartOut", cc), txnCommitIn("TxnCommitIn", cc),
txnCommitVersionAssigned("TxnCommitVersionAssigned", cc), txnCommitResolving("TxnCommitResolving", cc),
txnCommitResolved("TxnCommitResolved", cc), txnCommitOut("TxnCommitOut", cc),
txnCommitOutSuccess("TxnCommitOutSuccess", cc), txnCommitErrors("TxnCommitErrors", cc),
txnConflicts("TxnConflicts", cc), commitBatchIn("CommitBatchIn", cc), commitBatchOut("CommitBatchOut", cc),
mutationBytes("MutationBytes", cc), mutations("Mutations", cc), conflictRanges("ConflictRanges", cc),
keyServerLocationIn("KeyServerLocationIn", cc), keyServerLocationOut("KeyServerLocationOut", cc),
keyServerLocationErrors("KeyServerLocationErrors", cc), lastCommitVersionAssigned(0),
commitLatencySample("CommitLatencyMetrics", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
grvLatencySample("GRVLatencyMetrics", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
commitLatencyBands("CommitLatencyBands", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY),
grvLatencyBands("GRVLatencyBands", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY),
defaultTxnGRVTimeInQueue("DefaultTxnGRVTimeInQueue", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
batchTxnGRVTimeInQueue("BatchTxnGRVTimeInQueue", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SAMPLE_SIZE),
transactionRateAllowed(0), batchTransactionRateAllowed(0), transactionLimit(0), batchTransactionLimit(0),
percentageOfDefaultGRVQueueProcessed(0), percentageOfBatchGRVQueueProcessed(0) {
specialCounter(cc, "LastAssignedCommitVersion", [this](){return this->lastCommitVersionAssigned;});
specialCounter(cc, "Version", [pVersion](){return *pVersion; });
specialCounter(cc, "CommittedVersion", [pCommittedVersion](){ return pCommittedVersion->get(); });
specialCounter(cc, "CommitBatchesMemBytesCount", [commitBatchesMemBytesCountPtr]() { return *commitBatchesMemBytesCountPtr; });
// The rate at which the limit (budget) is allowed to grow.
specialCounter(cc, "SystemAndDefaultTxnRateAllowed", [this]() { return this->transactionRateAllowed; });
specialCounter(cc, "BatchTransactionRateAllowed", [this]() { return this->batchTransactionRateAllowed; });
specialCounter(cc, "SystemAndDefaultTxnLimit", [this]() { return this->transactionLimit; });
specialCounter(cc, "BatchTransactionLimit", [this]() { return this->batchTransactionLimit; });
specialCounter(cc, "PercentageOfDefaultGRVQueueProcessed",
[this]() { return this->percentageOfDefaultGRVQueueProcessed; });
specialCounter(cc, "PercentageOfBatchGRVQueueProcessed",
[this]() { return this->percentageOfBatchGRVQueueProcessed; });
logger = traceCounters("ProxyMetrics", id, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, &cc, "ProxyMetrics");
}
};

ACTOR Future<Void> getRate(UID myID, Reference<AsyncVar<ServerDBInfo>> db, int64_t* inTransactionCount, int64_t* inBatchTransactionCount, double* outTransactionRate,
double* outBatchTransactionRate, GetHealthMetricsReply* healthMetricsReply, GetHealthMetricsReply* detailedHealthMetricsReply) {
ACTOR Future<Void> getRate(UID myID, Reference<AsyncVar<ServerDBInfo>> db, int64_t* inTransactionCount,
int64_t* inBatchTransactionCount, double* outTransactionRate,
double* outBatchTransactionRate, GetHealthMetricsReply* healthMetricsReply,
GetHealthMetricsReply* detailedHealthMetricsReply, ProxyStats* stats) {
state Future<Void> nextRequestTimer = Never();
state Future<Void> leaseTimeout = Never();
state Future<GetRateInfoReply> reply = Never();

@@ -120,7 +160,14 @@ ACTOR Future<Void> getRate(UID myID, Reference<AsyncVar<ServerDBInfo>> db, int64
reply = Never();
*outTransactionRate = rep.transactionRate;
*outBatchTransactionRate = rep.batchTransactionRate;
//TraceEvent("MasterProxyRate", myID).detail("Rate", rep.transactionRate).detail("BatchRate", rep.batchTransactionRate).detail("Lease", rep.leaseDuration).detail("ReleasedTransactions", *inTransactionCount - lastTC);
stats->transactionRateAllowed = rep.transactionRate;
stats->batchTransactionRateAllowed = rep.batchTransactionRate;
// TraceEvent("MasterProxyTxRate", myID)
// .detail("RKID", db->get().ratekeeper.get().id())
// .detail("RateAllowed", rep.transactionRate)
// .detail("BatchRateAllowed", rep.batchTransactionRate)
// .detail("Lease", rep.leaseDuration)
// .detail("ReleasedTransactions", *inTransactionCount - lastTC);
lastTC = *inTransactionCount;
leaseTimeout = delay(rep.leaseDuration);
nextRequestTimer = delayJittered(rep.leaseDuration / 2);

@@ -520,6 +567,20 @@ ACTOR Future<Void> releaseResolvingAfter(ProxyCommitData* self, Future<Void> rel
return Void();
}

// Try to identify recovery transactions and backup's apply-mutations (blind writes).
// Neither may be rejected; both are approximated by looking for a first mutation
// that starts with 0xff.
bool canReject(const std::vector<CommitTransactionRequest>& trs) {
for (const auto& tr : trs) {
if (tr.transaction.mutations.empty()) continue;
if (tr.transaction.mutations[0].param1.startsWith(LiteralStringRef("\xff")) ||
tr.transaction.read_conflict_ranges.empty()) {
return false;
}
}
return true;
}
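As a concrete reading of this check: a batch containing one transaction whose first mutation writes a key beginning with \xff (a recovery-style blind write to the system keyspace) makes canReject return false, and so does any transaction with no read conflict ranges; in both cases the whole batch must proceed to commit rather than be rejected.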

ACTOR Future<Void> commitBatch(
ProxyCommitData* self,
vector<CommitTransactionRequest> trs,

@@ -565,9 +626,13 @@ ACTOR Future<Void> commitBatch(

if (debugID.present())
g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.Before");
state double timeStart = now();

if(localBatchNumber-self->latestLocalCommitBatchResolving.get()>SERVER_KNOBS->RESET_MASTER_BATCHES && now()-self->lastMasterReset>SERVER_KNOBS->RESET_MASTER_DELAY) {
TraceEvent(SevWarnAlways, "ResetMasterNetwork").detail("CurrentBatch", localBatchNumber).detail("InProcessBatch", self->latestLocalCommitBatchResolving.get());
TraceEvent(SevWarnAlways, "ConnectionResetMaster", self->dbgid)
.detail("PeerAddress", self->master.address())
.detail("CurrentBatch", localBatchNumber)
.detail("InProcessBatch", self->latestLocalCommitBatchResolving.get());
FlowTransport::transport().resetConnection(self->master.address());
self->lastMasterReset=now();
}

@@ -575,6 +640,32 @@ ACTOR Future<Void> commitBatch(
/////// Phase 1: Pre-resolution processing (CPU bound except waiting for a version # which is separately pipelined and *should* be available by now (unless empty commit); ordered; currently atomic but could yield)
TEST(self->latestLocalCommitBatchResolving.get() < localBatchNumber-1); // Queuing pre-resolution commit processing
wait(self->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber-1));
double queuingDelay = g_network->now() - timeStart;
if ((queuingDelay > (double)SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS / SERVER_KNOBS->VERSIONS_PER_SECOND ||
(g_network->isSimulated() && BUGGIFY_WITH_PROB(0.01))) &&
SERVER_KNOBS->PROXY_REJECT_BATCH_QUEUED_TOO_LONG && canReject(trs)) {
// Disabled for the recovery transaction; otherwise, recovery can't finish and keeps doing more recoveries.
TEST(true); // Reject transactions in the batch
TraceEvent(SevWarnAlways, "ProxyReject", self->dbgid)
.suppressFor(0.1)
.detail("QDelay", queuingDelay)
.detail("Transactions", trs.size())
.detail("BatchNumber", localBatchNumber);
ASSERT(self->latestLocalCommitBatchResolving.get() == localBatchNumber - 1);
self->latestLocalCommitBatchResolving.set(localBatchNumber);

wait(self->latestLocalCommitBatchLogging.whenAtLeast(localBatchNumber-1));
ASSERT(self->latestLocalCommitBatchLogging.get() == localBatchNumber - 1);
self->latestLocalCommitBatchLogging.set(localBatchNumber);
for (const auto& tr : trs) {
tr.reply.sendError(transaction_too_old());
}
++self->stats.commitBatchOut;
self->stats.txnCommitOut += trs.size();
self->stats.txnConflicts += trs.size();
return Void();
}
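For a sense of scale: assuming the usual knob defaults of VERSIONS_PER_SECOND = 1e6 and MAX_READ_TRANSACTION_LIFE_VERSIONS = 5 * VERSIONS_PER_SECOND, the rejection threshold works out to 5e6 / 1e6 = 5 seconds. A batch that has already waited in the resolution queue longer than the read-transaction lifetime would largely fail with transaction_too_old anyway, so rejecting it up front sheds load without changing outcomes.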

state Future<Void> releaseDelay = delay(std::min(SERVER_KNOBS->MAX_PROXY_COMPUTE, batchOperations*self->commitComputePerOperation[latencyBucket]), TaskPriority::ProxyMasterVersionReply);

if (debugID.present())

@@ -628,9 +719,14 @@ ACTOR Future<Void> commitBatch(
state vector<vector<int>> transactionResolverMap = std::move( requests.transactionResolverMap );
state Future<Void> releaseFuture = releaseResolvingAfter(self, releaseDelay, localBatchNumber);

if(localBatchNumber-self->latestLocalCommitBatchLogging.get()>SERVER_KNOBS->RESET_RESOLVER_BATCHES && now()-self->lastResolverReset>SERVER_KNOBS->RESET_RESOLVER_DELAY) {
TraceEvent(SevWarnAlways, "ResetResolverNetwork").detail("CurrentBatch", localBatchNumber).detail("InProcessBatch", self->latestLocalCommitBatchLogging.get());
if (localBatchNumber - self->latestLocalCommitBatchLogging.get() > SERVER_KNOBS->RESET_RESOLVER_BATCHES &&
now() - self->lastResolverReset > SERVER_KNOBS->RESET_RESOLVER_DELAY) {

for (int r = 0; r<self->resolvers.size(); r++) {
TraceEvent(SevWarnAlways, "ConnectionResetResolver", self->dbgid)
.detail("PeerAddr", self->resolvers[r].address())
.detail("CurrentBatch", localBatchNumber)
.detail("InProcessBatch", self->latestLocalCommitBatchLogging.get());
FlowTransport::transport().resetConnection(self->resolvers[r].address());
}
self->lastResolverReset=now();

@@ -1258,13 +1354,10 @@ ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture, std::
return Void();
}

ACTOR static Future<Void> transactionStarter(
MasterProxyInterface proxy,
Reference<AsyncVar<ServerDBInfo>> db,
PromiseStream<Future<Void>> addActor,
ProxyCommitData* commitData, GetHealthMetricsReply* healthMetricsReply,
GetHealthMetricsReply* detailedHealthMetricsReply)
{
ACTOR static Future<Void> transactionStarter(MasterProxyInterface proxy, Reference<AsyncVar<ServerDBInfo>> db,
PromiseStream<Future<Void>> addActor, ProxyCommitData* commitData,
GetHealthMetricsReply* healthMetricsReply,
GetHealthMetricsReply* detailedHealthMetricsReply, ProxyStats* stats) {
state double lastGRVTime = 0;
state PromiseStream<Void> GRVTimer;
state double GRVBatchTime = SERVER_KNOBS->START_TRANSACTION_BATCH_INTERVAL_MIN;

@@ -1280,7 +1373,8 @@ ACTOR static Future<Void> transactionStarter(
state vector<MasterProxyInterface> otherProxies;

state PromiseStream<double> replyTimes;
addActor.send(getRate(proxy.id(), db, &transactionCount, &batchTransactionCount, &normalRateInfo.rate, &batchRateInfo.rate, healthMetricsReply, detailedHealthMetricsReply));
addActor.send(getRate(proxy.id(), db, &transactionCount, &batchTransactionCount, &normalRateInfo.rate,
&batchRateInfo.rate, healthMetricsReply, detailedHealthMetricsReply, stats));
addActor.send(queueTransactionStartRequests(&systemQueue, &defaultQueue, &batchQueue, proxy.getConsistentReadVersion.getFuture(), GRVTimer, &lastGRVTime, &GRVBatchTime, replyTimes.getFuture(), &commitData->stats));

// Get a list of the other proxies that go together with us

@@ -1307,6 +1401,9 @@ ACTOR static Future<Void> transactionStarter(
normalRateInfo.reset(elapsed);
batchRateInfo.reset(elapsed);

stats->transactionLimit = normalRateInfo.limit;
stats->batchTransactionLimit = batchRateInfo.limit;

int transactionsStarted[2] = {0,0};
int systemTransactionsStarted[2] = {0,0};
int defaultPriTransactionsStarted[2] = { 0, 0 };

@@ -1317,6 +1414,8 @@ ACTOR static Future<Void> transactionStarter(

int requestsToStart = 0;

uint32_t defaultQueueSize = defaultQueue.size();
uint32_t batchQueueSize = batchQueue.size();
while (requestsToStart < SERVER_KNOBS->START_TRANSACTION_MAX_REQUESTS_TO_START) {
Deque<GetReadVersionRequest>* transactionQueue;
if(!systemQueue.empty()) {

@@ -1345,12 +1444,16 @@ ACTOR static Future<Void> transactionStarter(
}

transactionsStarted[req.flags&1] += tc;
if (req.priority() >= GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE)
double currentTime = g_network->timer();
if (req.priority() >= GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE) {
systemTransactionsStarted[req.flags & 1] += tc;
else if (req.priority() >= GetReadVersionRequest::PRIORITY_DEFAULT)
} else if (req.priority() >= GetReadVersionRequest::PRIORITY_DEFAULT) {
defaultPriTransactionsStarted[req.flags & 1] += tc;
else
stats->defaultTxnGRVTimeInQueue.addMeasurement(currentTime - req.requestTime());
} else {
batchPriTransactionsStarted[req.flags & 1] += tc;
stats->batchTxnGRVTimeInQueue.addMeasurement(currentTime - req.requestTime());
}

start[req.flags & 1].push_back(std::move(req)); static_assert(GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY == 1, "Implementation dependent on flag value");
transactionQueue->pop_front();

@@ -1382,6 +1485,8 @@ ACTOR static Future<Void> transactionStarter(
g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "MasterProxyServer.masterProxyServerCore.Broadcast");
}

int defaultGRVProcessed = 0;
int batchGRVProcessed = 0;
for (int i = 0; i < start.size(); i++) {
if (start[i].size()) {
Future<GetReadVersionReply> readVersionReply = getLiveCommittedVersion(commitData, i, &otherProxies, debugID, transactionsStarted[i], systemTransactionsStarted[i], defaultPriTransactionsStarted[i], batchPriTransactionsStarted[i]);

@@ -1391,8 +1496,12 @@ ACTOR static Future<Void> transactionStarter(
if (i == 0) {
addActor.send(timeReply(readVersionReply, replyTimes));
}
defaultGRVProcessed += defaultPriTransactionsStarted[i];
batchGRVProcessed += batchPriTransactionsStarted[i];
}
}
stats->percentageOfDefaultGRVQueueProcessed = (double)defaultGRVProcessed / defaultQueueSize;
stats->percentageOfBatchGRVQueueProcessed = (double)batchGRVProcessed / batchQueueSize;
}
}

@@ -1747,7 +1856,8 @@ ACTOR Future<Void> masterProxyServerCore(
TraceEvent(SevInfo, "CommitBatchesMemoryLimit").detail("BytesLimit", commitBatchesMemoryLimit);

addActor.send(monitorRemoteCommitted(&commitData));
addActor.send(transactionStarter(proxy, commitData.db, addActor, &commitData, &healthMetricsReply, &detailedHealthMetricsReply));
addActor.send(transactionStarter(proxy, commitData.db, addActor, &commitData, &healthMetricsReply,
&detailedHealthMetricsReply, &commitData.stats));
addActor.send(readRequestServer(proxy, addActor, &commitData));
addActor.send(rejoinServer(proxy, &commitData));
addActor.send(healthMetricsRequestServer(proxy, &healthMetricsReply, &detailedHealthMetricsReply));

@@ -27,6 +27,11 @@
#include "fdbserver/Knobs.h"
#include "flow/actorcompiler.h" // This must be the last #include.

const StringRef STORAGESERVER_HISTOGRAM_GROUP = LiteralStringRef("StorageServer");
const StringRef FETCH_KEYS_LATENCY_HISTOGRAM = LiteralStringRef("FetchKeysLatency");
const StringRef FETCH_KEYS_BYTES_HISTOGRAM = LiteralStringRef("FetchKeysSize");
const StringRef FETCH_KEYS_BYTES_PER_SECOND_HISTOGRAM = LiteralStringRef("FetchKeysBandwidth");

struct StorageMetricSample {
IndexedSet<Key, int64_t> sample;
int64_t metricUnitsPerSample;

@@ -40,6 +40,7 @@
#include "fdbserver/WaitFailure.h"
#include "fdbserver/RecoveryState.h"
#include "fdbserver/FDBExecHelper.actor.h"
#include "flow/Histogram.h"
#include "flow/actorcompiler.h" // This must be the last #include.

using std::pair;

@@ -325,6 +326,7 @@ struct TLogData : NonCopyable {
FlowLock concurrentLogRouterReads;
FlowLock persistentDataCommitLock;

// Beginning of fields used by snapshot based backup and restore
bool ignorePopRequest; // ignore pop request from storage servers
double ignorePopDeadline; // time until which the ignorePopRequest will be
// honored

@@ -336,19 +338,26 @@ struct TLogData : NonCopyable {
std::map<Tag, Version> toBePopped; // map of Tag->Version for all the pops
// that came when ignorePopRequest was set
Reference<AsyncVar<bool>> degraded;
// End of fields used by snapshot based backup and restore

std::vector<TagsAndMessage> tempTagMessages;

TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue), persistentQueue(new TLogQueue(persistentQueue, dbgid)),
dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0), queueCommitEnd(0),
diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0), targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS),
ignorePopRequest(false), ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped()
{
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
}
Reference<Histogram> commitLatencyDist;

TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),
persistentData(persistentData), rawPersistentQueue(persistentQueue),
persistentQueue(new TLogQueue(persistentQueue, dbgid)), dbInfo(dbInfo), degraded(degraded), queueCommitBegin(0),
queueCommitEnd(0), diskQueueCommitBytes(0), largeDiskQueueCommitBytes(false), bytesInput(0), bytesDurable(0),
targetVolatileBytes(SERVER_KNOBS->TLOG_SPILL_THRESHOLD), overheadBytesInput(0), overheadBytesDurable(0),
peekMemoryLimiter(SERVER_KNOBS->TLOG_SPILL_REFERENCE_MAX_PEEK_MEMORY_BYTES),
concurrentLogRouterReads(SERVER_KNOBS->CONCURRENT_LOG_ROUTER_READS), ignorePopRequest(false),
ignorePopDeadline(), ignorePopUid(), dataFolder(folder), toBePopped(),
commitLatencyDist(Histogram::getHistogram(LiteralStringRef("tLog"), LiteralStringRef("commit"),
Histogram::Unit::microseconds)) {
cx = openDBOnServer(dbInfo, TaskPriority::DefaultEndpoint, true, true);
}
};

struct LogData : NonCopyable, public ReferenceCounted<LogData> {

@@ -432,13 +441,19 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
bool stopped, initialized;
DBRecoveryCount recoveryCount;

// If persistentDataVersion != persistentDurableDataVersion,
// then spilling is happening from persistentDurableDataVersion to persistentDataVersion.
// Data less than persistentDataDurableVersion is spilled on disk (or fully popped from the TLog);
VersionMetricHandle persistentDataVersion, persistentDataDurableVersion; // The last version number in the portion of the log (written|durable) to persistentData
NotifiedVersion version, queueCommittedVersion;
NotifiedVersion version;
NotifiedVersion queueCommittedVersion; // The disk queue has committed up through this version.
Version queueCommittingVersion;
Version knownCommittedVersion, durableKnownCommittedVersion, minKnownCommittedVersion;
Version queuePoppedVersion;
Version knownCommittedVersion; // The maximum version that a proxy has told us that is committed (all TLogs have
// ack'd a commit for this version).
Version durableKnownCommittedVersion, minKnownCommittedVersion;
Version queuePoppedVersion; // The disk queue has been popped up until the location which represents this version.
Version minPoppedTagVersion;
Tag minPoppedTag;
Tag minPoppedTag; // The tag that makes tLog hold its data and cause tLog's disk queue increasing.
|
||||
|
||||
Deque<std::pair<Version, Standalone<VectorRef<uint8_t>>>> messageBlocks;
|
||||
std::vector<std::vector<Reference<TagData>>> tag_data; //tag.locality | tag.id
|
||||
|
@ -481,7 +496,8 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
Version unrecoveredBefore, recoveredAt;
|
||||
|
||||
struct PeekTrackerData {
|
||||
std::map<int, Promise<std::pair<Version, bool>>> sequence_version;
|
||||
std::map<int, Promise<std::pair<Version, bool>>>
|
||||
sequence_version; // second: Version is peeked begin version. bool is onlySpilled
|
||||
double lastUpdate;
|
||||
|
||||
Tag tag;
|
||||
|
@ -554,12 +570,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
queueCommittedVersion.initMetric(LiteralStringRef("TLog.QueueCommittedVersion"), cc.id);
|
||||
|
||||
specialCounter(cc, "Version", [this](){ return this->version.get(); });
|
||||
specialCounter(cc, "QueueCommittedVersion", [this](){ return this->queueCommittedVersion.get(); });
|
||||
specialCounter(cc, "QueueCommittedVersion", [this]() { return this->queueCommittedVersion.get(); });
|
||||
specialCounter(cc, "PersistentDataVersion", [this](){ return this->persistentDataVersion; });
|
||||
specialCounter(cc, "PersistentDataDurableVersion", [this](){ return this->persistentDataDurableVersion; });
|
||||
specialCounter(cc, "KnownCommittedVersion", [this](){ return this->knownCommittedVersion; });
|
||||
specialCounter(cc, "QueuePoppedVersion", [this](){ return this->queuePoppedVersion; });
|
||||
specialCounter(cc, "MinPoppedTagVersion", [this](){ return this->minPoppedTagVersion; });
|
||||
specialCounter(cc, "MinPoppedTagVersion", [this]() { return this->minPoppedTagVersion; });
|
||||
// The locality and id of the tag that is responsible for making the TLog hold onto its oldest piece of data.
|
||||
// If disk queues are growing and no one is sure why, then you shall look at this to find the tag responsible
|
||||
// for why the TLog thinks it can't throw away data.
|
||||
specialCounter(cc, "MinPoppedTagLocality", [this](){ return this->minPoppedTag.locality; });
|
||||
specialCounter(cc, "MinPoppedTagId", [this](){ return this->minPoppedTag.id; });
|
||||
specialCounter(cc, "SharedBytesInput", [tLogData](){ return tLogData->bytesInput; });
|
||||
|
@ -576,6 +595,7 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
|
|||
specialCounter(cc, "QueueDiskBytesTotal", [tLogData](){ return tLogData->rawPersistentQueue->getStorageBytes().total; });
|
||||
specialCounter(cc, "PeekMemoryReserved", [tLogData]() { return tLogData->peekMemoryLimiter.activePermits(); });
|
||||
specialCounter(cc, "PeekMemoryRequestsStalled", [tLogData]() { return tLogData->peekMemoryLimiter.waiters(); });
|
||||
specialCounter(cc, "Geneartion", [this]() { return this->recoveryCount; });
|
||||
}
|
||||
|
||||
~LogData() {
|
||||
|
@ -759,6 +779,9 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
|
|||
return Void();
|
||||
}
|
||||
|
||||
// It runs against the oldest TLog instance, calculates the first location in the disk queue that contains un-popped
|
||||
// data, and then issues a pop to the disk queue at that location so that anything earlier can be
|
||||
// removed/forgotten/overwritten. In effect, it applies the effect of TLogPop RPCs to disk.
|
||||
ACTOR Future<Void> popDiskQueue( TLogData* self, Reference<LogData> logData ) {
|
||||
if (!logData->initialized) return Void();
|
||||
|
||||
|
@ -973,9 +996,11 @@ ACTOR Future<Void> updatePersistentData( TLogData* self, Reference<LogData> logD
|
|||
return Void();
|
||||
}
|
||||
|
||||
// This function (and updatePersistentData, which is called by this function) run at a low priority and can soak up all CPU resources.
|
||||
// For this reason, they employ aggressive use of yields to avoid causing slow tasks that could introduce latencies for more important
|
||||
// work (e.g. commits).
|
||||
// This function (and updatePersistentData, which is called by this function) run at a low priority and can soak up all
|
||||
// CPU resources. For this reason, they employ aggressive use of yields to avoid causing slow tasks that could introduce
|
||||
// latencies for more important work (e.g. commits).
|
||||
// This actor is just a loop that calls updatePersistentData and popDiskQueue whenever
|
||||
// (a) there's data to be spilled or (b) we should update metadata after some commits have been fully popped.
|
||||
ACTOR Future<Void> updateStorage( TLogData* self ) {
|
||||
while(self->spillOrder.size() && !self->id_data.count(self->spillOrder.front())) {
|
||||
self->spillOrder.pop_front();
|
||||
|
@ -1823,7 +1848,11 @@ ACTOR Future<Void> tLogCommit(
|
|||
return Void();
|
||||
}
|
||||
|
||||
if (logData->version.get() == req.prevVersion) { // Not a duplicate (check relies on critical section between here self->version.set() below!)
|
||||
state double beforeCommitT = now();
|
||||
|
||||
// Not a duplicate (check relies on critical section between here self->version.set() below!)
|
||||
state bool isNotDuplicate = (logData->version.get() == req.prevVersion);
|
||||
if (isNotDuplicate) {
|
||||
if(req.debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.Before");
|
||||
|
||||
|
@ -1861,6 +1890,10 @@ ACTOR Future<Void> tLogCommit(
|
|||
return Void();
|
||||
}
|
||||
|
||||
if (isNotDuplicate) {
|
||||
self->commitLatencyDist->sampleSeconds(now() - beforeCommitT);
|
||||
}
|
||||
|
||||
if(req.debugID.present())
|
||||
g_traceBatch.addEvent("CommitDebug", tlogDebugID.get().first(), "TLog.tLogCommit.After");
|
||||
|
||||
|
@ -2235,6 +2268,7 @@ void removeLog( TLogData* self, Reference<LogData> logData ) {
|
|||
}
|
||||
}
|
||||
|
||||
// remote tLog pull data from log routers
|
||||
ACTOR Future<Void> pullAsyncData( TLogData* self, Reference<LogData> logData, std::vector<Tag> tags, Version beginVersion, Optional<Version> endVersion, bool poppedIsKnownCommitted ) {
|
||||
state Future<Void> dbInfoChange = Void();
|
||||
state Reference<ILogSystem::IPeekCursor> r;
|
||||
|
|
|
@ -160,7 +160,7 @@ OldTLogCoreData::OldTLogCoreData(const OldLogData& oldData) :
|
|||
struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogSystem> {
|
||||
UID dbgid;
|
||||
LogSystemType logSystemType;
|
||||
std::vector<Reference<LogSet>> tLogs;
|
||||
std::vector<Reference<LogSet>> tLogs; // LogSets in different locations: primary, satellite, or remote
|
||||
int expectedLogSets;
|
||||
int logRouterTags;
|
||||
int txsTags;
|
||||
|
@ -168,7 +168,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
int repopulateRegionAntiQuorum;
|
||||
bool stopped;
|
||||
std::set<int8_t> pseudoLocalities;
|
||||
std::map<int8_t, Version> pseudoLocalityPopVersion;
|
||||
std::map<int8_t, Version> pseudoLocalityPopVersion; // first:locality, second:popped version at the locality
|
||||
|
||||
// new members
|
||||
Future<Void> rejoins;
|
||||
|
@ -184,7 +184,14 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
Optional<Version> recoveredAt;
|
||||
Version knownCommittedVersion;
|
||||
LocalityData locality;
|
||||
std::map< std::pair<UID, Tag>, std::pair<Version, Version> > outstandingPops; // For each currently running popFromLog actor, (log server #, tag)->popped version
|
||||
// For each currently running popFromLog actor, outstandingPops is
|
||||
// (logID, tag)->(max popped version, durableKnownCommittedVersion).
|
||||
// Why do we need durableKnownCommittedVersion? knownCommittedVersion gives the lower bound of what data
|
||||
// will need to be copied into the next generation to restore the replication factor.
|
||||
// Guess: It probably serves as a minimum version of what data should be on a TLog in the next generation and
|
||||
// sending a pop for anything less than durableKnownCommittedVersion for the TLog will be absurd.
|
||||
std::map<std::pair<UID, Tag>, std::pair<Version, Version>> outstandingPops;
|
||||
|
||||
Optional<PromiseStream<Future<Void>>> addActor;
|
||||
ActorCollection popActors;
|
||||
std::vector<OldLogData> oldLogData;
|
||||
|
@ -245,11 +252,15 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
|
|||
return pseudoLocalities.count(locality) > 0;
|
||||
}
|
||||
|
||||
// Return the max version that can be popped for the locality;
|
||||
Version popPseudoLocalityTag(int8_t locality, Version upTo) override {
|
||||
ASSERT(isPseudoLocality(locality));
|
||||
auto& localityVersion = pseudoLocalityPopVersion[locality];
|
||||
localityVersion = std::max(localityVersion, upTo);
|
||||
Version minVersion = localityVersion;
|
||||
// Why do we need to use the minimum popped version among all tags? Reason: for example,
|
||||
// 2 pseudo tags pop 100 or 150, respectively. It's only safe to pop min(100, 150),
|
||||
// because [101,150) is needed by another pseudo tag.
|
||||
for (const auto& it : pseudoLocalityPopVersion) {
|
||||
minVersion = std::min(minVersion, it.second);
|
||||
}
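
Editor's note: to make the comment's example concrete, here is a small, self-contained sketch of the same min computation (plain C++; safePopVersion and the bare std::map are stand-ins for the method and pseudoLocalityPopVersion above):

    #include <algorithm>
    #include <cstdint>
    #include <limits>
    #include <map>

    typedef int64_t Version;

    // Two pseudo localities have popped to 100 and 150 respectively; only
    // min(100, 150) = 100 may actually be discarded, because [101, 150) is
    // still needed by the locality that has only reached 100.
    Version safePopVersion(const std::map<int8_t, Version>& popVersions) {
        Version minVersion = std::numeric_limits<Version>::max();
        for (const auto& it : popVersions) {
            minVersion = std::min(minVersion, it.second);
        }
        return minVersion;
    }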

@ -1045,6 +1056,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
}
}

// pop 'tag.locality' type data up to the 'upTo' version
virtual void pop( Version upTo, Tag tag, Version durableKnownCommittedVersion, int8_t popLocality ) {
if (upTo <= 0) return;
if( tag.locality == tagLocalityRemoteLog) {

@ -1057,18 +1069,22 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
Version prev = outstandingPops[std::make_pair(log->get().id(),tag)].first;
if (prev < upTo)
outstandingPops[std::make_pair(log->get().id(),tag)] = std::make_pair(upTo, durableKnownCommittedVersion);
if (prev == 0)
popActors.add( popFromLog( this, log, tag, 1.0 ) ); //< FIXME: knob

if (prev == 0) {
popActors.add(popFromLog(this, log, tag, 1.0)); //< FIXME: knob // TODO: Knobify it
}
}
}
}
}
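
Editor's note: the prev == 0 test above is a coalescing trick. pop() only raises the target version recorded in outstandingPops and spawns a popFromLog actor solely on the first request for a given (log, tag) pair; that single actor then drains whatever target is current each time around its loop. A reduced sketch of the invariant (plain C++; the standalone LogId/Tag types and requestPop are hypothetical, actor machinery elided):

    #include <map>
    #include <utility>

    typedef long long Version;
    struct LogId { int id; bool operator<(const LogId& o) const { return id < o.id; } };
    struct Tag   { int id; bool operator<(const Tag& o) const { return id < o.id; } };

    std::map<std::pair<LogId, Tag>, std::pair<Version, Version>> outstandingPops;

    void requestPop(LogId log, Tag tag, Version upTo, Version durableKnownCommittedVersion) {
        Version prev = outstandingPops[{ log, tag }].first;
        if (prev < upTo)
            outstandingPops[{ log, tag }] = { upTo, durableKnownCommittedVersion };
        if (prev == 0) {
            // First request for this (log, tag): start the one long-lived
            // popFromLog actor. Later requests only bump the map entry; the
            // actor re-reads it each iteration, so pops are naturally batched.
        }
    }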

// pop tag from log up to the version defined in self->outstandingPops[].first
ACTOR static Future<Void> popFromLog( TagPartitionedLogSystem* self, Reference<AsyncVar<OptionalInterface<TLogInterface>>> log, Tag tag, double time ) {
state Version last = 0;
loop {
wait( delay(time, TaskPriority::TLogPop) );

// to: first is upto version, second is durableKnownComittedVersion
state std::pair<Version,Version> to = self->outstandingPops[ std::make_pair(log->get().id(),tag) ];

if (to.first <= last) {

@ -373,6 +373,14 @@ void failAfter( Future<Void> trigger, Endpoint e ) {
failAfter( trigger, g_simulator.getProcess( e ) );
}

ACTOR Future<Void> histogramReport() {
loop {
wait(delay(SERVER_KNOBS->HISTOGRAM_REPORT_INTERVAL));

GetHistogramRegistry().logReport();
}
}
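
Editor's note: histogramReport only has an effect while it is kept running; later hunks in this same commit push it onto fdbserver's main actor list:

    actors.push_back(histogramReport()); // flushes the registry every HISTOGRAM_REPORT_INTERVAL seconds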

void testSerializationSpeed() {
double tstart;
double build = 0, serialize = 0, deserialize = 0, copy = 0, deallocate = 0;

@ -492,8 +500,10 @@ ACTOR Future<Void> dumpDatabase( Database cx, std::string outputFilename, KeyRan
void memoryTest();
void skipListTest();

Future<Void> startSystemMonitor(std::string dataFolder, Optional<Standalone<StringRef>> zoneId, Optional<Standalone<StringRef>> machineId) {
initializeSystemMonitorMachineState(SystemMonitorMachineState(dataFolder, zoneId, machineId, g_network->getLocalAddress().ip));
Future<Void> startSystemMonitor(std::string dataFolder, Optional<Standalone<StringRef>> dcId,
Optional<Standalone<StringRef>> zoneId, Optional<Standalone<StringRef>> machineId) {
initializeSystemMonitorMachineState(
    SystemMonitorMachineState(dataFolder, dcId, zoneId, machineId, g_network->getLocalAddress().ip));

systemMonitor();
return recurring( &systemMonitor, 5.0, TaskPriority::FlushTrace );

@ -1656,6 +1666,8 @@ int main(int argc, char* argv[]) {
if (role == Simulation) {
TraceEvent("Simulation").detail("TestFile", testFile);

auto histogramReportActor = histogramReport();

clientKnobs->trace();
flowKnobs->trace();
serverKnobs->trace();

@ -1786,6 +1798,7 @@ int main(int argc, char* argv[]) {

vector<Future<Void>> actors(listenErrors.begin(), listenErrors.end());
actors.push_back( fdbd(connectionFile, localities, processClass, dataFolder, dataFolder, storageMemLimit, metricsConnFile, metricsPrefix, rsssize, whitelistBinPaths) );
actors.push_back(histogramReport());
//actors.push_back( recurring( []{}, .001 ) ); // for ASIO latency measurement

f = stopAfter( waitForAll(actors) );

@ -1794,13 +1807,13 @@ int main(int argc, char* argv[]) {
f = stopAfter( runTests( connectionFile, TEST_TYPE_FROM_FILE, testOnServers ? TEST_ON_SERVERS : TEST_ON_TESTERS, minTesterCount, testFile, StringRef(), localities ) );
g_network->run();
} else if (role == Test) {
auto m = startSystemMonitor(dataFolder, zoneId, zoneId);
auto m = startSystemMonitor(dataFolder, dcId, zoneId, zoneId);
f = stopAfter( runTests( connectionFile, TEST_TYPE_FROM_FILE, TEST_HERE, 1, testFile, StringRef(), localities ) );
g_network->run();
} else if (role == ConsistencyCheck) {
setupSlowTaskProfiler();

auto m = startSystemMonitor(dataFolder, zoneId, zoneId);
auto m = startSystemMonitor(dataFolder, dcId, zoneId, zoneId);
f = stopAfter( runTests( connectionFile, TEST_TYPE_CONSISTENCY_CHECK, TEST_HERE, 1, testFile, StringRef(), localities ) );
g_network->run();
} else if (role == CreateTemplateDatabase) {

@ -19,11 +19,16 @@
*/

#include <cinttypes>
#include <functional>
#include <type_traits>
#include <unordered_map>

#include "fdbrpc/fdbrpc.h"
#include "fdbrpc/LoadBalance.h"
#include "flow/IndexedSet.h"
#include "flow/Hash3.h"
#include "flow/ActorCollection.h"
#include "flow/Hash3.h"
#include "flow/Histogram.h"
#include "flow/IndexedSet.h"
#include "flow/SystemMonitor.h"
#include "flow/Util.h"
#include "fdbclient/Atomic.h"

@ -52,11 +57,8 @@
#include "fdbrpc/Smoother.h"
#include "fdbrpc/Stats.h"
#include "flow/TDMetric.actor.h"
#include <type_traits>
#include "flow/actorcompiler.h" // This must be the last #include.

using std::pair;
using std::make_pair;
#include "flow/actorcompiler.h" // This must be the last #include.

#pragma region Data Structures

@ -240,13 +242,13 @@ struct UpdateEagerReadInfo {
void finishKeyBegin() {
std::sort(keyBegin.begin(), keyBegin.end());
keyBegin.resize( std::unique(keyBegin.begin(), keyBegin.end()) - keyBegin.begin() );
std::sort(keys.begin(), keys.end(), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return (lhs.first < rhs.first) || (lhs.first == rhs.first && lhs.second > rhs.second); } );
keys.resize(std::unique(keys.begin(), keys.end(), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return lhs.first == rhs.first; } ) - keys.begin());
std::sort(keys.begin(), keys.end(), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return (lhs.first < rhs.first) || (lhs.first == rhs.first && lhs.second > rhs.second); } );
keys.resize(std::unique(keys.begin(), keys.end(), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return lhs.first == rhs.first; } ) - keys.begin());
//value gets populated in doEagerReads
}

Optional<Value>& getValue(KeyRef key) {
int i = std::lower_bound(keys.begin(), keys.end(), pair<KeyRef, int>(key, 0), [](const pair<KeyRef, int>& lhs, const pair<KeyRef, int>& rhs) { return lhs.first < rhs.first; } ) - keys.begin();
int i = std::lower_bound(keys.begin(), keys.end(),std::pair<KeyRef, int>(key, 0), [](const std::pair<KeyRef, int>& lhs, const std::pair<KeyRef, int>& rhs) { return lhs.first < rhs.first; } ) - keys.begin();
ASSERT( i < keys.size() && keys[i].first == key );
return value[i];
}

@ -296,9 +298,63 @@ private:
std::map<Version, Standalone<VersionUpdateRef>> mutationLog; // versions (durableVersion, version]

public:
// Histograms
struct FetchKeysHistograms {
const Reference<Histogram> latency;
const Reference<Histogram> bytes;
const Reference<Histogram> bandwidth;

FetchKeysHistograms()
  : latency(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_LATENCY_HISTOGRAM,
                                    Histogram::Unit::microseconds)),
    bytes(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_BYTES_HISTOGRAM,
                                  Histogram::Unit::bytes)),
    bandwidth(Histogram::getHistogram(STORAGESERVER_HISTOGRAM_GROUP, FETCH_KEYS_BYTES_PER_SECOND_HISTOGRAM,
                                      Histogram::Unit::bytes_per_second)) {}
} fetchKeysHistograms;

class CurrentRunningFetchKeys {
std::unordered_map<UID, double> startTimeMap;
std::unordered_map<UID, KeyRangeRef> keyRangeMap;

static const StringRef emptyString;
static const KeyRangeRef emptyKeyRange;
public:
void recordStart(const UID id, const KeyRange keyRange) {
startTimeMap[id] = now();
keyRangeMap[id] = keyRange;
}

void recordFinish(const UID id) {
startTimeMap.erase(id);
keyRangeMap.erase(id);
}

std::pair<double, KeyRangeRef> longestTime() const {
if (numRunning() == 0) {
return {-1, emptyKeyRange};
}

const double currentTime = now();
double longest = 0;
UID UIDofLongest;
for (const auto kv: startTimeMap) {
const double currentRunningTime = currentTime - kv.second;
if (longest < currentRunningTime) {
longest = currentRunningTime;
UIDofLongest = kv.first;
}
}
return {longest, keyRangeMap.at(UIDofLongest)};
}

int numRunning() const { return startTimeMap.size(); }
} currentRunningFetchKeys;
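
Editor's note: CurrentRunningFetchKeys is plain bookkeeping; a caller brackets each fetch with recordStart/recordFinish and can ask for the longest-running fetch at any time. A hypothetical sequence (names from this diff; the UID and range are invented for illustration):

    StorageServer::CurrentRunningFetchKeys running;  // normally the currentRunningFetchKeys member
    UID id = deterministicRandom()->randomUniqueID();
    running.recordStart(id, keys);                   // a fetchKeys begins
    std::pair<double, KeyRangeRef> longest =
        running.longestTime();                       // {seconds running, range}, or {-1, empty} when idle
    running.recordFinish(id);                        // the fetchKeys ends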

Tag tag;
vector<pair<Version,Tag>> history;
vector<pair<Version,Tag>> allHistory;
vector<std::pair<Version,Tag>> history;
vector<std::pair<Version,Tag>> allHistory;
Version poppedAllAfter;
std::map<Version, Arena> freeable; // for each version, an Arena that must be held until that version is < oldestVersion
Arena lastArena;

@ -345,8 +401,8 @@ public:
poppedAllAfter = std::numeric_limits<Version>::max();
}

vector<pair<Version,Tag>>* hist = &history;
vector<pair<Version,Tag>> allHistoryCopy;
vector<std::pair<Version,Tag>>* hist = &history;
vector<std::pair<Version,Tag>> allHistoryCopy;
if(popAllTags) {
allHistoryCopy = allHistory;
hist = &allHistoryCopy;

@ -535,22 +591,18 @@ public:
}
} counters;

StorageServer(IKeyValueStore* storage, Reference<AsyncVar<ServerDBInfo>> const& db, StorageServerInterface const& ssi)
: instanceID(deterministicRandom()->randomUniqueID().first()),
storage(this, storage), db(db),
lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0),
rebootAfterDurableVersion(std::numeric_limits<Version>::max()),
durableInProgress(Void()),
versionLag(0), primaryLocality(tagLocalityInvalid),
updateEagerReads(0),
shardChangeCounter(0),
fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES),
shuttingDown(false), debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0),
logProtocol(0), counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()),
readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")),
behind(false), versionBehind(false), byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false),
lastUpdate(now()), poppedAllAfter(std::numeric_limits<Version>::max()), cpuUsage(0.0), diskUsage(0.0)
{
StorageServer(IKeyValueStore* storage, Reference<AsyncVar<ServerDBInfo>> const& db,
              StorageServerInterface const& ssi)
  : fetchKeysHistograms(), instanceID(deterministicRandom()->randomUniqueID().first()), storage(this, storage),
    db(db), lastTLogVersion(0), lastVersionWithData(0), restoredVersion(0),
    rebootAfterDurableVersion(std::numeric_limits<Version>::max()), durableInProgress(Void()), versionLag(0),
    primaryLocality(tagLocalityInvalid), updateEagerReads(0), shardChangeCounter(0),
    fetchKeysParallelismLock(SERVER_KNOBS->FETCH_KEYS_PARALLELISM_BYTES), shuttingDown(false),
    debug_inApplyUpdate(false), debug_lastValidateTime(0), watchBytes(0), numWatches(0), logProtocol(0),
    counters(this), tag(invalidTag), maxQueryQueue(0), thisServerID(ssi.id()),
    readQueueSizeMetric(LiteralStringRef("StorageServer.ReadQueueSize")), behind(false), versionBehind(false),
    byteSampleClears(false, LiteralStringRef("\xff\xff\xff")), noRecentUpdates(false), lastUpdate(now()),
    poppedAllAfter(std::numeric_limits<Version>::max()), cpuUsage(0.0), diskUsage(0.0) {
version.initMetric(LiteralStringRef("StorageServer.Version"), counters.cc.id);
oldestVersion.initMetric(LiteralStringRef("StorageServer.OldestVersion"), counters.cc.id);
durableVersion.initMetric(LiteralStringRef("StorageServer.DurableVersion"), counters.cc.id);

@ -662,6 +714,9 @@ public:
}
};

const StringRef StorageServer::CurrentRunningFetchKeys::emptyString = LiteralStringRef("");
const KeyRangeRef StorageServer::CurrentRunningFetchKeys::emptyKeyRange = KeyRangeRef(StorageServer::CurrentRunningFetchKeys::emptyString, StorageServer::CurrentRunningFetchKeys::emptyString);

// If and only if key:=value is in (storage+versionedData), // NOT ACTUALLY: and key < allKeys.end,
// and H(key) < |key+value|/bytesPerSample,
// let sampledSize = max(|key+value|,bytesPerSample)

@ -1622,7 +1677,7 @@ bool changeDurableVersion( StorageServer* data, Version desiredDurableVersion )
setDataDurableVersion(data->thisServerID, data->durableVersion.get());
if (checkFatalError.isReady()) checkFatalError.get();

//TraceEvent("ForgotVersionsBefore", data->thisServerID).detail("Version", nextDurableVersion);
// TraceEvent("ForgotVersionsBefore", data->thisServerID).detail("Version", nextDurableVersion);
validate(data);

return nextDurableVersion == desiredDurableVersion;

@ -1942,16 +1997,56 @@ ACTOR Future<Void> logFetchKeysWarning(AddingShard* shard) {
loop {
state double waitSeconds = BUGGIFY ? 5.0 : 600.0;
wait(delay(waitSeconds));
TraceEvent(waitSeconds > 300.0 ? SevWarnAlways : SevInfo, "FetchKeysTooLong").detail("Duration", now() - startTime).detail("Phase", shard->phase).detail("Begin", shard->keys.begin.printable()).detail("End", shard->keys.end.printable());

const auto traceEventLevel = waitSeconds > SERVER_KNOBS->FETCH_KEYS_TOO_LONG_TIME_CRITERIA ? SevWarnAlways : SevInfo;
TraceEvent(traceEventLevel, "FetchKeysTooLong")
    .detail("Duration", now() - startTime)
    .detail("Phase", shard->phase)
    .detail("Begin", shard->keys.begin.printable())
    .detail("End", shard->keys.end.printable());
}
}

class FetchKeysMetricReporter {
const UID uid;
const double startTime;
int fetchedBytes;
StorageServer::FetchKeysHistograms& histograms;
StorageServer::CurrentRunningFetchKeys& currentRunning;

public:
FetchKeysMetricReporter(const UID& uid_, const double startTime_, const KeyRange& keyRange, StorageServer::FetchKeysHistograms& histograms_, StorageServer::CurrentRunningFetchKeys& currentRunning_)
  : uid(uid_), startTime(startTime_), fetchedBytes(0), histograms(histograms_), currentRunning(currentRunning_) {

currentRunning.recordStart(uid, keyRange);
}

void addFetchedBytes(const int bytes) { fetchedBytes += bytes; }

~FetchKeysMetricReporter() {
double latency = now() - startTime;

// If fetchKeys is *NOT* run, i.e. returning immediately, still report a record.
if (latency == 0) latency = 1e6;

const uint32_t bandwidth = fetchedBytes / latency;

histograms.latency->sampleSeconds(latency);
histograms.bytes->sample(fetchedBytes);
histograms.bandwidth->sample(bandwidth);

currentRunning.recordFinish(uid);
}
};
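
Editor's note: FetchKeysMetricReporter is deliberately RAII: the destructor does the reporting, so every exit path from fetchKeys (success, error throw, actor cancellation) emits exactly one latency/bytes/bandwidth sample and balances recordStart with recordFinish. The actor below simply holds one as a state variable and feeds it as blocks arrive (both lines taken from this diff):

    state FetchKeysMetricReporter metricReporter(fetchKeysID, startTime, keys,
                                                 data->fetchKeysHistograms, data->currentRunningFetchKeys);
    // per fetched block:
    metricReporter.addFetchedBytes(expectedSize);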

ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
state const UID fetchKeysID = deterministicRandom()->randomUniqueID();
state TraceInterval interval("FetchKeys");
state KeyRange keys = shard->keys;
state Future<Void> warningLogger = logFetchKeysWarning(shard);
state double startt = now();
state const double startTime = now();
state int fetchBlockBytes = BUGGIFY ? SERVER_KNOBS->BUGGIFY_BLOCK_BYTES : SERVER_KNOBS->FETCH_BLOCK_BYTES;
state FetchKeysMetricReporter metricReporter(fetchKeysID, startTime, keys, data->fetchKeysHistograms, data->currentRunningFetchKeys);

// delay(0) to force a return to the run loop before the work of fetchKeys is started.
// This allows adding->start() to be called inline with CSK.

@ -1989,7 +2084,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {

state double executeStart = now();
++data->counters.fetchWaitingCount;
data->counters.fetchWaitingMS += 1000*(executeStart - startt);
data->counters.fetchWaitingMS += 1000 * (executeStart - startTime);

// Fetch keys gets called while the update actor is processing mutations. data->version will not be updated until all mutations for a version
// have been processed. We need to take the durableVersionLock to ensure data->version is greater than the version of the mutation which caused

@ -2029,6 +2124,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
debugKeyRange("fetchRange", fetchVersion, keys);
for(auto k = this_block.begin(); k != this_block.end(); ++k) debugMutation("fetch", fetchVersion, MutationRef(MutationRef::SetValue, k->key, k->value));

metricReporter.addFetchedBytes(expectedSize);
data->counters.bytesFetched += expectedSize;
if( fetchBlockBytes > expectedSize ) {
holdingFKPL.release( fetchBlockBytes - expectedSize );

@ -2096,8 +2192,9 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
while (!shard->updates.empty() && shard->updates[0].version <= fetchVersion) shard->updates.pop_front();

//FIXME: remove when we no longer support upgrades from 5.X
if(debug_getRangeRetries >= 100) {
if (debug_getRangeRetries >= 100) {
data->cx->enableLocalityLoadBalance = false;
// TODO: Add SevWarnAlways to say it was disabled.
}

debug_getRangeRetries++;

@ -2214,7 +2311,7 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {

TraceEvent(SevError, "FetchKeysError", data->thisServerID)
    .error(e)
    .detail("Elapsed", now()-startt)
    .detail("Elapsed", now() - startTime)
    .detail("KeyBegin", keys.begin)
    .detail("KeyEnd",keys.end);
if (e.code() != error_code_actor_cancelled)

@ -3014,7 +3111,9 @@ bool StorageServerDisk::makeVersionMutationsDurable( Version& prevStorageVersion
void StorageServerDisk::makeVersionDurable( Version version ) {
storage->set( KeyValueRef(persistVersion, BinaryWriter::toValue(version, Unversioned())) );

//TraceEvent("MakeDurable", data->thisServerID).detail("FromVersion", prevStorageVersion).detail("ToVersion", version);
// TraceEvent("MakeDurable", data->thisServerID)
//     .detail("FromVersion", prevStorageVersion)
//     .detail("ToVersion", version);
}

void StorageServerDisk::changeLogProtocol(Version version, ProtocolVersion protocol) {

@ -3406,7 +3505,10 @@ ACTOR Future<Void> metricsCore( StorageServer* self, StorageServerInterface ssi

wait( self->byteSampleRecovery );

actors.add(traceCounters("StorageMetrics", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY, &self->counters.cc, self->thisServerID.toString() + "/StorageMetrics"));
Tag tag = self->tag;
actors.add(traceCounters("StorageMetrics", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY,
                         &self->counters.cc, self->thisServerID.toString() + "/StorageMetrics",
                         [tag](TraceEvent& te) { te.detail("Tag", tag.toString()); }));

loop {
choose {

@ -3472,6 +3574,35 @@ ACTOR Future<Void> checkBehind( StorageServer* self ) {
}
}

ACTOR Future<Void> reportStorageServerState(StorageServer* self) {
if (!SERVER_KNOBS->REPORT_DD_METRICS) {
return Void();
}

loop {
wait(delay(SERVER_KNOBS->DD_METRICS_REPORT_INTERVAL));

const auto numRunningFetchKeys = self->currentRunningFetchKeys.numRunning();
if (numRunningFetchKeys == 0) {
continue;
}

const auto longestRunningFetchKeys = self->currentRunningFetchKeys.longestTime();

auto level = SevInfo;
if (longestRunningFetchKeys.first >= SERVER_KNOBS->FETCH_KEYS_TOO_LONG_TIME_CRITERIA) {
level = SevWarnAlways;
}

TraceEvent(level, "FetchKeyCurrentStatus")
    .detail("Timestamp", now())
    .detail("LongestRunningTime", longestRunningFetchKeys.first)
    .detail("StartKey", longestRunningFetchKeys.second.begin.printable())
    .detail("EndKey", longestRunningFetchKeys.second.end.printable())
    .detail("NumRunning", numRunningFetchKeys);
}
}

ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterface ssi )
{
state Future<Void> doUpdate = Void();

@ -3489,6 +3620,7 @@ ACTOR Future<Void> storageServerCore( StorageServer* self, StorageServerInterfac
actors.add(metricsCore(self, ssi));
actors.add(logLongByteSampleRecovery(self->byteSampleRecovery));
actors.add(checkBehind(self));
actors.add(reportStorageServerState(self));

self->coreStarted.send( Void() );

@ -835,7 +835,8 @@ ACTOR Future<Void> workerServer(

filesClosed.add(stopping.getFuture());

initializeSystemMonitorMachineState(SystemMonitorMachineState(folder, locality.zoneId(), locality.machineId(), g_network->getLocalAddress().ip));
initializeSystemMonitorMachineState(SystemMonitorMachineState(
    folder, locality.dcId(), locality.zoneId(), locality.machineId(), g_network->getLocalAddress().ip));

{
auto recruited = interf; //ghetto! don't we all love a good #define
@ -28,9 +28,9 @@

extern IKeyValueStore *makeDummyKeyValueStore();

template <class T>
class Histogram {
class TestHistogram {
public:
Histogram(int minSamples = 100) : minSamples(minSamples) { reset(); }
TestHistogram(int minSamples = 100) : minSamples(minSamples) { reset(); }

void reset(){
N = 0;

@ -153,7 +153,7 @@ struct KVTest {
}
};

ACTOR Future<Void> testKVRead( KVTest* test, Key key, Histogram<float>* latency, PerfIntCounter* count ) {
ACTOR Future<Void> testKVRead(KVTest* test, Key key, TestHistogram<float>* latency, PerfIntCounter* count) {
//state Version s1 = test->lastCommit;
state Version s2 = test->lastDurable;

@ -171,7 +171,7 @@ ACTOR Future<Void> testKVRead( KVTest* test, Key key, Histogram<float>* latency,
return Void();
}

ACTOR Future<Void> testKVReadSaturation( KVTest* test, Histogram<float>* latency, PerfIntCounter* count ) {
ACTOR Future<Void> testKVReadSaturation(KVTest* test, TestHistogram<float>* latency, PerfIntCounter* count) {
while (true) {
state double begin = timer();
Optional<Value> val = wait( test->store->readValue(test->randomKey()) );

@ -181,7 +181,7 @@ ACTOR Future<Void> testKVReadSaturation( KVTest* test, Histogram<float>* latency
}
}

ACTOR Future<Void> testKVCommit( KVTest* test, Histogram<float>* latency, PerfIntCounter* count ) {
ACTOR Future<Void> testKVCommit(KVTest* test, TestHistogram<float>* latency, PerfIntCounter* count) {
state Version v = test->lastSet;
test->lastCommit = v;
state double begin = timer();

@ -202,7 +202,7 @@ struct KVStoreTestWorkload : TestWorkload {
bool doSetup, doClear, doCount;
std::string filename;
PerfIntCounter reads, sets, commits;
Histogram<float> readLatency, commitLatency;
TestHistogram<float> readLatency, commitLatency;
double setupTook;
std::string storeType;

@ -232,7 +232,7 @@ struct KVStoreTestWorkload : TestWorkload {
return Void();
}
virtual Future<bool> check( Database const& cx ) { return true; }
void metricsFromHistogram(vector<PerfMetric>& m, std::string name, Histogram<float>& h){
void metricsFromHistogram(vector<PerfMetric>& m, std::string name, TestHistogram<float>& h) {
m.push_back( PerfMetric( "Min " + name, 1000.0 * h.min(), true) );
m.push_back( PerfMetric( "Average " + name, 1000.0 * h.mean(), true) );
m.push_back( PerfMetric( "Median " + name, 1000.0 * h.medianEstimate(), true) );

@ -24,6 +24,8 @@ set(FLOW_SRCS
FileTraceLogWriter.h
Hash3.c
Hash3.h
Histogram.cpp
Histogram.h
IDispatched.h
IRandom.h
IThreadPool.cpp

@ -0,0 +1,187 @@
/*
 * Histogram.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <flow/Histogram.h>
#include <flow/flow.h>
#include <flow/UnitTest.h>
// TODO: remove dependency on fdbrpc.

// we need to be able to check if we're in simulation so that the histograms are properly
// scoped to the right "machine".
// either we pull g_simulator into flow, or flow (and the I/O path) will be unable to log performance
// metrics.
#include <fdbrpc/simulator.h>

// pull in some global pointers too: These types are implemented in fdbrpc/sim2.actor.cpp, which is not available here.
// Yuck. If you're not using the simulator, these will remain null, and all should be well.

// TODO: create a execution context abstraction that allows independent flow instances within a process.
// The simulator would be the main user of it, and histogram would be the only other user (for now).
ISimulator* g_pSimulator = nullptr;
thread_local ISimulator::ProcessInfo* ISimulator::currentProcess = nullptr;

// Fallback registry when we're not in simulation -- if we had execution contexts we wouldn't need to check if
// we have a simulated contex here; we'd just use the current context regardless.
static HistogramRegistry* globalHistograms = nullptr;

#pragma region HistogramRegistry

HistogramRegistry& GetHistogramRegistry() {
ISimulator::ProcessInfo* info = g_simulator.getCurrentProcess();

if (info) {
// in simulator; scope histograms to simulated process
return info->histograms;
}
// avoid link order issues where the registry hasn't been initialized, but we're
// instantiating a histogram
if (globalHistograms == nullptr) {
// Note: This will show up as a leak on shutdown, but we're OK with that.
globalHistograms = new HistogramRegistry();
}
return *globalHistograms;
}

void HistogramRegistry::registerHistogram(Histogram* h) {
if (histograms.find(h->name()) != histograms.end()) {
TraceEvent(SevError, "HistogramDoubleRegistered").detail("group", h->group).detail("op", h->op);
ASSERT(false);
}
histograms.insert(std::pair<std::string, Histogram*>(h->name(), h));
}

void HistogramRegistry::unregisterHistogram(Histogram* h) {
std::string name = h->name();
if (histograms.find(name) == histograms.end()) {
TraceEvent(SevError, "HistogramNotRegistered").detail("group", h->group).detail("op", h->op);
}
int count = histograms.erase(name);
ASSERT(count == 1);
}

Histogram* HistogramRegistry::lookupHistogram(std::string name) {
auto h = histograms.find(name);
if (h == histograms.end()) {
return nullptr;
}
return h->second;
}

void HistogramRegistry::logReport() {
for (auto& i : histograms) {
i.second->writeToLog();
i.second->clear();
}
}

#pragma endregion // HistogramRegistry

#pragma region Histogram

const std::unordered_map<Histogram::Unit, std::string> Histogram::UnitToStringMapper = {
	{ Histogram::Unit::microseconds, "microseconds" },
	{ Histogram::Unit::bytes, "bytes" },
	{ Histogram::Unit::bytes_per_second, "bytes_per_second" }
};

void Histogram::writeToLog() {
bool active = false;
for (uint32_t i = 0; i < 32; i++) {
if (buckets[i]) {
active = true;
break;
}
}
if (!active) {
return;
}

TraceEvent e(SevInfo, "Histogram");
e.detail("Group", group).detail("Op", op).detail("Unit", UnitToStringMapper.at(unit));

for (uint32_t i = 0; i < 32; i++) {
uint32_t value = ((uint32_t)1) << (i + 1);

if (buckets[i]) {
switch (unit) {
case Unit::microseconds:
e.detail(format("LessThan%u.%03u", value / 1000, value % 1000), buckets[i]);
break;
case Unit::bytes:
case Unit::bytes_per_second:
e.detail(format("LessThan%u", value), buckets[i]);
break;
default:
ASSERT(false);
}
}
}
}

#pragma endregion // Histogram

TEST_CASE("/flow/histogram/smoke_test") {

{
Reference<Histogram> h =
    Histogram::getHistogram(LiteralStringRef("smoke_test"), LiteralStringRef("counts"), Histogram::Unit::bytes);

h->sample(0);
ASSERT(h->buckets[0] == 1);
h->sample(1);
ASSERT(h->buckets[0] == 2);

h->sample(2);
ASSERT(h->buckets[1] == 1);

GetHistogramRegistry().logReport();

ASSERT(h->buckets[0] == 0);
h->sample(0);
ASSERT(h->buckets[0] == 1);
h = Histogram::getHistogram(LiteralStringRef("smoke_test"), LiteralStringRef("counts2"),
                            Histogram::Unit::bytes);

// confirm that old h was deallocated.
h = Histogram::getHistogram(LiteralStringRef("smoke_test"), LiteralStringRef("counts"), Histogram::Unit::bytes);
ASSERT(h->buckets[0] == 0);

h = Histogram::getHistogram(LiteralStringRef("smoke_test"), LiteralStringRef("times"),
                            Histogram::Unit::microseconds);

h->sampleSeconds(0.000000);
h->sampleSeconds(0.0000019);
ASSERT(h->buckets[0] == 2);
h->sampleSeconds(0.0000021);
ASSERT(h->buckets[1] == 1);
h->sampleSeconds(0.000015);
ASSERT(h->buckets[3] == 1);

h->sampleSeconds(4400.0);
ASSERT(h->buckets[31] == 1);

GetHistogramRegistry().logReport();
}

// h has been deallocated. Does this crash?
GetHistogramRegistry().logReport();

return Void();
}

@ -0,0 +1,137 @@
/*
 * Histogram.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef FLOW_HISTOGRAM_H
#define FLOW_HISTOGRAM_H
#pragma once

#include <flow/Arena.h>

#include <string>
#include <map>
#include <unordered_map>

#ifdef _WIN32
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
#endif

class Histogram;

class HistogramRegistry {
public:
void registerHistogram(Histogram* h);
void unregisterHistogram(Histogram* h);
Histogram* lookupHistogram(std::string name);
void logReport();

private:
// This map is ordered by key so that ops within the same group end up
// next to each other in the trace log.
std::map<std::string, Histogram*> histograms;
};

HistogramRegistry& GetHistogramRegistry();

/*
 * A fast histogram with power-of-two spaced buckets.
 *
 * For more information about this technique, see:
 * https://www.fsl.cs.stonybrook.edu/project-osprof.html
 */
class Histogram sealed : public ReferenceCounted<Histogram> {
public:
enum class Unit { microseconds, bytes, bytes_per_second };

private:
static const std::unordered_map<Unit, std::string> UnitToStringMapper;

Histogram(std::string group, std::string op, Unit unit, HistogramRegistry& registry)
  : group(group), op(op), unit(unit), registry(registry), ReferenceCounted<Histogram>() {

ASSERT(UnitToStringMapper.find(unit) != UnitToStringMapper.end());

clear();
}

static std::string generateName(std::string group, std::string op) { return group + ":" + op; }

public:
~Histogram() { registry.unregisterHistogram(this); }

static Reference<Histogram> getHistogram(StringRef group, StringRef op, Unit unit) {
std::string group_str = group.toString();
std::string op_str = op.toString();
std::string name = generateName(group_str, op_str);
HistogramRegistry& registry = GetHistogramRegistry();
Histogram* h = registry.lookupHistogram(name);
if (!h) {
h = new Histogram(group_str, op_str, unit, registry);
registry.registerHistogram(h);
return Reference<Histogram>(h);
} else {
return Reference<Histogram>::addRef(h);
}
}

// This histogram buckets samples into powers of two.
inline void sample(uint32_t sample) {
size_t idx;
#ifdef _WIN32
unsigned long index;
// _BitScanReverse sets index to the position of the first non-zero bit, so
// _BitScanReverse(sample) ~= log_2(sample). _BitScanReverse returns false if
// sample is zero.
idx = _BitScanReverse(&index, sample) ? index : 0;
#else
// __builtin_clz counts the leading zeros in its uint32_t argument. So, 31-clz ~= log_2(sample).
// __builtin_clz(0) is undefined.
idx = sample ? (31 - __builtin_clz(sample)) : 0;
#endif
ASSERT(idx < 32);
buckets[idx]++;
}
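// Editor's note -- a worked example of the bucketing above (annotation, not part of the original header):
//   sample(1)    -> idx = 31 - __builtin_clz(1)    = 0  (bucket [1, 2);      for Unit::bytes logged as "LessThan2")
//   sample(1000) -> idx = 31 - __builtin_clz(1000) = 9  (bucket [512, 1024); logged as "LessThan1024")
//   sample(0)    -> idx = 0 via the explicit check, since __builtin_clz(0) / _BitScanReverse(0) are undefined/false
// So bucket idx counts samples in [2^idx, 2^(idx+1)), matching writeToLog()'s label value = 1 << (i + 1).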

inline void sampleSeconds(double delta) {
uint64_t delta_usec = (delta * 1000000);
if (delta_usec > UINT32_MAX) {
sample(UINT32_MAX);
} else {
sample((uint32_t)(delta * 1000000)); // convert to microseconds and truncate to integer
}
}

void clear() {
for (uint32_t& i : buckets) {
i = 0;
}
}
void writeToLog();

std::string name() { return generateName(this->group, this->op); }

std::string const group;
std::string const op;
Unit const unit;
HistogramRegistry& registry;
uint32_t buckets[32];
};

#endif // FLOW_HISTOGRAM_H
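
Editor's note: putting the pieces together, a minimal use of the new API looks like this (a sketch distilled from the smoke test in Histogram.cpp above; the group/op names are arbitrary):

    #include "flow/Histogram.h"

    Reference<Histogram> h = Histogram::getHistogram(
        LiteralStringRef("myGroup"), LiteralStringRef("myOp"), Histogram::Unit::microseconds);
    h->sampleSeconds(0.000015);         // 15 us lands in bucket [8, 16), logged as "LessThan0.016"
    GetHistogramRegistry().logReport(); // one "Histogram" trace event per active histogram, then counts reset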

@ -74,6 +74,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT, 20.0 );
init( PING_LOGGING_INTERVAL, 3.0 );
init( PING_SAMPLE_AMOUNT, 100 );
init( NETWORK_CONNECT_SAMPLE_AMOUNT, 100 );

init( TLS_CERT_REFRESH_DELAY_SECONDS, 12*60*60 );
init( TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT, 9.0 );

@ -150,6 +151,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
init( TRACE_RETRY_OPEN_INTERVAL, 1.00 );
init( MIN_TRACE_SEVERITY, isSimulated ? 0 : 10 ); // Related to the trace severity in Trace.h
init( MAX_TRACE_SUPPRESSIONS, 1e4 );
init( TRACE_DATETIME_ENABLED, true ); // trace time in human readable format (always real time)
init( TRACE_SYNC_ENABLED, 0 );
init( TRACE_EVENT_METRIC_UNITS_PER_SAMPLE, 500 );
init( TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY, 1800.0 ); // 30 mins

@ -92,6 +92,7 @@ public:
int USE_OBJECT_SERIALIZER;
double PING_LOGGING_INTERVAL;
int PING_SAMPLE_AMOUNT;
int NETWORK_CONNECT_SAMPLE_AMOUNT;

int TLS_CERT_REFRESH_DELAY_SECONDS;
double TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT;

@ -172,6 +173,7 @@ public:
double TRACE_RETRY_OPEN_INTERVAL;
int MIN_TRACE_SEVERITY;
int MAX_TRACE_SUPPRESSIONS;
bool TRACE_DATETIME_ENABLED;
int TRACE_SYNC_ENABLED;
int TRACE_EVENT_METRIC_UNITS_PER_SAMPLE;
int TRACE_EVENT_THROTTLER_SAMPLE_EXPIRY;

@ -150,6 +150,8 @@ void UnsentPacketQueue::sent(int bytes) {
bytes -= b->bytes_written - b->bytes_sent;
b->bytes_sent = b->bytes_written;
ASSERT(b->bytes_written <= b->size());
double queue_time = now() - b->enqueue_time;
sendQueueLatencyHistogram->sampleSeconds(queue_time);
unsent_first = b->nextPacketBuffer();
if (!unsent_first) unsent_last = NULL;
b->delref();

@ -23,6 +23,7 @@
#pragma once

#include "flow/flow.h"
#include "flow/Histogram.h"

// PacketWriter and PacketBuffer are in serialize.h because they are needed by the SerializeSource<> template

@ -40,8 +41,17 @@ struct ReliablePacket : FastAllocated<ReliablePacket> {

class UnsentPacketQueue : NonCopyable {
public:
UnsentPacketQueue() : unsent_first(0), unsent_last(0) {}
~UnsentPacketQueue() { discardAll(); }
UnsentPacketQueue()
  : unsent_first(0), unsent_last(0),
    sendQueueLatencyHistogram(Histogram::getHistogram(
        LiteralStringRef("UnsentPacketQueue"), LiteralStringRef("QueueWait"), Histogram::Unit::microseconds)) {}

~UnsentPacketQueue() {
discardAll();
unsent_first = (PacketBuffer*)0xDEADBEEF;
unsent_last = (PacketBuffer*)0xCAFEBABE;
sendQueueLatencyHistogram = Reference<Histogram>(nullptr);
}

// Get a PacketBuffer to write new packets into
PacketBuffer* getWriteBuffer() {

@ -70,6 +80,7 @@ public:

private:
PacketBuffer *unsent_first, *unsent_last; // Both NULL, or inclusive range of PacketBuffers that haven't been sent. The last one may have space for more packets to be written.
Reference<Histogram> sendQueueLatencyHistogram;
};

class ReliablePacketList : NonCopyable {

@ -19,6 +19,7 @@
|
|||
*/
|
||||
|
||||
#include "flow/flow.h"
|
||||
#include "flow/Histogram.h"
|
||||
#include "flow/Platform.h"
|
||||
#include "flow/TDMetric.actor.h"
|
||||
#include "flow/SystemMonitor.h"
|
||||
|
@ -60,87 +61,116 @@ SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *sta
|
|||
if (!DEBUG_DETERMINISM && currentStats.initialized) {
|
||||
{
|
||||
TraceEvent(eventName.c_str())
|
||||
.detail("Elapsed", currentStats.elapsed)
|
||||
.detail("CPUSeconds", currentStats.processCPUSeconds)
|
||||
.detail("MainThreadCPUSeconds", currentStats.mainThreadCPUSeconds)
|
||||
.detail("UptimeSeconds", now() - machineState.monitorStartTime)
|
||||
.detail("Memory", currentStats.processMemory)
|
||||
.detail("ResidentMemory", currentStats.processResidentMemory)
|
||||
.detail("UnusedAllocatedMemory", getTotalUnusedAllocatedMemory())
|
||||
.detail("MbpsSent", ((netData.bytesSent - statState->networkState.bytesSent) * 8e-6) / currentStats.elapsed)
|
||||
.detail("MbpsReceived", ((netData.bytesReceived - statState->networkState.bytesReceived) * 8e-6) / currentStats.elapsed)
|
||||
.detail("DiskTotalBytes", currentStats.processDiskTotalBytes)
|
||||
.detail("DiskFreeBytes", currentStats.processDiskFreeBytes)
|
||||
.detail("DiskQueueDepth", currentStats.processDiskQueueDepth)
|
||||
.detail("DiskIdleSeconds", currentStats.processDiskIdleSeconds)
|
||||
.detail("DiskReads", currentStats.processDiskRead)
|
||||
.detail("DiskWrites", currentStats.processDiskWrite)
|
||||
.detail("DiskReadsCount", currentStats.processDiskReadCount)
|
||||
.detail("DiskWritesCount", currentStats.processDiskWriteCount)
|
||||
.detail("DiskWriteSectors", currentStats.processDiskWriteSectors)
|
||||
.detail("DiskReadSectors", currentStats.processDiskReadSectors)
|
||||
.detail("FileWrites", netData.countFileLogicalWrites - statState->networkState.countFileLogicalWrites)
|
||||
.detail("FileReads", netData.countFileLogicalReads - statState->networkState.countFileLogicalReads)
|
||||
.detail("CacheReadBytes", netData.countFileCacheReadBytes - statState->networkState.countFileCacheReadBytes)
|
||||
.detail("CacheFinds", netData.countFileCacheFinds - statState->networkState.countFileCacheFinds)
|
||||
.detail("CacheWritesBlocked", netData.countFileCacheWritesBlocked - statState->networkState.countFileCacheWritesBlocked)
|
||||
.detail("CacheReadsBlocked", netData.countFileCacheReadsBlocked - statState->networkState.countFileCacheReadsBlocked)
|
||||
.detail("CachePageReadsMerged", netData.countFileCachePageReadsMerged - statState->networkState.countFileCachePageReadsMerged)
|
||||
.detail("CacheWrites", netData.countFileCacheWrites - statState->networkState.countFileCacheWrites)
|
||||
.detail("CacheReads", netData.countFileCacheReads - statState->networkState.countFileCacheReads)
|
||||
.detail("CacheHits", netData.countFilePageCacheHits - statState->networkState.countFilePageCacheHits)
|
||||
.detail("CacheMisses", netData.countFilePageCacheMisses - statState->networkState.countFilePageCacheMisses)
|
||||
.detail("CacheEvictions", netData.countFilePageCacheEvictions - statState->networkState.countFilePageCacheEvictions)
|
.detail("ZoneID", machineState.zoneId)
.detail("MachineID", machineState.machineId)
.detail("AIOSubmitCount", netData.countAIOSubmit - statState->networkState.countAIOSubmit)
.detail("AIOCollectCount", netData.countAIOCollect - statState->networkState.countAIOCollect)
.detail("AIOSubmitLag", (g_network->networkInfo.metrics.secSquaredSubmit - statState->networkMetricsState.secSquaredSubmit) / currentStats.elapsed)
.detail("AIODiskStall", (g_network->networkInfo.metrics.secSquaredDiskStall - statState->networkMetricsState.secSquaredDiskStall) / currentStats.elapsed)
.detail("CurrentConnections", netData.countConnEstablished - netData.countConnClosedWithError - netData.countConnClosedWithoutError)
.detail("ConnectionsEstablished", (double) (netData.countConnEstablished - statState->networkState.countConnEstablished) / currentStats.elapsed)
.detail("ConnectionsClosed", ((netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) + (netData.countConnClosedWithoutError - statState->networkState.countConnClosedWithoutError)) / currentStats.elapsed)
.detail("ConnectionErrors", (netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) / currentStats.elapsed)
.detail("TLSPolicyFailures", (netData.countTLSPolicyFailures - statState->networkState.countTLSPolicyFailures) / currentStats.elapsed)
.trackLatest(eventName);
.detail("Elapsed", currentStats.elapsed)
.detail("CPUSeconds", currentStats.processCPUSeconds)
.detail("MainThreadCPUSeconds", currentStats.mainThreadCPUSeconds)
.detail("UptimeSeconds", now() - machineState.monitorStartTime)
.detail("Memory", currentStats.processMemory)
.detail("ResidentMemory", currentStats.processResidentMemory)
.detail("UnusedAllocatedMemory", getTotalUnusedAllocatedMemory())
.detail("MbpsSent",
((netData.bytesSent - statState->networkState.bytesSent) * 8e-6) / currentStats.elapsed)
.detail("MbpsReceived",
((netData.bytesReceived - statState->networkState.bytesReceived) * 8e-6) / currentStats.elapsed)
.detail("DiskTotalBytes", currentStats.processDiskTotalBytes)
.detail("DiskFreeBytes", currentStats.processDiskFreeBytes)
.detail("DiskQueueDepth", currentStats.processDiskQueueDepth)
.detail("DiskIdleSeconds", currentStats.processDiskIdleSeconds)
.detail("DiskReads", currentStats.processDiskRead)
.detail("DiskWrites", currentStats.processDiskWrite)
.detail("DiskReadsCount", currentStats.processDiskReadCount)
.detail("DiskWritesCount", currentStats.processDiskWriteCount)
.detail("DiskWriteSectors", currentStats.processDiskWriteSectors)
.detail("DiskReadSectors", currentStats.processDiskReadSectors)
.detail("FileWrites", netData.countFileLogicalWrites - statState->networkState.countFileLogicalWrites)
.detail("FileReads", netData.countFileLogicalReads - statState->networkState.countFileLogicalReads)
.detail("CacheReadBytes",
netData.countFileCacheReadBytes - statState->networkState.countFileCacheReadBytes)
.detail("CacheFinds", netData.countFileCacheFinds - statState->networkState.countFileCacheFinds)
.detail("CacheWritesBlocked",
netData.countFileCacheWritesBlocked - statState->networkState.countFileCacheWritesBlocked)
.detail("CacheReadsBlocked",
netData.countFileCacheReadsBlocked - statState->networkState.countFileCacheReadsBlocked)
.detail("CachePageReadsMerged",
netData.countFileCachePageReadsMerged - statState->networkState.countFileCachePageReadsMerged)
.detail("CacheWrites", netData.countFileCacheWrites - statState->networkState.countFileCacheWrites)
.detail("CacheReads", netData.countFileCacheReads - statState->networkState.countFileCacheReads)
.detail("CacheHits", netData.countFilePageCacheHits - statState->networkState.countFilePageCacheHits)
.detail("CacheMisses",
netData.countFilePageCacheMisses - statState->networkState.countFilePageCacheMisses)
.detail("CacheEvictions",
netData.countFilePageCacheEvictions - statState->networkState.countFilePageCacheEvictions)
.detail("DCID", machineState.dcId)
.detail("ZoneID", machineState.zoneId)
.detail("MachineID", machineState.machineId)
.detail("AIOSubmitCount", netData.countAIOSubmit - statState->networkState.countAIOSubmit)
.detail("AIOCollectCount", netData.countAIOCollect - statState->networkState.countAIOCollect)
.detail("AIOSubmitLag", (g_network->networkInfo.metrics.secSquaredSubmit -
statState->networkMetricsState.secSquaredSubmit) /
currentStats.elapsed)
.detail("AIODiskStall", (g_network->networkInfo.metrics.secSquaredDiskStall -
statState->networkMetricsState.secSquaredDiskStall) /
currentStats.elapsed)
.detail("CurrentConnections", netData.countConnEstablished - netData.countConnClosedWithError -
netData.countConnClosedWithoutError)
.detail("ConnectionsEstablished",
(double)(netData.countConnEstablished - statState->networkState.countConnEstablished) /
currentStats.elapsed)
.detail("ConnectionsClosed",
((netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) +
(netData.countConnClosedWithoutError - statState->networkState.countConnClosedWithoutError)) /
currentStats.elapsed)
.detail("ConnectionErrors",
(netData.countConnClosedWithError - statState->networkState.countConnClosedWithError) /
currentStats.elapsed)
.detail("TLSPolicyFailures",
(netData.countTLSPolicyFailures - statState->networkState.countTLSPolicyFailures) /
currentStats.elapsed)
.trackLatest(eventName);

TraceEvent("MemoryMetrics")
.DETAILALLOCATORMEMUSAGE(16)
.DETAILALLOCATORMEMUSAGE(32)
.DETAILALLOCATORMEMUSAGE(64)
.DETAILALLOCATORMEMUSAGE(96)
.DETAILALLOCATORMEMUSAGE(128)
.DETAILALLOCATORMEMUSAGE(256)
.DETAILALLOCATORMEMUSAGE(512)
.DETAILALLOCATORMEMUSAGE(1024)
.DETAILALLOCATORMEMUSAGE(2048)
.DETAILALLOCATORMEMUSAGE(4096)
.DETAILALLOCATORMEMUSAGE(8192)
.detail("HugeArenaMemory", g_hugeArenaMemory.load());
.DETAILALLOCATORMEMUSAGE(16)
.DETAILALLOCATORMEMUSAGE(32)
.DETAILALLOCATORMEMUSAGE(64)
.DETAILALLOCATORMEMUSAGE(96)
.DETAILALLOCATORMEMUSAGE(128)
.DETAILALLOCATORMEMUSAGE(256)
.DETAILALLOCATORMEMUSAGE(512)
.DETAILALLOCATORMEMUSAGE(1024)
.DETAILALLOCATORMEMUSAGE(2048)
.DETAILALLOCATORMEMUSAGE(4096)
.DETAILALLOCATORMEMUSAGE(8192)
.detail("HugeArenaMemory", g_hugeArenaMemory.load())
.detail("DCID", machineState.dcId)
.detail("ZoneID", machineState.zoneId)
.detail("MachineID", machineState.machineId);

TraceEvent n("NetworkMetrics");
n
.detail("Elapsed", currentStats.elapsed)
.detail("CantSleep", netData.countCantSleep - statState->networkState.countCantSleep)
.detail("WontSleep", netData.countWontSleep - statState->networkState.countWontSleep)
.detail("Yields", netData.countYields - statState->networkState.countYields)
.detail("YieldCalls", netData.countYieldCalls - statState->networkState.countYieldCalls)
.detail("YieldCallsTrue", netData.countYieldCallsTrue - statState->networkState.countYieldCallsTrue)
.detail("SlowTaskSignals", netData.countSlowTaskSignals - statState->networkState.countSlowTaskSignals)
.detail("YieldBigStack", netData.countYieldBigStack - statState->networkState.countYieldBigStack)
.detail("RunLoopIterations", netData.countRunLoop - statState->networkState.countRunLoop)
.detail("TimersExecuted", netData.countTimers - statState->networkState.countTimers)
.detail("TasksExecuted", netData.countTasks - statState->networkState.countTasks)
.detail("ASIOEventsProcessed", netData.countASIOEvents - statState->networkState.countASIOEvents)
.detail("ReadCalls", netData.countReads - statState->networkState.countReads)
.detail("WriteCalls", netData.countWrites - statState->networkState.countWrites)
.detail("ReadProbes", netData.countReadProbes - statState->networkState.countReadProbes)
.detail("WriteProbes", netData.countWriteProbes - statState->networkState.countWriteProbes)
.detail("PacketsRead", netData.countPacketsReceived - statState->networkState.countPacketsReceived)
.detail("PacketsGenerated", netData.countPacketsGenerated - statState->networkState.countPacketsGenerated)
.detail("WouldBlock", netData.countWouldBlock - statState->networkState.countWouldBlock)
.detail("LaunchTime", netData.countLaunchTime - statState->networkState.countLaunchTime)
.detail("ReactTime", netData.countReactTime - statState->networkState.countReactTime);
n.detail("Elapsed", currentStats.elapsed)
.detail("CantSleep", netData.countCantSleep - statState->networkState.countCantSleep)
.detail("WontSleep", netData.countWontSleep - statState->networkState.countWontSleep)
.detail("Yields", netData.countYields - statState->networkState.countYields)
.detail("YieldCalls", netData.countYieldCalls - statState->networkState.countYieldCalls)
.detail("YieldCallsTrue", netData.countYieldCallsTrue - statState->networkState.countYieldCallsTrue)
.detail("SlowTaskSignals", netData.countSlowTaskSignals - statState->networkState.countSlowTaskSignals)
.detail("YieldBigStack", netData.countYieldBigStack - statState->networkState.countYieldBigStack)
.detail("RunLoopIterations", netData.countRunLoop - statState->networkState.countRunLoop)
.detail("TimersExecuted", netData.countTimers - statState->networkState.countTimers)
.detail("TasksExecuted", netData.countTasks - statState->networkState.countTasks)
.detail("ASIOEventsProcessed", netData.countASIOEvents - statState->networkState.countASIOEvents)
.detail("ReadCalls", netData.countReads - statState->networkState.countReads)
.detail("WriteCalls", netData.countWrites - statState->networkState.countWrites)
.detail("ReadProbes", netData.countReadProbes - statState->networkState.countReadProbes)
.detail("WriteProbes", netData.countWriteProbes - statState->networkState.countWriteProbes)
.detail("PacketsRead", netData.countPacketsReceived - statState->networkState.countPacketsReceived)
.detail("PacketsGenerated",
netData.countPacketsGenerated - statState->networkState.countPacketsGenerated)
.detail("WouldBlock", netData.countWouldBlock - statState->networkState.countWouldBlock)
.detail("LaunchTime", netData.countLaunchTime - statState->networkState.countLaunchTime)
.detail("ReactTime", netData.countReactTime - statState->networkState.countReactTime)
.detail("DCID", machineState.dcId)
.detail("ZoneID", machineState.zoneId)
.detail("MachineID", machineState.machineId);

for (int i = 0; i<NetworkMetrics::SLOW_EVENT_BINS; i++) {
if (int c = g_network->networkInfo.metrics.countSlowEvents[i] - statState->networkMetricsState.countSlowEvents[i]) {
@@ -165,18 +195,20 @@ SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *sta
}

if(machineMetrics) {
TraceEvent("MachineMetrics").detail("Elapsed", currentStats.elapsed)
.detail("MbpsSent", currentStats.machineMegabitsSent / currentStats.elapsed)
.detail("MbpsReceived", currentStats.machineMegabitsReceived / currentStats.elapsed)
.detail("OutSegs", currentStats.machineOutSegs)
.detail("RetransSegs", currentStats.machineRetransSegs)
.detail("CPUSeconds", currentStats.machineCPUSeconds)
.detail("TotalMemory", currentStats.machineTotalRAM)
.detail("CommittedMemory", currentStats.machineCommittedRAM)
.detail("AvailableMemory", currentStats.machineAvailableRAM)
.detail("ZoneID", machineState.zoneId)
.detail("MachineID", machineState.machineId)
.trackLatest("MachineMetrics");
TraceEvent("MachineMetrics")
.detail("Elapsed", currentStats.elapsed)
.detail("MbpsSent", currentStats.machineMegabitsSent / currentStats.elapsed)
.detail("MbpsReceived", currentStats.machineMegabitsReceived / currentStats.elapsed)
.detail("OutSegs", currentStats.machineOutSegs)
.detail("RetransSegs", currentStats.machineRetransSegs)
.detail("CPUSeconds", currentStats.machineCPUSeconds)
.detail("TotalMemory", currentStats.machineTotalRAM)
.detail("CommittedMemory", currentStats.machineCommittedRAM)
.detail("AvailableMemory", currentStats.machineAvailableRAM)
.detail("DCID", machineState.dcId)
.detail("ZoneID", machineState.zoneId)
.detail("MachineID", machineState.machineId)
.trackLatest("MachineMetrics");
}
}

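Every per-process field above follows one sampling pattern: customSystemMonitor keeps the previous counter snapshot in statState and emits the delta divided by the elapsed window, so each traced value is a rate for the interval rather than a lifetime total. A minimal sketch of that pattern with illustrative names (only the delta-over-elapsed idea comes from the code above):

    #include <cstdint>

    // Illustrative only: rate sampling as used by the .detail() chains above.
    struct Counters { int64_t bytesSent; };

    double sampleRate(const Counters& current, Counters& prev, double elapsed) {
        double rate = (current.bytesSent - prev.bytesSent) / elapsed; // delta over window
        prev = current; // remember this snapshot for the next sample
        return rate;
    }

The `* 8e-6` in MbpsSent/MbpsReceived is the same idea with a bytes-to-megabits conversion folded in.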
@@ -27,6 +27,7 @@

struct SystemMonitorMachineState {
Optional<std::string> folder;
Optional<Standalone<StringRef>> dcId;
Optional<Standalone<StringRef>> zoneId;
Optional<Standalone<StringRef>> machineId;
Optional<IPAddress> ip;
@@ -35,9 +36,10 @@ struct SystemMonitorMachineState {

SystemMonitorMachineState() : monitorStartTime(0) {}
explicit SystemMonitorMachineState(const IPAddress& ip) : ip(ip), monitorStartTime(0) {}
SystemMonitorMachineState(std::string folder, Optional<Standalone<StringRef>> zoneId,
Optional<Standalone<StringRef>> machineId, const IPAddress& ip)
: folder(folder), zoneId(zoneId), machineId(machineId), ip(ip), monitorStartTime(0) {}
SystemMonitorMachineState(std::string folder, Optional<Standalone<StringRef>> dcId,
Optional<Standalone<StringRef>> zoneId, Optional<Standalone<StringRef>> machineId,
const IPAddress& ip)
: folder(folder), dcId(dcId), zoneId(zoneId), machineId(machineId), ip(ip), monitorStartTime(0) {}
};

void initializeSystemMonitorMachineState(SystemMonitorMachineState machineState);
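The new four-argument constructor slots the datacenter ID in front of the zone ID. A sketch of a call site under that signature; the folder, IDs, and address below are hypothetical placeholders, not values from this diff:

    // All values are hypothetical; only the parameter order comes from the diff.
    SystemMonitorMachineState machineState("/var/lib/foundationdb/data",
                                           Standalone<StringRef>(LiteralStringRef("dc0")),   // dcId (new)
                                           Standalone<StringRef>(LiteralStringRef("zone0")), // zoneId
                                           Standalone<StringRef>(LiteralStringRef("m0")),    // machineId
                                           IPAddress(0x7f000001));                           // 127.0.0.1
    initializeSystemMonitorMachineState(machineState);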
@@ -29,7 +29,8 @@
#include <stdarg.h>
#include <cctype>
#include <time.h>

#include <set>
#include <iomanip>
#include "flow/IThreadPool.h"
#include "flow/ThreadHelper.actor.h"
#include "flow/FastRef.h"
@@ -422,6 +423,7 @@ public:

if (roll) {
auto o = new WriterThread::Roll;
double time = 0;
writer->post(o);

std::vector<TraceEventFields> events = latestEventCache.getAllUnsafe();
@@ -430,9 +432,15 @@
TraceEventFields rolledFields;
for(auto itr = events[idx].begin(); itr != events[idx].end(); ++itr) {
if(itr->first == "Time") {
rolledFields.addField("Time", format("%.6f", TraceEvent::getCurrentTime()));
time = TraceEvent::getCurrentTime();
rolledFields.addField("Time", format("%.6f", time));
rolledFields.addField("OriginalTime", itr->second);
}
else if (itr->first == "DateTime") {
UNSTOPPABLE_ASSERT(time > 0); // "Time" field should always come first
rolledFields.addField("DateTime", TraceEvent::printRealTime(time));
rolledFields.addField("OriginalDateTime", itr->second);
}
else if(itr->first == "TrackLatestType") {
rolledFields.addField("TrackLatestType", "Rolled");
}
@@ -676,6 +684,13 @@ TraceEvent::TraceEvent(TraceEvent &&ev) {
tmpEventMetric = ev.tmpEventMetric;
trackingKey = ev.trackingKey;
type = ev.type;
timeIndex = ev.timeIndex;

for (int i = 0; i < 5; i++) {
eventCounts[i] = ev.eventCounts[i];
}

networkThread = ev.networkThread;

ev.initialized = true;
ev.enabled = false;
@@ -684,6 +699,7 @@ TraceEvent::TraceEvent(TraceEvent &&ev) {
}

TraceEvent& TraceEvent::operator=(TraceEvent &&ev) {
// Note: still broken if ev and this are the same memory address.
enabled = ev.enabled;
err = ev.err;
fields = std::move(ev.fields);
@@ -696,6 +712,13 @@ TraceEvent& TraceEvent::operator=(TraceEvent &&ev) {
tmpEventMetric = ev.tmpEventMetric;
trackingKey = ev.trackingKey;
type = ev.type;
timeIndex = ev.timeIndex;

for (int i = 0; i < 5; i++) {
eventCounts[i] = ev.eventCounts[i];
}

networkThread = ev.networkThread;

ev.initialized = true;
ev.enabled = false;
@@ -782,6 +805,9 @@ bool TraceEvent::init() {
detail("Severity", int(severity));
detail("Time", "0.000000");
timeIndex = fields.size() - 1;
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
detail("DateTime", "");
}

detail("Type", type);
if(g_network && g_network->isSimulated()) {
@@ -982,7 +1008,7 @@ TraceEvent& TraceEvent::GetLastError() {

// We're cheating in counting, as in practice, we only use {10,20,30,40}.
static_assert(SevMaxUsed / 10 + 1 == 5, "Please bump eventCounts[5] to SevMaxUsed/10+1");
unsigned long TraceEvent::eventCounts[5] = {0,0,0,0,0};
unsigned long TraceEvent::eventCounts[5] = { 0, 0, 0, 0, 0 };

unsigned long TraceEvent::CountEventsLoggedAt(Severity sev) {
return TraceEvent::eventCounts[sev/10];
@@ -1000,7 +1026,11 @@ void TraceEvent::log() {
++g_allocation_tracing_disabled;
try {
if (enabled) {
fields.mutate(timeIndex).second = format("%.6f", TraceEvent::getCurrentTime());
double time = TraceEvent::getCurrentTime();
fields.mutate(timeIndex).second = format("%.6f", time);
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
fields.mutate(timeIndex + 1).second = TraceEvent::printRealTime(time);
}

if (this->severity == SevError) {
severity = SevInfo;
@@ -1071,6 +1101,31 @@ double TraceEvent::getCurrentTime() {
}
}

// converts the given flow time into a string
// with format: %Y-%m-%dT%H:%M:%S
// This only has second-resolution for the simple reason
// that std::put_time does not support higher resolution.
// This is fine since we always log the flow time as well.
std::string TraceEvent::printRealTime(double time) {
using Clock = std::chrono::system_clock;
time_t ts = Clock::to_time_t(Clock::time_point(
std::chrono::duration_cast<Clock::duration>(std::chrono::duration<double, std::ratio<1>>(time))));
if (g_network && g_network->isSimulated()) {
// The clock is simulated, so return the real time
ts = Clock::to_time_t(Clock::now());
}
std::stringstream ss;
#ifdef _WIN32
// MSVC gmtime is threadsafe
ss << std::put_time(::gmtime(&ts), "%Y-%m-%dT%H:%M:%SZ");
#else
// use threadsafe gmt
struct tm result;
ss << std::put_time(::gmtime_r(&ts, &result), "%Y-%m-%dT%H:%M:%SZ");
#endif
return ss.str();
}

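Note that printRealTime is UTC and deliberately second-granular; the sub-second part lives in the Time field it accompanies. A quick sketch of the expected output shape (outside simulation, where the wall clock is not substituted):

    // 0.0 seconds after the Unix epoch formats as the epoch itself, in UTC.
    ASSERT(TraceEvent::printRealTime(0.0) == "1970-01-01T00:00:00Z");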
TraceInterval& TraceInterval::begin() {
pairID = nondeterministicRandom()->randomUniqueID();
count = 0;
@@ -1138,6 +1193,9 @@ void TraceBatch::dump() {
TraceBatch::EventInfo::EventInfo(double time, const char *name, uint64_t id, const char *location) {
fields.addField("Severity", format("%d", (int)SevInfo));
fields.addField("Time", format("%.6f", time));
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
fields.addField("DateTime", TraceEvent::printRealTime(time));
}
fields.addField("Type", name);
fields.addField("ID", format("%016" PRIx64, id));
fields.addField("Location", location);
@@ -1146,6 +1204,9 @@ TraceBatch::EventInfo::EventInfo(double time, const char *name, uint64_t id, con
TraceBatch::AttachInfo::AttachInfo(double time, const char *name, uint64_t id, uint64_t to) {
fields.addField("Severity", format("%d", (int)SevInfo));
fields.addField("Time", format("%.6f", time));
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
fields.addField("DateTime", TraceEvent::printRealTime(time));
}
fields.addField("Type", name);
fields.addField("ID", format("%016" PRIx64, id));
fields.addField("To", format("%016" PRIx64, to));
@@ -1154,6 +1215,9 @@ TraceBatch::AttachInfo::AttachInfo(double time, const char *name, uint64_t id, u
TraceBatch::BuggifyInfo::BuggifyInfo(double time, int activated, int line, std::string file) {
fields.addField("Severity", format("%d", (int)SevInfo));
fields.addField("Time", format("%.6f", time));
if (FLOW_KNOBS->TRACE_DATETIME_ENABLED) {
fields.addField("DateTime", TraceEvent::printRealTime(time));
}
fields.addField("Type", "BuggifySection");
fields.addField("Activated", format("%d", activated));
fields.addField("File", std::move(file));
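All three TraceBatch constructors gate the extra field on the same FLOW_KNOBS->TRACE_DATETIME_ENABLED knob as TraceEvent::init(), so batched and ordinary events stay uniform. With the knob on, an emitted XML trace line would carry both clocks side by side, roughly like this (attribute values invented for illustration):

    <Event Severity="10" Time="1586217600.000000" DateTime="2020-04-07T00:00:00Z" Type="Net2Starting" ... />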
@@ -26,6 +26,7 @@
#include <stdarg.h>
#include <stdint.h>
#include <string>
#include <chrono>
#include <map>
#include <type_traits>
#include "flow/IRandom.h"
@@ -388,6 +389,7 @@ struct TraceEvent {
static bool isNetworkThread();

static double getCurrentTime();
static std::string printRealTime(double time);

//Must be called directly after constructing the trace event
TraceEvent& error(const class Error& e, bool includeCancelled=false) {
@@ -86,6 +86,7 @@ ERROR( please_reboot_delete, 1208, "Reboot of server process requested, with del
ERROR( master_proxy_failed, 1209, "Master terminating because a Proxy failed" )
ERROR( master_resolver_failed, 1210, "Master terminating because a Resolver failed" )
ERROR( server_overloaded, 1211, "Server is under too much load and cannot respond" )
ERROR( dd_tracker_cancelled, 1215, "The data distribution tracker has been cancelled" )

// 15xx Platform errors
ERROR( platform_error, 1500, "Platform error" )
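Each ERROR() entry expands into a generated factory function and error-code constant, so the new 1211 code is thrown and matched like any other flow error. A sketch of typical use (the surrounding function names are illustrative):

    // Illustrative only: server_overloaded() and error_code_server_overloaded
    // are generated from the ERROR() entry above.
    void admitRequest(bool tooBusy) {
        if (tooBusy) throw server_overloaded();
    }

    bool shouldClientRetryLater(const Error& e) {
        return e.code() == error_code_server_overloaded;
    }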
@@ -22,11 +22,13 @@
<ClCompile Include="FileTraceLogWriter.cpp" />
<ClCompile Include="XmlTraceLogFormatter.cpp" />
<ClCompile Include="JsonTraceLogFormatter.cpp" />
<ClCompile Include="Histogram.cpp" />
<ClInclude Include="FileTraceLogWriter.h" />
<ClInclude Include="XmlTraceLogFormatter.h" />
<ClInclude Include="JsonTraceLogFormatter.h" />
<ClInclude Include="MetricSample.h" />
<ClInclude Include="Profiler.h" />
<ClInclude Include="Histogram.h" />
<ActorCompiler Include="Profiler.actor.cpp" />
<ActorCompiler Include="Net2.actor.cpp" />
<ClCompile Include="IThreadPool.cpp" />
@@ -30,8 +30,9 @@
#ifndef TLS_DISABLED
#include "boost/asio/ssl.hpp"
#endif
#include "flow/serialize.h"
#include "flow/Arena.h"
#include "flow/IRandom.h"
#include "flow/Trace.h"

enum class TaskPriority {
Max = 1000000,
@@ -111,8 +112,6 @@ inline TaskPriority incrementPriorityIfEven(TaskPriority p) {

class Void;

template<class T> class Optional;

struct IPAddress {
typedef boost::asio::ip::address_v6::bytes_type IPAddressStore;
static_assert(std::is_same<IPAddressStore, std::array<uint8_t, 16>>::value,
@@ -351,6 +350,12 @@ public:
virtual Future<int64_t> read() = 0;
};

struct SendBuffer {
uint8_t const* data;
SendBuffer* next;
int bytes_written, bytes_sent;
};

class IConnection {
public:
// IConnection is reference-counted (use Reference<IConnection>), but the caller must explicitly call close()
@@ -30,6 +30,7 @@
#include "flow/Arena.h"
#include "flow/FileIdentifier.h"
#include "flow/ObjectSerializer.h"
#include "flow/network.h"
#include <algorithm>

// Though similar, is_binary_serializable cannot be replaced by std::is_pod, as doing so would prefer
@@ -664,24 +665,20 @@ private:
ProtocolVersion m_protocolVersion;
};

struct SendBuffer {
uint8_t const* data;
SendBuffer* next;
int bytes_written, bytes_sent;
};

struct PacketBuffer : SendBuffer {
private:
static constexpr size_t PACKET_BUFFER_OVERHEAD = 40;
int reference_count;
uint32_t size_;
static constexpr size_t PACKET_BUFFER_OVERHEAD = 32;
uint32_t const size_;

public:
double const enqueue_time;

uint8_t* data() { return const_cast<uint8_t*>(static_cast<SendBuffer*>(this)->data); }
size_t size() { return size_; }

private:
explicit PacketBuffer(size_t size) : reference_count(1), size_(size) {
explicit PacketBuffer(size_t size) : reference_count(1), size_(size), enqueue_time(g_network->now()) {
next = 0;
bytes_written = bytes_sent = 0;
((SendBuffer*)this)->data = reinterpret_cast<uint8_t*>(this + 1);
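Moving SendBuffer next to IConnection and stamping every PacketBuffer with enqueue_time (set to g_network->now() at construction) lets the transport observe how long a packet sat queued before reaching the wire. A sketch of the measurement this enables (the helper is illustrative, not part of the diff):

    // Illustrative: seconds a buffer has been queued since construction.
    double queuedSeconds(const PacketBuffer& b) {
        return g_network->now() - b.enqueue_time;
    }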
@@ -32,7 +32,7 @@

<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
<Product Name='$(var.Title)'
Id='{7DDBF1DA-C17A-4519-A893-6CED9B1D9B5A}'
Id='{88AA3058-920F-4DB3-8E3E-492E35F13DDE}'
UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
Version='$(var.Version)'
Manufacturer='$(var.Manufacturer)'
@@ -1,7 +1,7 @@
<?xml version="1.0"?>
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Version>6.2.28</Version>
<Version>6.2.29</Version>
<PackageName>6.2</PackageName>
</PropertyGroup>
</Project>