Merge branch 'release-6.2' into merge-release-6.2-into-release-6.3
# Conflicts: # cmake/CompileBoost.cmake # cmake/FDBComponents.cmake # fdbrpc/AsyncFileCached.actor.h # fdbrpc/simulator.h # fdbserver/KeyValueStoreSQLite.actor.cpp # fdbserver/Knobs.cpp # fdbserver/Knobs.h # fdbserver/storageserver.actor.cpp # flow/Knobs.h # flow/network.h
This commit is contained in:
commit
67e783acf8
|
@ -1,3 +1,4 @@
|
|||
ARG IMAGE_TAG=0.1.24
|
||||
FROM centos:6
|
||||
|
||||
# Clean yum cache, disable default Base repo and enable Vault
|
||||
|
@ -29,17 +30,35 @@ USER root
|
|||
RUN adduser --comment '' fdb && chown fdb /opt
|
||||
|
||||
# wget of bintray without forcing UTF-8 encoding results in 403 Forbidden
|
||||
# Old versions of FDB need boost 1.67
|
||||
RUN cd /opt/ &&\
|
||||
curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 &&\
|
||||
echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha-67.txt &&\
|
||||
sha256sum -c boost-sha-67.txt &&\
|
||||
tar -xjf boost_1_67_0.tar.bz2 &&\
|
||||
rm -rf boost_1_67_0.tar.bz2 boost-sha-67.txt boost_1_67_0/libs &&\
|
||||
rm -rf boost_1_67_0.tar.bz2 boost-sha-67.txt boost_1_67_0/libs
|
||||
|
||||
# install Boost 1.72
|
||||
# wget of bintray without forcing UTF-8 encoding results in 403 Forbidden
|
||||
RUN cd /tmp/ &&\
|
||||
curl -L https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 &&\
|
||||
echo "59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722 boost_1_72_0.tar.bz2" > boost-sha-72.txt &&\
|
||||
sha256sum -c boost-sha-72.txt &&\
|
||||
tar -xjf boost_1_72_0.tar.bz2 &&\
|
||||
rm -rf boost_1_72_0.tar.bz2 boost-sha-72.txt boost_1_72_0/libs
|
||||
cd boost_1_72_0 &&\
|
||||
scl enable devtoolset-8 -- ./bootstrap.sh --with-libraries=context &&\
|
||||
scl enable devtoolset-8 -- ./b2 link=static cxxflags=-std=c++14 --prefix=/opt/boost_1_72_0 install &&\
|
||||
rm -rf boost_1_72_0.tar.bz2 boost-sha-72.txt boost_1_72_0
|
||||
|
||||
# jemalloc (needed for FDB after 6.3)
|
||||
RUN cd /tmp/ &&\
|
||||
curl -L https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2 -o jemalloc-5.2.1.tar.bz2 &&\
|
||||
echo "34330e5ce276099e2e8950d9335db5a875689a4c6a56751ef3b1d8c537f887f6 jemalloc-5.2.1.tar.bz2" > jemalloc-sha.txt &&\
|
||||
sha256sum -c jemalloc-sha.txt &&\
|
||||
tar --no-same-owner --no-same-permissions -xjf jemalloc-5.2.1.tar.bz2 &&\
|
||||
cd jemalloc-5.2.1 &&\
|
||||
scl enable devtoolset-8 -- ./configure --enable-static --disable-cxx &&\
|
||||
scl enable devtoolset-8 -- make install
|
||||
|
||||
# install cmake
|
||||
RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz -o /tmp/cmake.tar.gz &&\
|
||||
|
@ -79,7 +98,7 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
|
|||
RUN cd /opt/ && curl -L https://github.com/manticoresoftware/manticoresearch/raw/master/misc/junit/ctest2junit.xsl -o ctest2junit.xsl
|
||||
|
||||
# Setting this environment variable switches from OpenSSL to BoringSSL
|
||||
#ENV OPENSSL_ROOT_DIR=/opt/boringssl
|
||||
ENV OPENSSL_ROOT_DIR=/opt/boringssl
|
||||
|
||||
# install BoringSSL: TODO: They don't seem to have releases(?) I picked today's master SHA.
|
||||
RUN cd /opt &&\
|
||||
|
@ -110,8 +129,8 @@ RUN cd /opt/boringssl/build &&\
|
|||
ARG TIMEZONEINFO=America/Los_Angeles
|
||||
RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime
|
||||
|
||||
LABEL version=0.1.22
|
||||
ENV DOCKER_IMAGEVER=0.1.22
|
||||
LABEL version=${IMAGE_TAG}
|
||||
ENV DOCKER_IMAGEVER=${IMAGE_TAG}
|
||||
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
|
||||
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
|
||||
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
ARG IMAGE_TAG=0.1.21
|
||||
ARG IMAGE_TAG=0.1.24
|
||||
ARG IMAGE_VERSION=0.11.15
|
||||
FROM foundationdb/foundationdb-build:${IMAGE_TAG}
|
||||
|
||||
USER root
|
||||
|
@ -51,8 +52,8 @@ RUN cp -iv /usr/local/bin/clang++ /usr/local/bin/clang++.deref &&\
|
|||
ldconfig &&\
|
||||
rm -rf /mnt/artifacts
|
||||
|
||||
LABEL version=0.11.13
|
||||
ENV DOCKER_IMAGEVER=0.11.13
|
||||
LABEL version=${IMAGE_VERSION}
|
||||
ENV DOCKER_IMAGEVER=${IMAGE_VERSION}
|
||||
|
||||
ENV CLANGCC=/usr/local/bin/clang.de8a65ef
|
||||
ENV CLANGCXX=/usr/local/bin/clang++.de8a65ef
|
||||
|
|
|
@ -2,7 +2,7 @@ version: "3"
|
|||
|
||||
services:
|
||||
common: &common
|
||||
image: foundationdb/foundationdb-build:0.1.22
|
||||
image: foundationdb/foundationdb-build:0.1.24
|
||||
|
||||
build-setup: &build-setup
|
||||
<<: *common
|
||||
|
@ -60,7 +60,7 @@ services:
|
|||
|
||||
snapshot-cmake: &snapshot-cmake
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack'
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 -DUSE_WERROR=ON /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack'
|
||||
|
||||
prb-cmake:
|
||||
<<: *snapshot-cmake
|
||||
|
@ -68,7 +68,7 @@ services:
|
|||
|
||||
snapshot-bindings-cmake: &snapshot-bindings-cmake
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "bindings/all"'
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 -DUSE_WERROR=ON /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "bindings/all"'
|
||||
|
||||
prb-bindings-cmake:
|
||||
<<: *snapshot-bindings-cmake
|
||||
|
@ -84,7 +84,7 @@ services:
|
|||
|
||||
snapshot-ctest: &snapshot-ctest
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 -DUSE_WERROR=ON /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
|
||||
|
||||
prb-ctest:
|
||||
<<: *snapshot-ctest
|
||||
|
@ -92,7 +92,7 @@ services:
|
|||
|
||||
snapshot-correctness: &snapshot-correctness
|
||||
<<: *build-setup
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure'
|
||||
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 -DUSE_WERROR=ON /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure'
|
||||
|
||||
prb-correctness:
|
||||
<<: *snapshot-correctness
|
||||
|
|
|
@ -11,6 +11,7 @@ endif()
|
|||
################################################################################
|
||||
# SSL
|
||||
################################################################################
|
||||
|
||||
include(CheckSymbolExists)
|
||||
|
||||
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support")
|
||||
|
@ -23,12 +24,12 @@ else()
|
|||
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
|
||||
check_symbol_exists("OPENSSL_INIT_NO_ATEXIT" "openssl/crypto.h" OPENSSL_HAS_NO_ATEXIT)
|
||||
if(OPENSSL_HAS_NO_ATEXIT)
|
||||
set(WITH_TLS ON)
|
||||
add_compile_options(-DHAVE_OPENSSL)
|
||||
add_compile_options(-DHAVE_OPENSSL_INIT_NO_AT_EXIT)
|
||||
else()
|
||||
message(WARNING "An OpenSSL version was found, but it doesn't support OPENSSL_INIT_NO_ATEXIT - Will compile without TLS Support")
|
||||
set(WITH_TLS OFF)
|
||||
message(STATUS "Found OpenSSL without OPENSSL_INIT_NO_ATEXIT: assuming BoringSSL")
|
||||
endif()
|
||||
set(WITH_TLS ON)
|
||||
add_compile_options(-DHAVE_OPENSSL)
|
||||
else()
|
||||
message(STATUS "OpenSSL was not found - Will compile without TLS Support")
|
||||
message(STATUS "You can set OPENSSL_ROOT_DIR to help cmake find it")
|
||||
|
|
|
@ -93,6 +93,11 @@
|
|||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"low_priority_queries":{
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"bytes_queried":{
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
|
@ -479,6 +484,11 @@
|
|||
"hz":0.0,
|
||||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"low_priority_reads":{ // measures number of incoming low priority read requests
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"location_requests":{ // measures number of outgoing key server location responses
|
||||
"hz":0.0,
|
||||
|
|
|
@ -11,6 +11,7 @@ Release Notes
|
|||
* Add documentation on read and write Path. `(PR #4099) <https://github.com/apple/foundationdb/pull/4099>`_
|
||||
* Add a histogram to expose commit batching window on Proxies. `(PR #4166) <https://github.com/apple/foundationdb/pull/4166>`_
|
||||
* Fix double counting of range reads in TransactionMetrics. `(PR #4130) <https://github.com/apple/foundationdb/pull/4130>`_
|
||||
* Add a trace event that can be used as an indicator of the load on the proxy. `(PR #4166) <https://github.com/apple/foundationdb/pull/4166>`_
|
||||
|
||||
6.2.28
|
||||
======
|
||||
|
|
|
@ -116,6 +116,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"low_priority_queries":{
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"bytes_queried":{
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
|
@ -521,6 +526,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
|
|||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"low_priority_reads":{
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
"roughness":0.0
|
||||
},
|
||||
"location_requests":{
|
||||
"hz":0.0,
|
||||
"counter":0,
|
||||
|
|
|
@ -231,8 +231,6 @@ public:
|
|||
return filename;
|
||||
}
|
||||
|
||||
std::vector<AFCPage*> const& getFlushable() { return flushable; }
|
||||
|
||||
void setRateControl(Reference<IRateControl> const& rc) override { rateControl = rc; }
|
||||
|
||||
Reference<IRateControl> const& getRateControl() override { return rateControl; }
|
||||
|
@ -253,8 +251,8 @@ public:
|
|||
}
|
||||
|
||||
auto f = quiesce();
|
||||
//TraceEvent("AsyncFileCachedDel").detail("Filename", filename)
|
||||
// .detail("Refcount", debugGetReferenceCount()).detail("CanDie", f.isReady()).backtrace();
|
||||
TraceEvent("AsyncFileCachedDel").detail("Filename", filename)
|
||||
.detail("Refcount", debugGetReferenceCount()).detail("CanDie", f.isReady()).backtrace();
|
||||
if (f.isReady())
|
||||
delete this;
|
||||
else
|
||||
|
@ -523,8 +521,16 @@ struct AFCPage : public EvictablePage, public FastAllocated<AFCPage> {
|
|||
|
||||
if (dirty) {
|
||||
// Wait for rate control if it is set
|
||||
if (self->owner->getRateControl())
|
||||
wait(self->owner->getRateControl()->getAllowance(1));
|
||||
if (self->owner->getRateControl()) {
|
||||
int allowance = 1;
|
||||
// If I/O size is defined, wait for the calculated I/O quota
|
||||
if (FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_IO_SIZE > 0) {
|
||||
allowance = (self->pageCache->pageSize + FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_IO_SIZE - 1) /
|
||||
FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_IO_SIZE; // round up
|
||||
ASSERT(allowance > 0);
|
||||
}
|
||||
wait(self->owner->getRateControl()->getAllowance(allowance));
|
||||
}
|
||||
|
||||
if ( self->pageOffset + self->pageCache->pageSize > self->owner->length ) {
|
||||
ASSERT(self->pageOffset < self->owner->length);
|
||||
|
|
|
@ -1114,7 +1114,7 @@ public:
|
|||
int nQuorum = ((desiredCoordinators+1)/2)*2-1;
|
||||
|
||||
KillType newKt = kt;
|
||||
if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete))
|
||||
if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete))
|
||||
{
|
||||
LocalityGroup primaryProcessesLeft, primaryProcessesDead;
|
||||
LocalityGroup primarySatelliteProcessesLeft, primarySatelliteProcessesDead;
|
||||
|
@ -1255,6 +1255,7 @@ public:
|
|||
TEST( true ); // Simulated machine was killed with any kill type
|
||||
TEST( kt == KillInstantly ); // Simulated machine was killed instantly
|
||||
TEST( kt == InjectFaults ); // Simulated machine was killed with faults
|
||||
TEST( kt == FailDisk ); // Simulated machine was killed with a failed disk
|
||||
|
||||
if (kt == KillInstantly) {
|
||||
TraceEvent(SevWarn, "FailMachine")
|
||||
|
@ -1281,6 +1282,9 @@ public:
|
|||
machine->fault_injection_r = deterministicRandom()->randomUniqueID().first();
|
||||
machine->fault_injection_p1 = 0.1;
|
||||
machine->fault_injection_p2 = deterministicRandom()->random01();
|
||||
} else if (kt == FailDisk) {
|
||||
TraceEvent(SevWarn, "FailDiskMachine").detail("Name", machine->name).detail("Address", machine->address).detail("ZoneId", machine->locality.zoneId()).detail("Process", machine->toString()).detail("Rebooting", machine->rebooting).detail("Protected", protectedAddresses.count(machine->address)).backtrace();
|
||||
machine->failedDisk = true;
|
||||
} else {
|
||||
ASSERT( false );
|
||||
}
|
||||
|
@ -1371,7 +1375,7 @@ public:
|
|||
}
|
||||
|
||||
// Check if machine can be removed, if requested
|
||||
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete)))
|
||||
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete)))
|
||||
{
|
||||
std::vector<ProcessInfo*> processesLeft, processesDead;
|
||||
int protectedWorker = 0, unavailable = 0, excluded = 0, cleared = 0;
|
||||
|
@ -1404,7 +1408,7 @@ public:
|
|||
if (!canKillProcesses(processesLeft, processesDead, kt, &kt)) {
|
||||
TraceEvent("ChangedKillMachine").detail("MachineId", machineId).detail("KillType", kt).detail("OrigKillType", ktOrig).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("TotalProcesses", machines.size()).detail("ProcessesPerMachine", processesPerMachine).detail("Protected", protectedWorker).detail("Unavailable", unavailable).detail("Excluded", excluded).detail("Cleared", cleared).detail("ProtectedTotal", protectedAddresses.size()).detail("TLogPolicy", tLogPolicy->info()).detail("StoragePolicy", storagePolicy->info());
|
||||
}
|
||||
else if ((kt == KillInstantly) || (kt == InjectFaults)) {
|
||||
else if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk)) {
|
||||
TraceEvent("DeadMachine").detail("MachineId", machineId).detail("KillType", kt).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("TotalProcesses", machines.size()).detail("ProcessesPerMachine", processesPerMachine).detail("TLogPolicy", tLogPolicy->info()).detail("StoragePolicy", storagePolicy->info());
|
||||
for (auto process : processesLeft) {
|
||||
TraceEvent("DeadMachineSurvivors").detail("MachineId", machineId).detail("KillType", kt).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("SurvivingProcess", process->toString());
|
||||
|
@ -1444,7 +1448,7 @@ public:
|
|||
|
||||
TraceEvent("KillMachine").detail("MachineId", machineId).detail("Kt", kt).detail("KtOrig", ktOrig).detail("KillableMachines", processesOnMachine).detail("ProcessPerMachine", processesPerMachine).detail("KillChanged", kt!=ktOrig);
|
||||
if ( kt < RebootAndDelete ) {
|
||||
if(kt == InjectFaults && machines[machineId].machineProcess != nullptr)
|
||||
if((kt == InjectFaults || kt == FailDisk) && machines[machineId].machineProcess != nullptr)
|
||||
killProcess_internal( machines[machineId].machineProcess, kt );
|
||||
for (auto& process : machines[machineId].processes) {
|
||||
TraceEvent("KillMachineProcess").detail("KillType", kt).detail("Process", process->toString()).detail("StartingClass", process->startingClass.toString()).detail("Failed", process->failed).detail("Excluded", process->excluded).detail("Cleared", process->cleared).detail("Rebooting", process->rebooting);
|
||||
|
@ -1492,7 +1496,7 @@ public:
|
|||
}
|
||||
|
||||
// Check if machine can be removed, if requested
|
||||
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete)))
|
||||
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete)))
|
||||
{
|
||||
std::vector<ProcessInfo*> processesLeft, processesDead;
|
||||
for (auto processInfo : getAllProcesses()) {
|
||||
|
@ -1781,6 +1785,9 @@ ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
|
|||
|
||||
//Simulates delays for performing operations on disk
|
||||
Future<Void> waitUntilDiskReady( Reference<DiskParameters> diskParameters, int64_t size, bool sync ) {
|
||||
if(g_simulator.getCurrentProcess()->failedDisk) {
|
||||
return Never();
|
||||
}
|
||||
if(g_simulator.connectionFailuresDisableDuration > 1e4)
|
||||
return delay(0.0001);
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ public:
|
|||
ISimulator() : desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1), isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false), allSwapsDisabled(false), backupAgents(WaitForType), drAgents(WaitForType), extraDB(NULL), allowLogSetKills(true), usableRegions(1) {}
|
||||
|
||||
// Order matters!
|
||||
enum KillType { KillInstantly, InjectFaults, RebootAndDelete, RebootProcessAndDelete, Reboot, RebootProcess, None };
|
||||
enum KillType { KillInstantly, InjectFaults, FailDisk, RebootAndDelete, RebootProcessAndDelete, Reboot, RebootProcess, None };
|
||||
|
||||
enum BackupAgentType { NoBackupAgents, WaitForType, BackupToFile, BackupToDB };
|
||||
|
||||
|
@ -68,6 +68,7 @@ public:
|
|||
|
||||
uint64_t fault_injection_r;
|
||||
double fault_injection_p1, fault_injection_p2;
|
||||
bool failedDisk;
|
||||
|
||||
UID uid;
|
||||
|
||||
|
@ -76,13 +77,13 @@ public:
|
|||
: name(name), locality(locality), startingClass(startingClass), addresses(addresses),
|
||||
address(addresses.address), dataFolder(dataFolder), network(net), coordinationFolder(coordinationFolder),
|
||||
failed(false), excluded(false), cpuTicks(0), rebooting(false), fault_injection_p1(0), fault_injection_p2(0),
|
||||
fault_injection_r(0), machine(0), cleared(false) {
|
||||
fault_injection_r(0), machine(0), cleared(false), failedDisk(false) {
|
||||
uid = deterministicRandom()->randomUniqueID();
|
||||
}
|
||||
|
||||
Future<KillType> onShutdown() { return shutdownSignal.getFuture(); }
|
||||
|
||||
bool isReliable() const { return !failed && fault_injection_p1 == 0 && fault_injection_p2 == 0; }
|
||||
bool isReliable() const { return !failed && fault_injection_p1 == 0 && fault_injection_p2 == 0 && !failedDisk; }
|
||||
bool isAvailable() const { return !isExcluded() && isReliable(); }
|
||||
bool isExcluded() const { return excluded; }
|
||||
bool isCleared() const { return cleared; }
|
||||
|
|
|
@ -1385,13 +1385,13 @@ void SQLiteDB::open(bool writable) {
|
|||
if (dbFile.isError()) throw dbFile.getError(); // If we've failed to open the file, throw an exception
|
||||
if (walFile.isError()) throw walFile.getError(); // If we've failed to open the file, throw an exception
|
||||
|
||||
// Set Rate control if FLOW_KNOBS are positive
|
||||
if (FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT > 0 && FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS > 0) {
|
||||
// Set Rate control if SERVER_KNOBS are positive
|
||||
if (SERVER_KNOBS->SQLITE_WRITE_WINDOW_LIMIT > 0 && SERVER_KNOBS->SQLITE_WRITE_WINDOW_SECONDS > 0) {
|
||||
// The writer thread is created before the readers, so it should initialize the rate controls.
|
||||
if(writable) {
|
||||
// Create a new rate control and assign it to both files.
|
||||
Reference<SpeedLimit> rc(new SpeedLimit(FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT,
|
||||
FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS));
|
||||
Reference<SpeedLimit> rc(
|
||||
new SpeedLimit(SERVER_KNOBS->SQLITE_WRITE_WINDOW_LIMIT, SERVER_KNOBS->SQLITE_WRITE_WINDOW_SECONDS));
|
||||
dbFile.get()->setRateControl(rc);
|
||||
walFile.get()->setRateControl(rc);
|
||||
} else {
|
||||
|
@ -1922,6 +1922,7 @@ private:
|
|||
dbFile->setRateControl({});
|
||||
rc->wakeWaiters();
|
||||
}
|
||||
dbFile.clear();
|
||||
|
||||
if(walFile && walFile->getRateControl()) {
|
||||
TraceEvent(SevDebug, "KeyValueStoreSQLiteShutdownRateControl").detail("Filename", walFile->getFilename());
|
||||
|
@ -1929,6 +1930,7 @@ private:
|
|||
walFile->setRateControl({});
|
||||
rc->wakeWaiters();
|
||||
}
|
||||
walFile.clear();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> stopOnError( KeyValueStoreSQLite* self ) {
|
||||
|
|
|
@ -279,6 +279,16 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( SQLITE_CHUNK_SIZE_PAGES, 25600 ); // 100MB
|
||||
init( SQLITE_CHUNK_SIZE_PAGES_SIM, 1024 ); // 4MB
|
||||
init( SQLITE_READER_THREADS, 64 ); // number of read threads
|
||||
init( SQLITE_WRITE_WINDOW_SECONDS, -1 );
|
||||
init( SQLITE_WRITE_WINDOW_LIMIT, -1 );
|
||||
if( randomize && BUGGIFY ) {
|
||||
// Choose an window between .01 and 1.01 seconds.
|
||||
SQLITE_WRITE_WINDOW_SECONDS = 0.01 + deterministicRandom()->random01();
|
||||
// Choose random operations per second
|
||||
int opsPerSecond = deterministicRandom()->randomInt(1000, 5000);
|
||||
// Set window limit to opsPerSecond scaled down to window size
|
||||
SQLITE_WRITE_WINDOW_LIMIT = opsPerSecond * SQLITE_WRITE_WINDOW_SECONDS;
|
||||
}
|
||||
|
||||
// Maximum and minimum cell payload bytes allowed on primary page as calculated in SQLite.
|
||||
// These formulas are copied from SQLite, using its hardcoded constants, so if you are
|
||||
|
@ -497,6 +507,10 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( STORAGE_DURABILITY_LAG_HARD_MAX, 2000e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_HARD_MAX = 100e6;
|
||||
init( STORAGE_DURABILITY_LAG_SOFT_MAX, 200e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_SOFT_MAX = 10e6;
|
||||
|
||||
//FIXME: Low priority reads are disabled by assigning very high knob values, reduce knobs for 7.0
|
||||
init( LOW_PRIORITY_STORAGE_QUEUE_BYTES, 775e8 ); if( smallStorageTarget ) LOW_PRIORITY_STORAGE_QUEUE_BYTES = 1750e3;
|
||||
init( LOW_PRIORITY_DURABILITY_LAG, 275e8 ); if( smallStorageTarget ) LOW_PRIORITY_DURABILITY_LAG = 15e6;
|
||||
|
||||
bool smallTlogTarget = randomize && BUGGIFY;
|
||||
init( TARGET_BYTES_PER_TLOG, 2400e6 ); if( smallTlogTarget ) TARGET_BYTES_PER_TLOG = 2000e3;
|
||||
init( SPRING_BYTES_TLOG, 400e6 ); if( smallTlogTarget ) SPRING_BYTES_TLOG = 200e3;
|
||||
|
@ -587,6 +601,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( REPORT_DD_METRICS, true );
|
||||
init( DD_METRICS_REPORT_INTERVAL, 30.0 );
|
||||
init( FETCH_KEYS_TOO_LONG_TIME_CRITERIA, 300.0 );
|
||||
init( MAX_STORAGE_COMMIT_TIME, 120.0 ); //The max fsync stall time on the storage server and tlog before marking a disk as failed
|
||||
|
||||
//Wait Failure
|
||||
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
|
||||
|
|
|
@ -236,6 +236,8 @@ public:
|
|||
int SQLITE_CHUNK_SIZE_PAGES;
|
||||
int SQLITE_CHUNK_SIZE_PAGES_SIM;
|
||||
int SQLITE_READER_THREADS;
|
||||
int SQLITE_WRITE_WINDOW_LIMIT;
|
||||
double SQLITE_WRITE_WINDOW_SECONDS;
|
||||
|
||||
// KeyValueStoreSqlite spring cleaning
|
||||
double SPRING_CLEANING_NO_ACTION_INTERVAL;
|
||||
|
@ -418,6 +420,12 @@ public:
|
|||
int64_t AUTO_TAG_THROTTLE_STORAGE_QUEUE_BYTES;
|
||||
int64_t TARGET_BYTES_PER_STORAGE_SERVER_BATCH;
|
||||
int64_t SPRING_BYTES_STORAGE_SERVER_BATCH;
|
||||
int64_t STORAGE_HARD_LIMIT_BYTES;
|
||||
int64_t STORAGE_DURABILITY_LAG_HARD_MAX;
|
||||
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
|
||||
|
||||
int64_t LOW_PRIORITY_STORAGE_QUEUE_BYTES;
|
||||
int64_t LOW_PRIORITY_DURABILITY_LAG;
|
||||
|
||||
int64_t TARGET_BYTES_PER_TLOG;
|
||||
int64_t SPRING_BYTES_TLOG;
|
||||
|
@ -488,9 +496,6 @@ public:
|
|||
int FETCH_KEYS_PARALLELISM_BYTES;
|
||||
int FETCH_KEYS_LOWER_PRIORITY;
|
||||
int BUGGIFY_BLOCK_BYTES;
|
||||
int64_t STORAGE_HARD_LIMIT_BYTES;
|
||||
int64_t STORAGE_DURABILITY_LAG_HARD_MAX;
|
||||
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
|
||||
double STORAGE_DURABILITY_LAG_REJECT_THRESHOLD;
|
||||
double STORAGE_DURABILITY_LAG_MIN_RATE;
|
||||
int STORAGE_COMMIT_BYTES;
|
||||
|
@ -516,6 +521,7 @@ public:
|
|||
bool REPORT_DD_METRICS;
|
||||
double DD_METRICS_REPORT_INTERVAL;
|
||||
double FETCH_KEYS_TOO_LONG_TIME_CRITERIA;
|
||||
double MAX_STORAGE_COMMIT_TIME;
|
||||
|
||||
//Wait Failure
|
||||
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
|
||||
|
|
|
@ -1307,19 +1307,6 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> watchDegraded(TLogData* self) {
|
||||
if(g_network->isSimulated() && g_simulator.speedUpSimulation) {
|
||||
return Void();
|
||||
}
|
||||
|
||||
wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
|
||||
|
||||
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
|
||||
TEST(true); //6.0 TLog degraded
|
||||
self->degraded->set(true);
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> doQueueCommit( TLogData* self, Reference<LogData> logData, std::vector<Reference<LogData>> missingFinalCommit ) {
|
||||
state Version ver = logData->version.get();
|
||||
state Version commitNumber = self->queueCommitBegin+1;
|
||||
|
@ -1332,12 +1319,11 @@ ACTOR Future<Void> doQueueCommit( TLogData* self, Reference<LogData> logData, st
|
|||
self->diskQueueCommitBytes = 0;
|
||||
self->largeDiskQueueCommitBytes.set(false);
|
||||
|
||||
state Future<Void> degraded = watchDegraded(self);
|
||||
wait(c);
|
||||
wait(ioDegradedOrTimeoutError(c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded,
|
||||
SERVER_KNOBS->TLOG_DEGRADED_DURATION));
|
||||
if(g_network->isSimulated() && !g_simulator.speedUpSimulation && BUGGIFY_WITH_PROB(0.0001)) {
|
||||
wait(delay(6.0));
|
||||
}
|
||||
degraded.cancel();
|
||||
wait(self->queueCommitEnd.whenAtLeast(commitNumber-1));
|
||||
|
||||
//Calling check_yield instead of yield to avoid a destruction ordering problem in simulation
|
||||
|
|
|
@ -453,6 +453,11 @@ struct RolesInfo {
|
|||
obj.setKeyRawNumber("query_queue_max", storageMetrics.getValue("QueryQueueMax"));
|
||||
obj["total_queries"] = StatusCounter(storageMetrics.getValue("QueryQueue")).getStatus();
|
||||
obj["finished_queries"] = StatusCounter(storageMetrics.getValue("FinishedQueries")).getStatus();
|
||||
try { //FIXME: This field was added in a patch release, the try-catch can be removed for the 7.0 release
|
||||
obj["low_priority_queries"] = StatusCounter(storageMetrics.getValue("LowPriorityQueries")).getStatus();
|
||||
} catch(Error &e) {
|
||||
if(e.code() != error_code_attribute_not_found) throw e;
|
||||
}
|
||||
obj["bytes_queried"] = StatusCounter(storageMetrics.getValue("BytesQueried")).getStatus();
|
||||
obj["keys_queried"] = StatusCounter(storageMetrics.getValue("RowsQueried")).getStatus();
|
||||
obj["mutation_bytes"] = StatusCounter(storageMetrics.getValue("MutationBytes")).getStatus();
|
||||
|
@ -1820,6 +1825,7 @@ ACTOR static Future<JsonBuilderObject> workloadStatusFetcher(Reference<AsyncVar<
|
|||
StatusCounter reads;
|
||||
StatusCounter readKeys;
|
||||
StatusCounter readBytes;
|
||||
StatusCounter lowPriorityReads;
|
||||
|
||||
for(auto &ss : storageServers.get()) {
|
||||
TraceEventFields const& storageMetrics = ss.second.at("StorageMetrics");
|
||||
|
@ -1837,6 +1843,19 @@ ACTOR static Future<JsonBuilderObject> workloadStatusFetcher(Reference<AsyncVar<
|
|||
keysObj["read"] = readKeys.getStatus();
|
||||
bytesObj["read"] = readBytes.getStatus();
|
||||
|
||||
try {
|
||||
for(auto &ss : storageServers.get()) {
|
||||
TraceEventFields const& storageMetrics = ss.second.at("StorageMetrics");
|
||||
|
||||
if (storageMetrics.size() > 0) {
|
||||
//FIXME: This field was added in a patch release, for the 7.0 release move this to above loop
|
||||
lowPriorityReads.updateValues(StatusCounter(storageMetrics.getValue("LowPriorityQueries")));
|
||||
}
|
||||
}
|
||||
operationsObj["low_priority_reads"] = lowPriorityReads.getStatus();
|
||||
} catch(Error &e) {
|
||||
if(e.code() != error_code_attribute_not_found) throw e;
|
||||
}
|
||||
}
|
||||
catch (Error& e) {
|
||||
if (e.code() == error_code_actor_cancelled)
|
||||
|
|
|
@ -1750,19 +1750,6 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
|
|||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> watchDegraded(TLogData* self) {
|
||||
if(g_network->isSimulated() && g_simulator.speedUpSimulation) {
|
||||
return Void();
|
||||
}
|
||||
|
||||
wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
|
||||
|
||||
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
|
||||
TEST(true); //TLog degraded
|
||||
self->degraded->set(true);
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> doQueueCommit( TLogData* self, Reference<LogData> logData, std::vector<Reference<LogData>> missingFinalCommit ) {
|
||||
state Version ver = logData->version.get();
|
||||
state Version commitNumber = self->queueCommitBegin+1;
|
||||
|
@ -1775,12 +1762,11 @@ ACTOR Future<Void> doQueueCommit( TLogData* self, Reference<LogData> logData, st
|
|||
self->diskQueueCommitBytes = 0;
|
||||
self->largeDiskQueueCommitBytes.set(false);
|
||||
|
||||
state Future<Void> degraded = watchDegraded(self);
|
||||
wait(c);
|
||||
wait(ioDegradedOrTimeoutError(c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded,
|
||||
SERVER_KNOBS->TLOG_DEGRADED_DURATION));
|
||||
if(g_network->isSimulated() && !g_simulator.speedUpSimulation && BUGGIFY_WITH_PROB(0.0001)) {
|
||||
wait(delay(6.0));
|
||||
}
|
||||
degraded.cancel();
|
||||
wait(self->queueCommitEnd.whenAtLeast(commitNumber-1));
|
||||
|
||||
//Calling check_yield instead of yield to avoid a destruction ordering problem in simulation
|
||||
|
|
|
@ -584,7 +584,7 @@ public:
|
|||
|
||||
struct Counters {
|
||||
CounterCollection cc;
|
||||
Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried, watchQueries, emptyQueries;
|
||||
Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, lowPriorityQueries, rowsQueried, bytesQueried, watchQueries, emptyQueries;
|
||||
Counter bytesInput, bytesDurable, bytesFetched,
|
||||
mutationBytes; // Like bytesInput but without MVCC accounting
|
||||
Counter sampledBytesCleared;
|
||||
|
@ -604,6 +604,7 @@ public:
|
|||
getRangeQueries("GetRangeQueries", cc),
|
||||
allQueries("QueryQueue", cc),
|
||||
finishedQueries("FinishedQueries", cc),
|
||||
lowPriorityQueries("LowPriorityQueries", cc),
|
||||
rowsQueried("RowsQueried", cc),
|
||||
bytesQueried("BytesQueried", cc),
|
||||
watchQueries("WatchQueries", cc),
|
||||
|
@ -750,6 +751,18 @@ public:
|
|||
(currentRate() < 1e-6 ? 1e6 : 1.0 / currentRate()));
|
||||
}
|
||||
|
||||
// Normally the storage server prefers to serve read requests over making mutations
|
||||
// durable to disk. However, when the storage server falls to far behind on
|
||||
// making mutations durable, this function will change the priority to prefer writes.
|
||||
Future<Void> getQueryDelay() {
|
||||
if ((version.get() - durableVersion.get() > SERVER_KNOBS->LOW_PRIORITY_DURABILITY_LAG) ||
|
||||
(queueSize() > SERVER_KNOBS->LOW_PRIORITY_STORAGE_QUEUE_BYTES)) {
|
||||
++counters.lowPriorityQueries;
|
||||
return delay(0, TaskPriority::LowPriorityRead);
|
||||
}
|
||||
return delay(0, TaskPriority::DefaultEndpoint);
|
||||
}
|
||||
|
||||
template<class Reply>
|
||||
using isLoadBalancedReply = std::is_base_of<LoadBalancedReply, Reply>;
|
||||
|
||||
|
@ -978,7 +991,7 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
|
|||
|
||||
// Active load balancing runs at a very high priority (to obtain accurate queue lengths)
|
||||
// so we need to downgrade here
|
||||
wait( delay(0, TaskPriority::DefaultEndpoint) );
|
||||
wait( data->getQueryDelay() );
|
||||
|
||||
if( req.debugID.present() )
|
||||
g_traceBatch.addEvent("GetValueDebug", req.debugID.get().first(), "getValueQ.DoRead"); //.detail("TaskID", g_network->getCurrentTask());
|
||||
|
@ -1540,11 +1553,8 @@ ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req )
|
|||
// so we need to downgrade here
|
||||
if (SERVER_KNOBS->FETCH_KEYS_LOWER_PRIORITY && req.isFetchKeys) {
|
||||
wait( delay(0, TaskPriority::FetchKeys) );
|
||||
// } else if (false) {
|
||||
// // Placeholder for up-prioritizing fetches for important requests
|
||||
// taskType = TaskPriority::DefaultDelay;
|
||||
} else {
|
||||
wait( delay(0, TaskPriority::DefaultEndpoint) );
|
||||
wait( data->getQueryDelay() );
|
||||
}
|
||||
|
||||
try {
|
||||
|
@ -1677,7 +1687,7 @@ ACTOR Future<Void> getKeyQ( StorageServer* data, GetKeyRequest req ) {
|
|||
|
||||
// Active load balancing runs at a very high priority (to obtain accurate queue lengths)
|
||||
// so we need to downgrade here
|
||||
wait( delay(0, TaskPriority::DefaultEndpoint) );
|
||||
wait( data->getQueryDelay() );
|
||||
|
||||
try {
|
||||
state Version version = wait( waitForVersion( data, req.version ) );
|
||||
|
@ -3179,7 +3189,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
|
|||
durableDelay = delay(SERVER_KNOBS->STORAGE_COMMIT_INTERVAL, TaskPriority::UpdateStorage);
|
||||
}
|
||||
|
||||
wait( durable );
|
||||
wait( ioTimeoutError(durable, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME));
|
||||
|
||||
debug_advanceMinCommittedVersion( data->thisServerID, newOldestVersion );
|
||||
|
||||
|
|
|
@ -212,8 +212,7 @@ ACTOR Future<Void> workerHandleErrors(FutureStream<ErrorInfo> errors) {
|
|||
|
||||
endRole(err.role, err.id, "Error", ok, err.error);
|
||||
|
||||
|
||||
if (err.error.code() == error_code_please_reboot || err.error.code() == error_code_io_timeout || (err.role == Role::SHARED_TRANSACTION_LOG && err.error.code() == error_code_io_error )) throw err.error;
|
||||
if (err.error.code() == error_code_please_reboot || (err.role == Role::SHARED_TRANSACTION_LOG && (err.error.code() == error_code_io_error || err.error.code() == error_code_io_timeout))) throw err.error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -270,13 +270,15 @@ struct MachineAttritionWorkload : TestWorkload {
|
|||
|
||||
ISimulator::KillType kt = ISimulator::Reboot;
|
||||
if( !self->reboot ) {
|
||||
int killType = deterministicRandom()->randomInt(0,3);
|
||||
int killType = deterministicRandom()->randomInt(0,3); //FIXME: enable disk stalls
|
||||
if( killType == 0 )
|
||||
kt = ISimulator::KillInstantly;
|
||||
else if( killType == 1 )
|
||||
kt = ISimulator::InjectFaults;
|
||||
else
|
||||
else if( killType == 2 )
|
||||
kt = ISimulator::RebootAndDelete;
|
||||
else
|
||||
kt = ISimulator::FailDisk;
|
||||
}
|
||||
TraceEvent("Assassination").detail("TargetDatacenter", target).detail("Reboot", self->reboot).detail("KillType", kt);
|
||||
|
||||
|
@ -336,7 +338,20 @@ struct MachineAttritionWorkload : TestWorkload {
|
|||
TraceEvent("RebootAndDelete").detail("TargetMachine", targetMachine.toString());
|
||||
g_simulator.killZone( targetMachine.zoneId(), ISimulator::RebootAndDelete );
|
||||
} else {
|
||||
auto kt = (deterministicRandom()->random01() < 0.5 || !self->allowFaultInjection) ? ISimulator::KillInstantly : ISimulator::InjectFaults;
|
||||
auto kt = ISimulator::KillInstantly;
|
||||
if( self->allowFaultInjection ) {
|
||||
if( randomDouble < 0.50 ) {
|
||||
kt = ISimulator::InjectFaults;
|
||||
}
|
||||
//FIXME: enable disk stalls
|
||||
/*
|
||||
if( randomDouble < 0.56 ) {
|
||||
kt = ISimulator::InjectFaults;
|
||||
} else if( randomDouble < 0.66 ) {
|
||||
kt = ISimulator::FailDisk;
|
||||
}
|
||||
*/
|
||||
}
|
||||
g_simulator.killZone( targetMachine.zoneId(), kt );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ public: // variables
|
|||
public: // ctor & dtor
|
||||
SnapTestWorkload(WorkloadContext const& wcx)
|
||||
: TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), testID(0), snapUID() {
|
||||
TraceEvent("SnapTestWorkload Constructor");
|
||||
TraceEvent("SnapTestWorkloadConstructor");
|
||||
std::string workloadName = "SnapTest";
|
||||
maxRetryCntToRetrieveMessage = 10;
|
||||
|
||||
|
|
|
@ -113,16 +113,10 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
|
|||
init( MAX_EVICT_ATTEMPTS, 100 ); if( randomize && BUGGIFY ) MAX_EVICT_ATTEMPTS = 2;
|
||||
init( CACHE_EVICTION_POLICY, "random" );
|
||||
init( PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION, 0.1 ); if( randomize && BUGGIFY ) PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION = 0.0; else if( randomize && BUGGIFY ) PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION = 1.0;
|
||||
|
||||
init( FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS, -1 );
|
||||
init( FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT, -1 );
|
||||
if( randomize && BUGGIFY ) {
|
||||
// Choose an window between .01 and 1.01 seconds.
|
||||
FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS = 0.01 + deterministicRandom()->random01();
|
||||
// Choose 10k to 50k operations per second
|
||||
int opsPerSecond = deterministicRandom()->randomInt(1000, 5000);
|
||||
// Set window limit to opsPerSecond scaled down to window size
|
||||
FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT = opsPerSecond * FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS;
|
||||
init( FLOW_CACHEDFILE_WRITE_IO_SIZE, 0 );
|
||||
if ( randomize && BUGGIFY) {
|
||||
// Choose 16KB to 64KB as I/O size
|
||||
FLOW_CACHEDFILE_WRITE_IO_SIZE = deterministicRandom()->randomInt(16384, 65537);
|
||||
}
|
||||
|
||||
//AsyncFileEIO
|
||||
|
@ -142,6 +136,7 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
|
|||
//GenericActors
|
||||
init( BUGGIFY_FLOW_LOCK_RELEASE_DELAY, 1.0 );
|
||||
init( LOW_PRIORITY_DELAY_COUNT, 5 );
|
||||
init( LOW_PRIORITY_MAX_DELAY, 5.0 );
|
||||
|
||||
//IAsyncFile
|
||||
init( INCREMENTAL_DELETE_TRUNCATE_AMOUNT, 5e8 ); //500MB
|
||||
|
|
|
@ -130,8 +130,7 @@ public:
|
|||
double TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY;
|
||||
int TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT;
|
||||
int PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT;
|
||||
int FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT;
|
||||
double FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS;
|
||||
int FLOW_CACHEDFILE_WRITE_IO_SIZE;
|
||||
|
||||
//AsyncFileEIO
|
||||
int EIO_MAX_PARALLELISM;
|
||||
|
@ -150,6 +149,7 @@ public:
|
|||
//GenericActors
|
||||
double BUGGIFY_FLOW_LOCK_RELEASE_DELAY;
|
||||
int LOW_PRIORITY_DELAY_COUNT;
|
||||
double LOW_PRIORITY_MAX_DELAY;
|
||||
|
||||
//IAsyncFile
|
||||
int64_t INCREMENTAL_DELETE_TRUNCATE_AMOUNT;
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <math.h> // For _set_FMA3_enable workaround in platformInit
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include "flow/Platform.h"
|
||||
#include "flow/Arena.h"
|
||||
|
||||
|
@ -2606,7 +2607,13 @@ int64_t fileSize(std::string const& filename) {
|
|||
std::string readFileBytes( std::string const& filename, int maxSize ) {
|
||||
std::string s;
|
||||
FILE* f = fopen(filename.c_str(), "rb" FOPEN_CLOEXEC_MODE);
|
||||
if (!f) throw file_not_readable();
|
||||
if (!f) {
|
||||
TraceEvent(SevWarn, "FileOpenError")
|
||||
.detail("Filename", filename)
|
||||
.detail("Errno", errno)
|
||||
.detail("ErrorDescription", strerror(errno));
|
||||
throw file_not_readable();
|
||||
}
|
||||
try {
|
||||
fseek(f, 0, SEEK_END);
|
||||
size_t size = ftell(f);
|
||||
|
|
|
@ -28,9 +28,7 @@ TLSPolicy::~TLSPolicy() {}
|
|||
namespace TLS {
|
||||
|
||||
void DisableOpenSSLAtExitHandler() {
|
||||
#ifdef TLS_DISABLED
|
||||
return;
|
||||
#else
|
||||
#ifdef HAVE_OPENSSL_INIT_NO_ATEXIT
|
||||
static bool once = false;
|
||||
if (!once) {
|
||||
once = true;
|
||||
|
@ -43,9 +41,7 @@ void DisableOpenSSLAtExitHandler() {
|
|||
}
|
||||
|
||||
void DestroyOpenSSLGlobalState() {
|
||||
#ifdef TLS_DISABLED
|
||||
return;
|
||||
#else
|
||||
#ifdef HAVE_OPENSSL_INIT_NO_ATEXIT
|
||||
OPENSSL_cleanup();
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -135,8 +135,10 @@ ACTOR Future<Void> returnIfTrue( Future<bool> f )
|
|||
|
||||
ACTOR Future<Void> lowPriorityDelay( double waitTime ) {
|
||||
state int loopCount = 0;
|
||||
while(loopCount < FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT) {
|
||||
wait(delay(waitTime/FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT, TaskPriority::Low));
|
||||
state int totalLoops = std::max<int>(waitTime/FLOW_KNOBS->LOW_PRIORITY_MAX_DELAY,FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT);
|
||||
|
||||
while(loopCount < totalLoops) {
|
||||
wait(delay(waitTime/totalLoops, TaskPriority::Low));
|
||||
loopCount++;
|
||||
}
|
||||
return Void();
|
||||
|
|
|
@ -872,6 +872,35 @@ Future<T> ioTimeoutError( Future<T> what, double time ) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR template <class T>
|
||||
Future<T> ioDegradedOrTimeoutError(Future<T> what, double errTime, Reference<AsyncVar<bool>> degraded,
|
||||
double degradedTime) {
|
||||
if (degradedTime < errTime) {
|
||||
Future<Void> degradedEnd = lowPriorityDelay(degradedTime);
|
||||
choose {
|
||||
when(T t = wait(what)) { return t; }
|
||||
when(wait(degradedEnd)) {
|
||||
TEST(true); // TLog degraded
|
||||
TraceEvent(SevWarnAlways, "IoDegraded");
|
||||
degraded->set(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Future<Void> end = lowPriorityDelay(errTime - degradedTime);
|
||||
choose {
|
||||
when(T t = wait(what)) { return t; }
|
||||
when(wait(end)) {
|
||||
Error err = io_timeout();
|
||||
if (g_network->isSimulated()) {
|
||||
err = err.asInjectedFault();
|
||||
}
|
||||
TraceEvent(SevError, "IoTimeoutError").error(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR template <class T>
|
||||
Future<Void> streamHelper( PromiseStream<T> output, PromiseStream<Error> errors, Future<T> input ) {
|
||||
try {
|
||||
|
|
|
@ -99,6 +99,7 @@ enum class TaskPriority {
|
|||
RestoreLoaderFinishVersionBatch = 2220,
|
||||
RestoreLoaderSendMutations = 2210,
|
||||
RestoreLoaderLoadFiles = 2200,
|
||||
LowPriorityRead = 2100,
|
||||
Low = 2000,
|
||||
|
||||
Min = 1000,
|
||||
|
|
Loading…
Reference in New Issue