Merge branch 'release-6.2' into merge-release-6.2-into-release-6.3

# Conflicts:
#	cmake/CompileBoost.cmake
#	cmake/FDBComponents.cmake
#	fdbrpc/AsyncFileCached.actor.h
#	fdbrpc/simulator.h
#	fdbserver/KeyValueStoreSQLite.actor.cpp
#	fdbserver/Knobs.cpp
#	fdbserver/Knobs.h
#	fdbserver/storageserver.actor.cpp
#	flow/Knobs.h
#	flow/network.h
A.J. Beamon 2021-02-08 09:20:28 -08:00
commit 67e783acf8
27 changed files with 230 additions and 106 deletions

View File

@@ -1,3 +1,4 @@
ARG IMAGE_TAG=0.1.24
FROM centos:6
# Clean yum cache, disable default Base repo and enable Vault
@@ -29,17 +30,35 @@ USER root
RUN adduser --comment '' fdb && chown fdb /opt
# wget of bintray without forcing UTF-8 encoding results in 403 Forbidden
# Old versions of FDB need boost 1.67
RUN cd /opt/ &&\
curl -L https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.bz2 -o boost_1_67_0.tar.bz2 &&\
echo "2684c972994ee57fc5632e03bf044746f6eb45d4920c343937a465fd67a5adba boost_1_67_0.tar.bz2" > boost-sha-67.txt &&\
sha256sum -c boost-sha-67.txt &&\
tar -xjf boost_1_67_0.tar.bz2 &&\
rm -rf boost_1_67_0.tar.bz2 boost-sha-67.txt boost_1_67_0/libs &&\
rm -rf boost_1_67_0.tar.bz2 boost-sha-67.txt boost_1_67_0/libs
# install Boost 1.72
# wget of bintray without forcing UTF-8 encoding results in 403 Forbidden
RUN cd /tmp/ &&\
curl -L https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.bz2 -o boost_1_72_0.tar.bz2 &&\
echo "59c9b274bc451cf91a9ba1dd2c7fdcaf5d60b1b3aa83f2c9fa143417cc660722 boost_1_72_0.tar.bz2" > boost-sha-72.txt &&\
sha256sum -c boost-sha-72.txt &&\
tar -xjf boost_1_72_0.tar.bz2 &&\
rm -rf boost_1_72_0.tar.bz2 boost-sha-72.txt boost_1_72_0/libs
cd boost_1_72_0 &&\
scl enable devtoolset-8 -- ./bootstrap.sh --with-libraries=context &&\
scl enable devtoolset-8 -- ./b2 link=static cxxflags=-std=c++14 --prefix=/opt/boost_1_72_0 install &&\
rm -rf boost_1_72_0.tar.bz2 boost-sha-72.txt boost_1_72_0
# jemalloc (needed for FDB 6.3 and later)
RUN cd /tmp/ &&\
curl -L https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2 -o jemalloc-5.2.1.tar.bz2 &&\
echo "34330e5ce276099e2e8950d9335db5a875689a4c6a56751ef3b1d8c537f887f6 jemalloc-5.2.1.tar.bz2" > jemalloc-sha.txt &&\
sha256sum -c jemalloc-sha.txt &&\
tar --no-same-owner --no-same-permissions -xjf jemalloc-5.2.1.tar.bz2 &&\
cd jemalloc-5.2.1 &&\
scl enable devtoolset-8 -- ./configure --enable-static --disable-cxx &&\
scl enable devtoolset-8 -- make install
# install cmake
RUN curl -L https://github.com/Kitware/CMake/releases/download/v3.13.4/cmake-3.13.4-Linux-x86_64.tar.gz -o /tmp/cmake.tar.gz &&\
@@ -79,7 +98,7 @@ RUN cd /opt/ && curl -L https://github.com/facebook/rocksdb/archive/v6.10.1.tar.
RUN cd /opt/ && curl -L https://github.com/manticoresoftware/manticoresearch/raw/master/misc/junit/ctest2junit.xsl -o ctest2junit.xsl
# Setting this environment variable switches from OpenSSL to BoringSSL
#ENV OPENSSL_ROOT_DIR=/opt/boringssl
ENV OPENSSL_ROOT_DIR=/opt/boringssl
# install BoringSSL: TODO: They don't seem to have releases(?) I picked today's master SHA.
RUN cd /opt &&\
@@ -110,8 +129,8 @@ RUN cd /opt/boringssl/build &&\
ARG TIMEZONEINFO=America/Los_Angeles
RUN rm -f /etc/localtime && ln -s /usr/share/zoneinfo/${TIMEZONEINFO} /etc/localtime
LABEL version=0.1.22
ENV DOCKER_IMAGEVER=0.1.22
LABEL version=${IMAGE_TAG}
ENV DOCKER_IMAGEVER=${IMAGE_TAG}
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0
ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc
ENV CXX=/opt/rh/devtoolset-8/root/usr/bin/g++

View File

@@ -1,4 +1,5 @@
ARG IMAGE_TAG=0.1.21
ARG IMAGE_TAG=0.1.24
ARG IMAGE_VERSION=0.11.15
FROM foundationdb/foundationdb-build:${IMAGE_TAG}
USER root
@@ -51,8 +52,8 @@ RUN cp -iv /usr/local/bin/clang++ /usr/local/bin/clang++.deref &&\
ldconfig &&\
rm -rf /mnt/artifacts
LABEL version=0.11.13
ENV DOCKER_IMAGEVER=0.11.13
LABEL version=${IMAGE_VERSION}
ENV DOCKER_IMAGEVER=${IMAGE_VERSION}
ENV CLANGCC=/usr/local/bin/clang.de8a65ef
ENV CLANGCXX=/usr/local/bin/clang++.de8a65ef

View File

@@ -2,7 +2,7 @@ version: "3"
services:
common: &common
image: foundationdb/foundationdb-build:0.1.22
image: foundationdb/foundationdb-build:0.1.24
build-setup: &build-setup
<<: *common
@@ -60,7 +60,7 @@ services:
snapshot-cmake: &snapshot-cmake
<<: *build-setup
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 -DUSE_WERROR=ON /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "packages" "strip_targets" && cpack'
prb-cmake:
<<: *snapshot-cmake
@@ -68,7 +68,7 @@ services:
snapshot-bindings-cmake: &snapshot-bindings-cmake
<<: *build-setup
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "bindings/all"'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=0 -DUSE_WERROR=ON /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" "bindings/all"'
prb-bindings-cmake:
<<: *snapshot-bindings-cmake
@@ -84,7 +84,7 @@ services:
snapshot-ctest: &snapshot-ctest
<<: *build-setup
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 -DUSE_WERROR=ON /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -L fast -j "$${MAKEJOBS}" --output-on-failure'
prb-ctest:
<<: *snapshot-ctest
@@ -92,7 +92,7 @@ services:
snapshot-correctness: &snapshot-correctness
<<: *build-setup
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure'
command: scl enable devtoolset-8 rh-python36 rh-ruby24 -- bash -c 'mkdir -p "$${BUILD_DIR}" && cd "$${BUILD_DIR}" && cmake -G "Ninja" -DFDB_RELEASE=1 -DUSE_WERROR=ON /__this_is_some_very_long_name_dir_needed_to_fix_a_bug_with_debug_rpms__/foundationdb && ninja -v -j "$${MAKEJOBS}" && ctest -j "$${MAKEJOBS}" --output-on-failure'
prb-correctness:
<<: *snapshot-correctness

View File

@@ -11,6 +11,7 @@ endif()
################################################################################
# SSL
################################################################################
include(CheckSymbolExists)
set(DISABLE_TLS OFF CACHE BOOL "Don't try to find OpenSSL and always build without TLS support")
@@ -23,12 +24,12 @@ else()
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
check_symbol_exists("OPENSSL_INIT_NO_ATEXIT" "openssl/crypto.h" OPENSSL_HAS_NO_ATEXIT)
if(OPENSSL_HAS_NO_ATEXIT)
set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL)
add_compile_options(-DHAVE_OPENSSL_INIT_NO_AT_EXIT)
else()
message(WARNING "An OpenSSL version was found, but it doesn't support OPENSSL_INIT_NO_ATEXIT - Will compile without TLS Support")
set(WITH_TLS OFF)
message(STATUS "Found OpenSSL without OPENSSL_INIT_NO_ATEXIT: assuming BoringSSL")
endif()
set(WITH_TLS ON)
add_compile_options(-DHAVE_OPENSSL)
else()
message(STATUS "OpenSSL was not found - Will compile without TLS Support")
message(STATUS "You can set OPENSSL_ROOT_DIR to help cmake find it")

View File

@@ -93,6 +93,11 @@
"counter":0,
"roughness":0.0
},
"low_priority_queries":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"bytes_queried":{
"hz":0.0,
"counter":0,
@@ -479,6 +484,11 @@
"hz":0.0,
"counter":0,
"roughness":0.0
},
"low_priority_reads":{ // measures number of incoming low priority read requests
"hz":0.0,
"counter":0,
"roughness":0.0
},
"location_requests":{ // measures number of outgoing key server location responses
"hz":0.0,

View File

@@ -11,6 +11,7 @@ Release Notes
* Add documentation on the read and write path. `(PR #4099) <https://github.com/apple/foundationdb/pull/4099>`_
* Add a histogram to expose commit batching window on Proxies. `(PR #4166) <https://github.com/apple/foundationdb/pull/4166>`_
* Fix double counting of range reads in TransactionMetrics. `(PR #4130) <https://github.com/apple/foundationdb/pull/4130>`_
* Add a trace event that can be used as an indicator of the load on the proxy. `(PR #4166) <https://github.com/apple/foundationdb/pull/4166>`_
6.2.28
======

View File

@@ -116,6 +116,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"counter":0,
"roughness":0.0
},
"low_priority_queries":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"bytes_queried":{
"hz":0.0,
"counter":0,
@@ -521,6 +526,11 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"counter":0,
"roughness":0.0
},
"low_priority_reads":{
"hz":0.0,
"counter":0,
"roughness":0.0
},
"location_requests":{
"hz":0.0,
"counter":0,

View File

@@ -231,8 +231,6 @@ public:
return filename;
}
std::vector<AFCPage*> const& getFlushable() { return flushable; }
void setRateControl(Reference<IRateControl> const& rc) override { rateControl = rc; }
Reference<IRateControl> const& getRateControl() override { return rateControl; }
@@ -253,8 +251,8 @@ public:
}
auto f = quiesce();
//TraceEvent("AsyncFileCachedDel").detail("Filename", filename)
// .detail("Refcount", debugGetReferenceCount()).detail("CanDie", f.isReady()).backtrace();
TraceEvent("AsyncFileCachedDel").detail("Filename", filename)
.detail("Refcount", debugGetReferenceCount()).detail("CanDie", f.isReady()).backtrace();
if (f.isReady())
delete this;
else
@@ -523,8 +521,16 @@ struct AFCPage : public EvictablePage, public FastAllocated<AFCPage> {
if (dirty) {
// Wait for rate control if it is set
if (self->owner->getRateControl())
wait(self->owner->getRateControl()->getAllowance(1));
if (self->owner->getRateControl()) {
int allowance = 1;
// If I/O size is defined, wait for the calculated I/O quota
if (FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_IO_SIZE > 0) {
allowance = (self->pageCache->pageSize + FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_IO_SIZE - 1) /
FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_IO_SIZE; // round up
ASSERT(allowance > 0);
}
wait(self->owner->getRateControl()->getAllowance(allowance));
}
if ( self->pageOffset + self->pageCache->pageSize > self->owner->length ) {
ASSERT(self->pageOffset < self->owner->length);
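A worked example of the round-up above, with hypothetical sizes (the real values are pageCache->pageSize and the FLOW_CACHEDFILE_WRITE_IO_SIZE knob added by this commit):

// Sketch: ceiling division charges one allowance unit per started I/O-sized chunk.
int pageSize = 4096;  // hypothetical pageCache->pageSize
int ioSize = 1024;    // hypothetical FLOW_CACHEDFILE_WRITE_IO_SIZE
int allowance = (pageSize + ioSize - 1) / ioSize; // = 4; never rounds down to zero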

View File

@@ -1114,7 +1114,7 @@ public:
int nQuorum = ((desiredCoordinators+1)/2)*2-1;
KillType newKt = kt;
if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete))
if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete))
{
LocalityGroup primaryProcessesLeft, primaryProcessesDead;
LocalityGroup primarySatelliteProcessesLeft, primarySatelliteProcessesDead;
@@ -1255,6 +1255,7 @@ public:
TEST( true ); // Simulated machine was killed with any kill type
TEST( kt == KillInstantly ); // Simulated machine was killed instantly
TEST( kt == InjectFaults ); // Simulated machine was killed with faults
TEST( kt == FailDisk ); // Simulated machine was killed with a failed disk
if (kt == KillInstantly) {
TraceEvent(SevWarn, "FailMachine")
@@ -1281,6 +1282,9 @@ public:
machine->fault_injection_r = deterministicRandom()->randomUniqueID().first();
machine->fault_injection_p1 = 0.1;
machine->fault_injection_p2 = deterministicRandom()->random01();
} else if (kt == FailDisk) {
TraceEvent(SevWarn, "FailDiskMachine").detail("Name", machine->name).detail("Address", machine->address).detail("ZoneId", machine->locality.zoneId()).detail("Process", machine->toString()).detail("Rebooting", machine->rebooting).detail("Protected", protectedAddresses.count(machine->address)).backtrace();
machine->failedDisk = true;
} else {
ASSERT( false );
}
@@ -1371,7 +1375,7 @@ public:
}
// Check if machine can be removed, if requested
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete)))
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete)))
{
std::vector<ProcessInfo*> processesLeft, processesDead;
int protectedWorker = 0, unavailable = 0, excluded = 0, cleared = 0;
@@ -1404,7 +1408,7 @@ public:
if (!canKillProcesses(processesLeft, processesDead, kt, &kt)) {
TraceEvent("ChangedKillMachine").detail("MachineId", machineId).detail("KillType", kt).detail("OrigKillType", ktOrig).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("TotalProcesses", machines.size()).detail("ProcessesPerMachine", processesPerMachine).detail("Protected", protectedWorker).detail("Unavailable", unavailable).detail("Excluded", excluded).detail("Cleared", cleared).detail("ProtectedTotal", protectedAddresses.size()).detail("TLogPolicy", tLogPolicy->info()).detail("StoragePolicy", storagePolicy->info());
}
else if ((kt == KillInstantly) || (kt == InjectFaults)) {
else if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk)) {
TraceEvent("DeadMachine").detail("MachineId", machineId).detail("KillType", kt).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("TotalProcesses", machines.size()).detail("ProcessesPerMachine", processesPerMachine).detail("TLogPolicy", tLogPolicy->info()).detail("StoragePolicy", storagePolicy->info());
for (auto process : processesLeft) {
TraceEvent("DeadMachineSurvivors").detail("MachineId", machineId).detail("KillType", kt).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("SurvivingProcess", process->toString());
@@ -1444,7 +1448,7 @@ public:
TraceEvent("KillMachine").detail("MachineId", machineId).detail("Kt", kt).detail("KtOrig", ktOrig).detail("KillableMachines", processesOnMachine).detail("ProcessPerMachine", processesPerMachine).detail("KillChanged", kt!=ktOrig);
if ( kt < RebootAndDelete ) {
if(kt == InjectFaults && machines[machineId].machineProcess != nullptr)
if((kt == InjectFaults || kt == FailDisk) && machines[machineId].machineProcess != nullptr)
killProcess_internal( machines[machineId].machineProcess, kt );
for (auto& process : machines[machineId].processes) {
TraceEvent("KillMachineProcess").detail("KillType", kt).detail("Process", process->toString()).detail("StartingClass", process->startingClass.toString()).detail("Failed", process->failed).detail("Excluded", process->excluded).detail("Cleared", process->cleared).detail("Rebooting", process->rebooting);
@@ -1492,7 +1496,7 @@ public:
}
// Check if machine can be removed, if requested
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete)))
if (!forceKill && ((kt == KillInstantly) || (kt == InjectFaults) || (kt == FailDisk) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete)))
{
std::vector<ProcessInfo*> processesLeft, processesDead;
for (auto processInfo : getAllProcesses()) {
@@ -1781,6 +1785,9 @@ ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
//Simulates delays for performing operations on disk
Future<Void> waitUntilDiskReady( Reference<DiskParameters> diskParameters, int64_t size, bool sync ) {
if(g_simulator.getCurrentProcess()->failedDisk) {
return Never();
}
if(g_simulator.connectionFailuresDisableDuration > 1e4)
return delay(0.0001);
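Returning Never() models a fully stalled disk: the I/O future simply never fires. A hedged sketch of the pattern that makes this safe, mirroring the updateStorage change later in this commit, where the otherwise-unbounded wait is fenced by MAX_STORAGE_COMMIT_TIME:

// Sketch: under FailDisk the commit future never completes, so the wrapper
// converts the stall into an io_timeout error instead of hanging forever.
state Future<Void> durable = data->storage.commit();
wait( ioTimeoutError(durable, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME) );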

View File

@@ -38,7 +38,7 @@ public:
ISimulator() : desiredCoordinators(1), physicalDatacenters(1), processesPerMachine(0), listenersPerProcess(1), isStopped(false), lastConnectionFailure(0), connectionFailuresDisableDuration(0), speedUpSimulation(false), allSwapsDisabled(false), backupAgents(WaitForType), drAgents(WaitForType), extraDB(NULL), allowLogSetKills(true), usableRegions(1) {}
// Order matters!
enum KillType { KillInstantly, InjectFaults, RebootAndDelete, RebootProcessAndDelete, Reboot, RebootProcess, None };
enum KillType { KillInstantly, InjectFaults, FailDisk, RebootAndDelete, RebootProcessAndDelete, Reboot, RebootProcess, None };
enum BackupAgentType { NoBackupAgents, WaitForType, BackupToFile, BackupToDB };
@@ -68,6 +68,7 @@ public:
uint64_t fault_injection_r;
double fault_injection_p1, fault_injection_p2;
bool failedDisk;
UID uid;
@@ -76,13 +77,13 @@ public:
: name(name), locality(locality), startingClass(startingClass), addresses(addresses),
address(addresses.address), dataFolder(dataFolder), network(net), coordinationFolder(coordinationFolder),
failed(false), excluded(false), cpuTicks(0), rebooting(false), fault_injection_p1(0), fault_injection_p2(0),
fault_injection_r(0), machine(0), cleared(false) {
fault_injection_r(0), machine(0), cleared(false), failedDisk(false) {
uid = deterministicRandom()->randomUniqueID();
}
Future<KillType> onShutdown() { return shutdownSignal.getFuture(); }
bool isReliable() const { return !failed && fault_injection_p1 == 0 && fault_injection_p2 == 0; }
bool isReliable() const { return !failed && fault_injection_p1 == 0 && fault_injection_p2 == 0 && !failedDisk; }
bool isAvailable() const { return !isExcluded() && isReliable(); }
bool isExcluded() const { return excluded; }
bool isCleared() const { return cleared; }
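One note on the enum change above: the existing "Order matters!" comment is load-bearing, because sim2 gates destructive behavior with ordered comparisons. A minimal sketch of that dependency, using the comparison from the killMachine change earlier in this diff:

// Sketch: FailDisk is inserted before RebootAndDelete so ordered checks
// classify it with the destructive kill types without touching each call site.
if (kt < ISimulator::RebootAndDelete) {
    // KillInstantly, InjectFaults, and now FailDisk are all handled here
}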

View File

@@ -1385,13 +1385,13 @@ void SQLiteDB::open(bool writable) {
if (dbFile.isError()) throw dbFile.getError(); // If we've failed to open the file, throw an exception
if (walFile.isError()) throw walFile.getError(); // If we've failed to open the file, throw an exception
// Set Rate control if FLOW_KNOBS are positive
if (FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT > 0 && FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS > 0) {
// Set Rate control if SERVER_KNOBS are positive
if (SERVER_KNOBS->SQLITE_WRITE_WINDOW_LIMIT > 0 && SERVER_KNOBS->SQLITE_WRITE_WINDOW_SECONDS > 0) {
// The writer thread is created before the readers, so it should initialize the rate controls.
if(writable) {
// Create a new rate control and assign it to both files.
Reference<SpeedLimit> rc(new SpeedLimit(FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT,
FLOW_KNOBS->FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS));
Reference<SpeedLimit> rc(
new SpeedLimit(SERVER_KNOBS->SQLITE_WRITE_WINDOW_LIMIT, SERVER_KNOBS->SQLITE_WRITE_WINDOW_SECONDS));
dbFile.get()->setRateControl(rc);
walFile.get()->setRateControl(rc);
} else {
@@ -1922,6 +1922,7 @@ private:
dbFile->setRateControl({});
rc->wakeWaiters();
}
dbFile.clear();
if(walFile && walFile->getRateControl()) {
TraceEvent(SevDebug, "KeyValueStoreSQLiteShutdownRateControl").detail("Filename", walFile->getFilename());
@@ -1929,6 +1930,7 @@ private:
walFile->setRateControl({});
rc->wakeWaiters();
}
walFile.clear();
}
ACTOR static Future<Void> stopOnError( KeyValueStoreSQLite* self ) {

View File

@@ -279,6 +279,16 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( SQLITE_CHUNK_SIZE_PAGES, 25600 ); // 100MB
init( SQLITE_CHUNK_SIZE_PAGES_SIM, 1024 ); // 4MB
init( SQLITE_READER_THREADS, 64 ); // number of read threads
init( SQLITE_WRITE_WINDOW_SECONDS, -1 );
init( SQLITE_WRITE_WINDOW_LIMIT, -1 );
if( randomize && BUGGIFY ) {
// Choose a window between 0.01 and 1.01 seconds.
SQLITE_WRITE_WINDOW_SECONDS = 0.01 + deterministicRandom()->random01();
// Choose random operations per second
int opsPerSecond = deterministicRandom()->randomInt(1000, 5000);
// Set window limit to opsPerSecond scaled down to window size
SQLITE_WRITE_WINDOW_LIMIT = opsPerSecond * SQLITE_WRITE_WINDOW_SECONDS;
}
// Maximum and minimum cell payload bytes allowed on primary page as calculated in SQLite.
// These formulas are copied from SQLite, using its hardcoded constants, so if you are
@@ -497,6 +507,10 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( STORAGE_DURABILITY_LAG_HARD_MAX, 2000e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_HARD_MAX = 100e6;
init( STORAGE_DURABILITY_LAG_SOFT_MAX, 200e6 ); if( smallStorageTarget ) STORAGE_DURABILITY_LAG_SOFT_MAX = 10e6;
//FIXME: Low priority reads are disabled by assigning very high knob values, reduce knobs for 7.0
init( LOW_PRIORITY_STORAGE_QUEUE_BYTES, 775e8 ); if( smallStorageTarget ) LOW_PRIORITY_STORAGE_QUEUE_BYTES = 1750e3;
init( LOW_PRIORITY_DURABILITY_LAG, 275e8 ); if( smallStorageTarget ) LOW_PRIORITY_DURABILITY_LAG = 15e6;
bool smallTlogTarget = randomize && BUGGIFY;
init( TARGET_BYTES_PER_TLOG, 2400e6 ); if( smallTlogTarget ) TARGET_BYTES_PER_TLOG = 2000e3;
init( SPRING_BYTES_TLOG, 400e6 ); if( smallTlogTarget ) SPRING_BYTES_TLOG = 200e3;
@@ -587,6 +601,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
init( REPORT_DD_METRICS, true );
init( DD_METRICS_REPORT_INTERVAL, 30.0 );
init( FETCH_KEYS_TOO_LONG_TIME_CRITERIA, 300.0 );
init( MAX_STORAGE_COMMIT_TIME, 120.0 ); //The max fsync stall time on the storage server and tlog before marking a disk as failed
//Wait Failure
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
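For illustration, one hypothetical BUGGIFY draw for the new SQLite write-window knobs above (outside randomized simulation both stay at -1, i.e. rate control disabled):

// Sketch with hypothetical draws, not defaults:
double windowSeconds = 0.51; // SQLITE_WRITE_WINDOW_SECONDS = 0.01 + random01()
int opsPerSecond = 2000;     // deterministicRandom()->randomInt(1000, 5000)
int windowLimit = opsPerSecond * windowSeconds; // SQLITE_WRITE_WINDOW_LIMIT = 1020 writes per window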

View File

@@ -236,6 +236,8 @@ public:
int SQLITE_CHUNK_SIZE_PAGES;
int SQLITE_CHUNK_SIZE_PAGES_SIM;
int SQLITE_READER_THREADS;
int SQLITE_WRITE_WINDOW_LIMIT;
double SQLITE_WRITE_WINDOW_SECONDS;
// KeyValueStoreSqlite spring cleaning
double SPRING_CLEANING_NO_ACTION_INTERVAL;
@@ -418,6 +420,12 @@ public:
int64_t AUTO_TAG_THROTTLE_STORAGE_QUEUE_BYTES;
int64_t TARGET_BYTES_PER_STORAGE_SERVER_BATCH;
int64_t SPRING_BYTES_STORAGE_SERVER_BATCH;
int64_t STORAGE_HARD_LIMIT_BYTES;
int64_t STORAGE_DURABILITY_LAG_HARD_MAX;
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
int64_t LOW_PRIORITY_STORAGE_QUEUE_BYTES;
int64_t LOW_PRIORITY_DURABILITY_LAG;
int64_t TARGET_BYTES_PER_TLOG;
int64_t SPRING_BYTES_TLOG;
@@ -488,9 +496,6 @@ public:
int FETCH_KEYS_PARALLELISM_BYTES;
int FETCH_KEYS_LOWER_PRIORITY;
int BUGGIFY_BLOCK_BYTES;
int64_t STORAGE_HARD_LIMIT_BYTES;
int64_t STORAGE_DURABILITY_LAG_HARD_MAX;
int64_t STORAGE_DURABILITY_LAG_SOFT_MAX;
double STORAGE_DURABILITY_LAG_REJECT_THRESHOLD;
double STORAGE_DURABILITY_LAG_MIN_RATE;
int STORAGE_COMMIT_BYTES;
@@ -516,6 +521,7 @@ public:
bool REPORT_DD_METRICS;
double DD_METRICS_REPORT_INTERVAL;
double FETCH_KEYS_TOO_LONG_TIME_CRITERIA;
double MAX_STORAGE_COMMIT_TIME;
//Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;

View File

@@ -1307,19 +1307,6 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
return Void();
}
ACTOR Future<Void> watchDegraded(TLogData* self) {
if(g_network->isSimulated() && g_simulator.speedUpSimulation) {
return Void();
}
wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
TEST(true); //6.0 TLog degraded
self->degraded->set(true);
return Void();
}
ACTOR Future<Void> doQueueCommit( TLogData* self, Reference<LogData> logData, std::vector<Reference<LogData>> missingFinalCommit ) {
state Version ver = logData->version.get();
state Version commitNumber = self->queueCommitBegin+1;
@@ -1332,12 +1319,11 @@ ACTOR Future<Void> doQueueCommit( TLogData* self, Reference<LogData> logData, st
self->diskQueueCommitBytes = 0;
self->largeDiskQueueCommitBytes.set(false);
state Future<Void> degraded = watchDegraded(self);
wait(c);
wait(ioDegradedOrTimeoutError(c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded,
SERVER_KNOBS->TLOG_DEGRADED_DURATION));
if(g_network->isSimulated() && !g_simulator.speedUpSimulation && BUGGIFY_WITH_PROB(0.0001)) {
wait(delay(6.0));
}
degraded.cancel();
wait(self->queueCommitEnd.whenAtLeast(commitNumber-1));
//Calling check_yield instead of yield to avoid a destruction ordering problem in simulation

View File

@@ -453,6 +453,11 @@ struct RolesInfo {
obj.setKeyRawNumber("query_queue_max", storageMetrics.getValue("QueryQueueMax"));
obj["total_queries"] = StatusCounter(storageMetrics.getValue("QueryQueue")).getStatus();
obj["finished_queries"] = StatusCounter(storageMetrics.getValue("FinishedQueries")).getStatus();
try { //FIXME: This field was added in a patch release, the try-catch can be removed for the 7.0 release
obj["low_priority_queries"] = StatusCounter(storageMetrics.getValue("LowPriorityQueries")).getStatus();
} catch(Error &e) {
if(e.code() != error_code_attribute_not_found) throw e;
}
obj["bytes_queried"] = StatusCounter(storageMetrics.getValue("BytesQueried")).getStatus();
obj["keys_queried"] = StatusCounter(storageMetrics.getValue("RowsQueried")).getStatus();
obj["mutation_bytes"] = StatusCounter(storageMetrics.getValue("MutationBytes")).getStatus();
@@ -1820,6 +1825,7 @@ ACTOR static Future<JsonBuilderObject> workloadStatusFetcher(Reference<AsyncVar<
StatusCounter reads;
StatusCounter readKeys;
StatusCounter readBytes;
StatusCounter lowPriorityReads;
for(auto &ss : storageServers.get()) {
TraceEventFields const& storageMetrics = ss.second.at("StorageMetrics");
@@ -1837,6 +1843,19 @@ ACTOR static Future<JsonBuilderObject> workloadStatusFetcher(Reference<AsyncVar<
keysObj["read"] = readKeys.getStatus();
bytesObj["read"] = readBytes.getStatus();
try {
for(auto &ss : storageServers.get()) {
TraceEventFields const& storageMetrics = ss.second.at("StorageMetrics");
if (storageMetrics.size() > 0) {
//FIXME: This field was added in a patch release, for the 7.0 release move this to above loop
lowPriorityReads.updateValues(StatusCounter(storageMetrics.getValue("LowPriorityQueries")));
}
}
operationsObj["low_priority_reads"] = lowPriorityReads.getStatus();
} catch(Error &e) {
if(e.code() != error_code_attribute_not_found) throw e;
}
}
catch (Error& e) {
if (e.code() == error_code_actor_cancelled)

View File

@@ -1750,19 +1750,6 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
return Void();
}
ACTOR Future<Void> watchDegraded(TLogData* self) {
if(g_network->isSimulated() && g_simulator.speedUpSimulation) {
return Void();
}
wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
TEST(true); //TLog degraded
self->degraded->set(true);
return Void();
}
ACTOR Future<Void> doQueueCommit( TLogData* self, Reference<LogData> logData, std::vector<Reference<LogData>> missingFinalCommit ) {
state Version ver = logData->version.get();
state Version commitNumber = self->queueCommitBegin+1;
@@ -1775,12 +1762,11 @@ ACTOR Future<Void> doQueueCommit( TLogData* self, Reference<LogData> logData, st
self->diskQueueCommitBytes = 0;
self->largeDiskQueueCommitBytes.set(false);
state Future<Void> degraded = watchDegraded(self);
wait(c);
wait(ioDegradedOrTimeoutError(c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME, self->degraded,
SERVER_KNOBS->TLOG_DEGRADED_DURATION));
if(g_network->isSimulated() && !g_simulator.speedUpSimulation && BUGGIFY_WITH_PROB(0.0001)) {
wait(delay(6.0));
}
degraded.cancel();
wait(self->queueCommitEnd.whenAtLeast(commitNumber-1));
//Calling check_yield instead of yield to avoid a destruction ordering problem in simulation

View File

@@ -584,7 +584,7 @@ public:
struct Counters {
CounterCollection cc;
Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, rowsQueried, bytesQueried, watchQueries, emptyQueries;
Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, finishedQueries, lowPriorityQueries, rowsQueried, bytesQueried, watchQueries, emptyQueries;
Counter bytesInput, bytesDurable, bytesFetched,
mutationBytes; // Like bytesInput but without MVCC accounting
Counter sampledBytesCleared;
@@ -604,6 +604,7 @@ public:
getRangeQueries("GetRangeQueries", cc),
allQueries("QueryQueue", cc),
finishedQueries("FinishedQueries", cc),
lowPriorityQueries("LowPriorityQueries", cc),
rowsQueried("RowsQueried", cc),
bytesQueried("BytesQueried", cc),
watchQueries("WatchQueries", cc),
@@ -750,6 +751,18 @@ public:
(currentRate() < 1e-6 ? 1e6 : 1.0 / currentRate()));
}
// Normally the storage server prefers to serve read requests over making mutations
// durable to disk. However, when the storage server falls too far behind on
// making mutations durable, this function will change the priority to prefer writes.
Future<Void> getQueryDelay() {
if ((version.get() - durableVersion.get() > SERVER_KNOBS->LOW_PRIORITY_DURABILITY_LAG) ||
(queueSize() > SERVER_KNOBS->LOW_PRIORITY_STORAGE_QUEUE_BYTES)) {
++counters.lowPriorityQueries;
return delay(0, TaskPriority::LowPriorityRead);
}
return delay(0, TaskPriority::DefaultEndpoint);
}
template<class Reply>
using isLoadBalancedReply = std::is_base_of<LoadBalancedReply, Reply>;
@@ -978,7 +991,7 @@ ACTOR Future<Void> getValueQ( StorageServer* data, GetValueRequest req ) {
// Active load balancing runs at a very high priority (to obtain accurate queue lengths)
// so we need to downgrade here
wait( delay(0, TaskPriority::DefaultEndpoint) );
wait( data->getQueryDelay() );
if( req.debugID.present() )
g_traceBatch.addEvent("GetValueDebug", req.debugID.get().first(), "getValueQ.DoRead"); //.detail("TaskID", g_network->getCurrentTask());
@@ -1540,11 +1553,8 @@ ACTOR Future<Void> getKeyValuesQ( StorageServer* data, GetKeyValuesRequest req )
// so we need to downgrade here
if (SERVER_KNOBS->FETCH_KEYS_LOWER_PRIORITY && req.isFetchKeys) {
wait( delay(0, TaskPriority::FetchKeys) );
// } else if (false) {
// // Placeholder for up-prioritizing fetches for important requests
// taskType = TaskPriority::DefaultDelay;
} else {
wait( delay(0, TaskPriority::DefaultEndpoint) );
wait( data->getQueryDelay() );
}
try {
@@ -1677,7 +1687,7 @@ ACTOR Future<Void> getKeyQ( StorageServer* data, GetKeyRequest req ) {
// Active load balancing runs at a very high priority (to obtain accurate queue lengths)
// so we need to downgrade here
wait( delay(0, TaskPriority::DefaultEndpoint) );
wait( data->getQueryDelay() );
try {
state Version version = wait( waitForVersion( data, req.version ) );
@@ -3179,7 +3189,7 @@ ACTOR Future<Void> updateStorage(StorageServer* data) {
durableDelay = delay(SERVER_KNOBS->STORAGE_COMMIT_INTERVAL, TaskPriority::UpdateStorage);
}
wait( durable );
wait( ioTimeoutError(durable, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME));
debug_advanceMinCommittedVersion( data->thisServerID, newOldestVersion );
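A sizing note on getQueryDelay above: FDB versions advance at roughly one million per second, so the shipped LOW_PRIORITY_DURABILITY_LAG of 275e8 versions is on the order of hours of lag, matching the knob FIXME that calls low-priority reads disabled until the values are reduced. A sketch of the gate for reference:

// Sketch: reads are demoted to TaskPriority::LowPriorityRead only when either
// threshold trips; at the shipped knob values this effectively never happens.
bool lowPriority = (version.get() - durableVersion.get() > SERVER_KNOBS->LOW_PRIORITY_DURABILITY_LAG)
                || (queueSize() > SERVER_KNOBS->LOW_PRIORITY_STORAGE_QUEUE_BYTES);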

View File

@@ -212,8 +212,7 @@ ACTOR Future<Void> workerHandleErrors(FutureStream<ErrorInfo> errors) {
endRole(err.role, err.id, "Error", ok, err.error);
if (err.error.code() == error_code_please_reboot || err.error.code() == error_code_io_timeout || (err.role == Role::SHARED_TRANSACTION_LOG && err.error.code() == error_code_io_error )) throw err.error;
if (err.error.code() == error_code_please_reboot || (err.role == Role::SHARED_TRANSACTION_LOG && (err.error.code() == error_code_io_error || err.error.code() == error_code_io_timeout))) throw err.error;
}
}
}

View File

@@ -270,13 +270,15 @@ struct MachineAttritionWorkload : TestWorkload {
ISimulator::KillType kt = ISimulator::Reboot;
if( !self->reboot ) {
int killType = deterministicRandom()->randomInt(0,3);
int killType = deterministicRandom()->randomInt(0,3); //FIXME: enable disk stalls
if( killType == 0 )
kt = ISimulator::KillInstantly;
else if( killType == 1 )
kt = ISimulator::InjectFaults;
else
else if( killType == 2 )
kt = ISimulator::RebootAndDelete;
else
kt = ISimulator::FailDisk;
}
TraceEvent("Assassination").detail("TargetDatacenter", target).detail("Reboot", self->reboot).detail("KillType", kt);
@@ -336,7 +338,20 @@ struct MachineAttritionWorkload : TestWorkload {
TraceEvent("RebootAndDelete").detail("TargetMachine", targetMachine.toString());
g_simulator.killZone( targetMachine.zoneId(), ISimulator::RebootAndDelete );
} else {
auto kt = (deterministicRandom()->random01() < 0.5 || !self->allowFaultInjection) ? ISimulator::KillInstantly : ISimulator::InjectFaults;
auto kt = ISimulator::KillInstantly;
if( self->allowFaultInjection ) {
if( randomDouble < 0.50 ) {
kt = ISimulator::InjectFaults;
}
//FIXME: enable disk stalls
/*
if( randomDouble < 0.56 ) {
kt = ISimulator::InjectFaults;
} else if( randomDouble < 0.66 ) {
kt = ISimulator::FailDisk;
}
*/
}
g_simulator.killZone( targetMachine.zoneId(), kt );
}
}

View File

@@ -85,7 +85,7 @@ public: // variables
public: // ctor & dtor
SnapTestWorkload(WorkloadContext const& wcx)
: TestWorkload(wcx), numSnaps(0), maxSnapDelay(0.0), testID(0), snapUID() {
TraceEvent("SnapTestWorkload Constructor");
TraceEvent("SnapTestWorkloadConstructor");
std::string workloadName = "SnapTest";
maxRetryCntToRetrieveMessage = 10;

View File

@@ -113,16 +113,10 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
init( MAX_EVICT_ATTEMPTS, 100 ); if( randomize && BUGGIFY ) MAX_EVICT_ATTEMPTS = 2;
init( CACHE_EVICTION_POLICY, "random" );
init( PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION, 0.1 ); if( randomize && BUGGIFY ) PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION = 0.0; else if( randomize && BUGGIFY ) PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION = 1.0;
init( FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS, -1 );
init( FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT, -1 );
if( randomize && BUGGIFY ) {
// Choose a window between 0.01 and 1.01 seconds.
FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS = 0.01 + deterministicRandom()->random01();
// Choose 1k to 5k operations per second
int opsPerSecond = deterministicRandom()->randomInt(1000, 5000);
// Set window limit to opsPerSecond scaled down to window size
FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT = opsPerSecond * FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS;
init( FLOW_CACHEDFILE_WRITE_IO_SIZE, 0 );
if ( randomize && BUGGIFY) {
// Choose 16KB to 64KB as I/O size
FLOW_CACHEDFILE_WRITE_IO_SIZE = deterministicRandom()->randomInt(16384, 65537);
}
//AsyncFileEIO
@@ -142,6 +136,7 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
//GenericActors
init( BUGGIFY_FLOW_LOCK_RELEASE_DELAY, 1.0 );
init( LOW_PRIORITY_DELAY_COUNT, 5 );
init( LOW_PRIORITY_MAX_DELAY, 5.0 );
//IAsyncFile
init( INCREMENTAL_DELETE_TRUNCATE_AMOUNT, 5e8 ); //500MB

View File

@@ -130,8 +130,7 @@ public:
double TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY;
int TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT;
int PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT;
int FLOW_CACHEDFILE_WRITE_WINDOW_LIMIT;
double FLOW_CACHEDFILE_WRITE_WINDOW_SECONDS;
int FLOW_CACHEDFILE_WRITE_IO_SIZE;
//AsyncFileEIO
int EIO_MAX_PARALLELISM;
@@ -150,6 +149,7 @@ public:
//GenericActors
double BUGGIFY_FLOW_LOCK_RELEASE_DELAY;
int LOW_PRIORITY_DELAY_COUNT;
double LOW_PRIORITY_MAX_DELAY;
//IAsyncFile
int64_t INCREMENTAL_DELETE_TRUNCATE_AMOUNT;

View File

@@ -25,6 +25,7 @@
#include <math.h> // For _set_FMA3_enable workaround in platformInit
#endif
#include <errno.h>
#include "flow/Platform.h"
#include "flow/Arena.h"
@@ -2606,7 +2607,13 @@ int64_t fileSize(std::string const& filename) {
std::string readFileBytes( std::string const& filename, int maxSize ) {
std::string s;
FILE* f = fopen(filename.c_str(), "rb" FOPEN_CLOEXEC_MODE);
if (!f) throw file_not_readable();
if (!f) {
TraceEvent(SevWarn, "FileOpenError")
.detail("Filename", filename)
.detail("Errno", errno)
.detail("ErrorDescription", strerror(errno));
throw file_not_readable();
}
try {
fseek(f, 0, SEEK_END);
size_t size = ftell(f);

View File

@@ -28,9 +28,7 @@ TLSPolicy::~TLSPolicy() {}
namespace TLS {
void DisableOpenSSLAtExitHandler() {
#ifdef TLS_DISABLED
return;
#else
#ifdef HAVE_OPENSSL_INIT_NO_ATEXIT
static bool once = false;
if (!once) {
once = true;
@@ -43,9 +41,7 @@ void DisableOpenSSLAtExitHandler() {
}
void DestroyOpenSSLGlobalState() {
#ifdef TLS_DISABLED
return;
#else
#ifdef HAVE_OPENSSL_INIT_NO_ATEXIT
OPENSSL_cleanup();
#endif
}
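For reference, a minimal sketch of the kind of OpenSSL 1.1.1 call these guards now compile in only when the macro is defined (the function bodies are elided in this hunk; BoringSSL lacks OPENSSL_INIT_NO_ATEXIT, which is exactly what the cmake probe earlier in this diff detects):

#include <openssl/crypto.h>

void disableOpenSSLAtExitSketch() {
    // Suppress libcrypto's atexit() handler so shutdown ordering stays explicit;
    // DestroyOpenSSLGlobalState() then invokes OPENSSL_cleanup() itself.
    OPENSSL_init_crypto(OPENSSL_INIT_NO_ATEXIT, nullptr);
}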

View File

@@ -135,8 +135,10 @@ ACTOR Future<Void> returnIfTrue( Future<bool> f )
ACTOR Future<Void> lowPriorityDelay( double waitTime ) {
state int loopCount = 0;
while(loopCount < FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT) {
wait(delay(waitTime/FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT, TaskPriority::Low));
state int totalLoops = std::max<int>(waitTime/FLOW_KNOBS->LOW_PRIORITY_MAX_DELAY,FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT);
while(loopCount < totalLoops) {
wait(delay(waitTime/totalLoops, TaskPriority::Low));
loopCount++;
}
return Void();
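A worked example of the new chunking, using the knob values introduced in this commit (LOW_PRIORITY_DELAY_COUNT = 5, LOW_PRIORITY_MAX_DELAY = 5.0):

// waitTime = 60.0 -> totalLoops = max(60/5.0, 5) = 12 -> twelve 5.0s delays
// waitTime = 10.0 -> totalLoops = max(10/5.0, 5) = 5  -> five 2.0s delays
// Net effect: no single low-priority sleep exceeds LOW_PRIORITY_MAX_DELAY seconds.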

View File

@@ -872,6 +872,35 @@ Future<T> ioTimeoutError( Future<T> what, double time ) {
}
}
ACTOR template <class T>
Future<T> ioDegradedOrTimeoutError(Future<T> what, double errTime, Reference<AsyncVar<bool>> degraded,
double degradedTime) {
if (degradedTime < errTime) {
Future<Void> degradedEnd = lowPriorityDelay(degradedTime);
choose {
when(T t = wait(what)) { return t; }
when(wait(degradedEnd)) {
TEST(true); // TLog degraded
TraceEvent(SevWarnAlways, "IoDegraded");
degraded->set(true);
}
}
}
Future<Void> end = lowPriorityDelay(errTime - degradedTime);
choose {
when(T t = wait(what)) { return t; }
when(wait(end)) {
Error err = io_timeout();
if (g_network->isSimulated()) {
err = err.asInjectedFault();
}
TraceEvent(SevError, "IoTimeoutError").error(err);
throw err;
}
}
}
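A usage sketch mirroring the doQueueCommit call sites earlier in this diff: the soft deadline only raises the degraded flag, while the hard deadline throws io_timeout (tagged as an injected fault in simulation so the harness does not report it as a real bug):

// Sketch: c is the disk queue commit future inside doQueueCommit.
wait( ioDegradedOrTimeoutError(c, SERVER_KNOBS->MAX_STORAGE_COMMIT_TIME,
                               self->degraded, SERVER_KNOBS->TLOG_DEGRADED_DURATION) );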
ACTOR template <class T>
Future<Void> streamHelper( PromiseStream<T> output, PromiseStream<Error> errors, Future<T> input ) {
try {

View File

@@ -99,6 +99,7 @@ enum class TaskPriority {
RestoreLoaderFinishVersionBatch = 2220,
RestoreLoaderSendMutations = 2210,
RestoreLoaderLoadFiles = 2200,
LowPriorityRead = 2100,
Low = 2000,
Min = 1000,