Merge branch 'release-6.3'
# Conflicts: # CMakeLists.txt # cmake/ConfigureCompiler.cmake # fdbserver/Knobs.cpp # fdbserver/StorageCache.actor.cpp # fdbserver/storageserver.actor.cpp # flow/ThreadHelper.actor.h # flow/serialize.h # tests/CMakeLists.txt
This commit is contained in:
commit
a49cb41de7
|
@ -999,12 +999,12 @@ failExit:
|
|||
int worker_process_main(mako_args_t* args, int worker_id, mako_shmhdr_t* shm, pid_t* pid_main) {
|
||||
int i;
|
||||
pthread_t network_thread; /* handle for thread which invoked fdb_run_network() */
|
||||
pthread_t* worker_threads;
|
||||
pthread_t* worker_threads = NULL;
|
||||
#if FDB_API_VERSION < 610
|
||||
FDBCluster* cluster;
|
||||
#endif
|
||||
process_info_t process;
|
||||
thread_args_t* thread_args;
|
||||
thread_args_t* thread_args = NULL;
|
||||
int rc;
|
||||
fdb_error_t err;
|
||||
|
||||
|
@ -1017,7 +1017,11 @@ int worker_process_main(mako_args_t* args, int worker_id, mako_shmhdr_t* shm, pi
|
|||
|
||||
/* Everything starts from here */
|
||||
err = fdb_select_api_version(args->api_version);
|
||||
check_fdb_error(err);
|
||||
if (err) {
|
||||
fprintf(stderr, "ERROR: Failed at %s:%d (%s)\n", __FILE__, __LINE__, fdb_get_error(err));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* enable flatbuffers if specified */
|
||||
if (args->flatbuffers) {
|
||||
|
@ -1065,7 +1069,11 @@ int worker_process_main(mako_args_t* args, int worker_id, mako_shmhdr_t* shm, pi
|
|||
/* Network thread must be setup before doing anything */
|
||||
fprintf(debugme, "DEBUG: fdb_setup_network\n");
|
||||
err = fdb_setup_network();
|
||||
check_fdb_error(err);
|
||||
if (err) {
|
||||
fprintf(stderr, "ERROR: Failed at %s:%d (%s)\n", __FILE__, __LINE__, fdb_get_error(err));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* Each worker process will have its own network thread */
|
||||
fprintf(debugme, "DEBUG: creating network thread\n");
|
||||
|
@ -2067,7 +2075,7 @@ int main(int argc, char* argv[]) {
|
|||
int rc;
|
||||
mako_args_t args;
|
||||
int p;
|
||||
pid_t* worker_pids;
|
||||
pid_t* worker_pids = NULL;
|
||||
proc_type_t proc_type = proc_master;
|
||||
int worker_id;
|
||||
pid_t pid;
|
||||
|
@ -2117,6 +2125,7 @@ int main(int argc, char* argv[]) {
|
|||
/* allocate */
|
||||
shmsize = sizeof(mako_shmhdr_t) + (sizeof(mako_stats_t) * args.num_processes * args.num_threads);
|
||||
if (ftruncate(shmfd, shmsize) < 0) {
|
||||
shm = MAP_FAILED;
|
||||
fprintf(stderr, "ERROR: ftruncate (fd:%d size:%llu) failed\n", shmfd, (unsigned long long)shmsize);
|
||||
goto failExit;
|
||||
}
|
||||
|
|
|
@ -1365,12 +1365,12 @@ const char* StartThreadFunc::name = "START_THREAD";
|
|||
REGISTER_INSTRUCTION_FUNC(StartThreadFunc);
|
||||
|
||||
ACTOR template <class Function>
|
||||
Future<decltype(fake<Function>()(Reference<ReadTransaction>()).getValue())> read(Reference<Database> db,
|
||||
Function func) {
|
||||
Future<decltype(std::declval<Function>()(Reference<ReadTransaction>()).getValue())> read(Reference<Database> db,
|
||||
Function func) {
|
||||
state Reference<ReadTransaction> tr = db->createTransaction();
|
||||
loop {
|
||||
try {
|
||||
state decltype(fake<Function>()(Reference<ReadTransaction>()).getValue()) result = wait(func(tr));
|
||||
state decltype(std::declval<Function>()(Reference<ReadTransaction>()).getValue()) result = wait(func(tr));
|
||||
return result;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "flow/IDispatched.h"
|
||||
#include "bindings/flow/fdb_flow.h"
|
||||
#include "bindings/flow/IDirectory.h"
|
||||
|
@ -57,7 +59,7 @@ struct FlowTesterStack {
|
|||
void push(Future<Standalone<StringRef>> value) {
|
||||
data.push_back(StackItem(index, value));
|
||||
}
|
||||
|
||||
|
||||
void push(Standalone<StringRef> value) {
|
||||
push(Future<Standalone<StringRef>>(value));
|
||||
}
|
||||
|
@ -86,10 +88,10 @@ struct FlowTesterStack {
|
|||
items.push_back(data.back());
|
||||
data.pop_back();
|
||||
count--;
|
||||
}
|
||||
}
|
||||
return items;
|
||||
}
|
||||
|
||||
|
||||
Future<std::vector<FDB::Tuple>> waitAndPop(int count);
|
||||
Future<FDB::Tuple> waitAndPop();
|
||||
|
||||
|
@ -106,7 +108,7 @@ struct FlowTesterStack {
|
|||
|
||||
struct InstructionData : public ReferenceCounted<InstructionData> {
|
||||
bool isDatabase;
|
||||
bool isSnapshot;
|
||||
bool isSnapshot;
|
||||
StringRef instruction;
|
||||
Reference<FDB::Transaction> tr;
|
||||
|
||||
|
@ -153,7 +155,7 @@ struct DirectoryOrSubspace {
|
|||
return "DirectorySubspace";
|
||||
}
|
||||
else if(directory.present()) {
|
||||
return "IDirectory";
|
||||
return "IDirectory";
|
||||
}
|
||||
else if(subspace.present()) {
|
||||
return "Subspace";
|
||||
|
@ -169,10 +171,10 @@ struct DirectoryTesterData {
|
|||
int directoryListIndex;
|
||||
int directoryErrorIndex;
|
||||
|
||||
Reference<FDB::IDirectory> directory() {
|
||||
Reference<FDB::IDirectory> directory() {
|
||||
ASSERT(directoryListIndex < directoryList.size());
|
||||
ASSERT(directoryList[directoryListIndex].directory.present());
|
||||
return directoryList[directoryListIndex].directory.get();
|
||||
return directoryList[directoryListIndex].directory.get();
|
||||
}
|
||||
|
||||
FDB::Subspace* subspace() {
|
||||
|
@ -220,10 +222,10 @@ struct FlowTesterData : public ReferenceCounted<FlowTesterData> {
|
|||
std::string tupleToString(FDB::Tuple const& tuple);
|
||||
|
||||
ACTOR template <class F>
|
||||
Future<decltype(fake<F>()().getValue())> executeMutation(Reference<InstructionData> instruction, F func) {
|
||||
Future<decltype(std::declval<F>()().getValue())> executeMutation(Reference<InstructionData> instruction, F func) {
|
||||
loop {
|
||||
try {
|
||||
state decltype(fake<F>()().getValue()) result = wait(func());
|
||||
state decltype(std::declval<F>()().getValue()) result = wait(func());
|
||||
if(instruction->isDatabase) {
|
||||
wait(instruction->tr->commit());
|
||||
}
|
||||
|
|
|
@ -320,6 +320,12 @@ func (t *transaction) getEstimatedRangeSizeBytes(beginKey Key, endKey Key) Futur
|
|||
|
||||
// GetEstimatedRangeSizeBytes will get an estimate for the number of bytes
|
||||
// stored in the given range.
|
||||
// Note: the estimated size is calculated based on the sampling done by FDB server. The sampling
|
||||
// algorithm works roughly in this way: the larger the key-value pair is, the more likely it would
|
||||
// be sampled and the more accurate its sampled size would be. And due to
|
||||
// that reason it is recommended to use this API to query against large ranges for accuracy considerations.
|
||||
// For a rough reference, if the returned size is larger than 3MB, one can consider the size to be
|
||||
// accurate.
|
||||
func (t Transaction) GetEstimatedRangeSizeBytes(r ExactRange) FutureInt64 {
|
||||
beginKey, endKey := r.FDBRangeKeys()
|
||||
return t.getEstimatedRangeSizeBytes(
|
||||
|
|
|
@ -75,6 +75,9 @@ void printTrace(JNIEnv* env, jclass, jlong logger, jint severity, jstring messag
|
|||
sev = FDBSeverity::Warn;
|
||||
} else if (severity < 40) {
|
||||
sev = FDBSeverity::WarnAlways;
|
||||
} else {
|
||||
assert(false);
|
||||
std::abort();
|
||||
}
|
||||
log->trace(sev, msg, detailsMap);
|
||||
if (isCopy) {
|
||||
|
|
|
@ -427,6 +427,12 @@ public interface ReadTransaction extends ReadTransactionContext {
|
|||
|
||||
/**
|
||||
* Gets an estimate for the number of bytes stored in the given range.
|
||||
* Note: the estimated size is calculated based on the sampling done by FDB server. The sampling
|
||||
* algorithm works roughly in this way: the larger the key-value pair is, the more likely it would
|
||||
* be sampled and the more accurate its sampled size would be. And due to
|
||||
* that reason it is recommended to use this API to query against large ranges for accuracy considerations.
|
||||
* For a rough reference, if the returned size is larger than 3MB, one can consider the size to be
|
||||
* accurate.
|
||||
*
|
||||
* @param begin the beginning of the range (inclusive)
|
||||
* @param end the end of the range (exclusive)
|
||||
|
@ -437,7 +443,12 @@ public interface ReadTransaction extends ReadTransactionContext {
|
|||
|
||||
/**
|
||||
* Gets an estimate for the number of bytes stored in the given range.
|
||||
*
|
||||
* Note: the estimated size is calculated based on the sampling done by FDB server. The sampling
|
||||
* algorithm works roughly in this way: the larger the key-value pair is, the more likely it would
|
||||
* be sampled and the more accurate its sampled size would be. And due to
|
||||
* that reason it is recommended to use this API to query against large ranges for accuracy considerations.
|
||||
* For a rough reference, if the returned size is larger than 3MB, one can consider the size to be
|
||||
* accurate.
|
||||
* @param range the range of the keys
|
||||
*
|
||||
* @return a handle to access the results of the asynchronous call
|
||||
|
|
|
@ -22,5 +22,5 @@ else()
|
|||
|
||||
add_library(boost_target INTERFACE)
|
||||
add_dependencies(boost_target boostProject)
|
||||
target_include_directories(boost_target INTERFACE ${BOOST_INCLUDE_DIR})
|
||||
target_include_directories(boost_target SYSTEM INTERFACE ${BOOST_INCLUDE_DIR})
|
||||
endif()
|
||||
|
|
|
@ -266,18 +266,31 @@ else()
|
|||
-Wno-unknown-attributes)
|
||||
endif()
|
||||
add_compile_options(
|
||||
-Wno-unknown-warning-option
|
||||
-Wno-dangling-else
|
||||
-Wno-sign-compare
|
||||
-Wall -Wextra
|
||||
# Here's the current set of warnings we need to explicitly disable to compile warning-free with clang 10
|
||||
-Wno-comment
|
||||
-Wno-unknown-pragmas
|
||||
-Wno-dangling-else
|
||||
-Wno-delete-non-virtual-dtor
|
||||
-Wno-format
|
||||
-Wno-mismatched-tags
|
||||
-Wno-missing-field-initializers
|
||||
-Wno-overloaded-virtual
|
||||
-Wno-reorder
|
||||
-Wno-reorder-ctor
|
||||
-Wno-sign-compare
|
||||
-Wno-tautological-pointer-compare
|
||||
-Wno-undefined-var-template
|
||||
-Wno-tautological-pointer-compare
|
||||
-Wno-format
|
||||
-Wredundant-move
|
||||
-Wpessimizing-move
|
||||
-Woverloaded-virtual
|
||||
-Wno-unknown-pragmas
|
||||
-Wno-unknown-warning-option
|
||||
-Wno-unused-function
|
||||
-Wno-unused-local-typedef
|
||||
-Wno-unused-parameter
|
||||
-Wno-unused-value
|
||||
-Wno-self-assign
|
||||
)
|
||||
if (USE_CCACHE)
|
||||
add_compile_options(
|
||||
|
|
|
@ -261,7 +261,7 @@ namespace SummarizeTest
|
|||
testFile = random.Choice(uniqueFiles);
|
||||
string oldBinaryVersionLowerBound = "0.0.0";
|
||||
string lastFolderName = Path.GetFileName(Path.GetDirectoryName(testFile));
|
||||
if (lastFolderName.Contains("from_")) // Only perform upgrade tests from certain versions
|
||||
if (lastFolderName.Contains("from_") || lastFolderName.Contains("to_")) // Only perform upgrade/downgrade tests from certain versions
|
||||
{
|
||||
oldBinaryVersionLowerBound = lastFolderName.Split('_').Last();
|
||||
}
|
||||
|
@ -295,14 +295,17 @@ namespace SummarizeTest
|
|||
|
||||
if (testDir.EndsWith("restarting"))
|
||||
{
|
||||
bool isDowngrade = Path.GetFileName(Path.GetDirectoryName(testFile)).Contains("to_");
|
||||
string firstServerName = isDowngrade ? fdbserverName : oldServerName;
|
||||
string secondServerName = isDowngrade ? oldServerName : fdbserverName;
|
||||
int expectedUnseed = -1;
|
||||
int unseed;
|
||||
string uid = Guid.NewGuid().ToString();
|
||||
bool useNewPlugin = oldServerName == fdbserverName || versionGreaterThanOrEqual(oldServerName.Split('-').Last(), "5.2.0");
|
||||
result = RunTest(oldServerName, useNewPlugin ? tlsPluginFile : tlsPluginFile_5_1, summaryFileName, errorFileName, seed, buggify, testFile + "-1.txt", runDir, uid, expectedUnseed, out unseed, out retryableError, logOnRetryableError, useValgrind, false, true, oldServerName, traceToStdout);
|
||||
bool useNewPlugin = (oldServerName == fdbserverName) || versionGreaterThanOrEqual(oldServerName.Split('-').Last(), "5.2.0");
|
||||
result = RunTest(firstServerName, useNewPlugin ? tlsPluginFile : tlsPluginFile_5_1, summaryFileName, errorFileName, seed, buggify, testFile + "-1.txt", runDir, uid, expectedUnseed, out unseed, out retryableError, logOnRetryableError, useValgrind, false, true, oldServerName, traceToStdout);
|
||||
if (result == 0)
|
||||
{
|
||||
result = RunTest(fdbserverName, tlsPluginFile, summaryFileName, errorFileName, seed+1, buggify, testFile + "-2.txt", runDir, uid, expectedUnseed, out unseed, out retryableError, logOnRetryableError, useValgrind, true, false, oldServerName, traceToStdout);
|
||||
result = RunTest(secondServerName, tlsPluginFile, summaryFileName, errorFileName, seed+1, buggify, testFile + "-2.txt", runDir, uid, expectedUnseed, out unseed, out retryableError, logOnRetryableError, useValgrind, true, false, oldServerName, traceToStdout);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -476,6 +476,7 @@ Applications must provide error handling and an appropriate retry loop around th
|
|||
|
||||
.. function:: FDBFuture* fdb_transaction_get_estimated_range_size_bytes( FDBTransaction* tr, uint8_t const* begin_key_name, int begin_key_name_length, uint8_t const* end_key_name, int end_key_name_length)
|
||||
Returns an estimated byte size of the key range.
|
||||
.. note:: The estimated size is calculated based on the sampling done by the FDB server. The sampling algorithm works roughly as follows: the larger a key-value pair is, the more likely it is to be sampled and the more accurate its sampled size will be. For this reason, it is recommended to use this API on large ranges to get accurate results. As a rough reference, if the returned size is larger than 3MB, the size can be considered accurate.
|
||||
|
||||
|future-return0| the estimated size of the key range given. |future-return1| call :func:`fdb_future_get_int64()` to extract the size, |future-return2|
|
||||
|
||||
|
|
|
@ -800,6 +800,7 @@ Transaction misc functions
|
|||
.. method:: Transaction.get_estimated_range_size_bytes(begin_key, end_key)
|
||||
|
||||
Get the estimated byte size of the given key range. Returns a :class:`FutureInt64`.
|
||||
.. note:: The estimated size is calculated based on the sampling done by the FDB server. The sampling algorithm works roughly as follows: the larger a key-value pair is, the more likely it is to be sampled and the more accurate its sampled size will be. For this reason, it is recommended to use this API on large ranges to get accurate results. As a rough reference, if the returned size is larger than 3MB, the size can be considered accurate.
|
||||
|
||||
.. _api-python-transaction-options:
|
||||
|
||||
|
|
|
@ -744,6 +744,7 @@ Transaction misc functions
|
|||
.. method:: Transaction.get_estimated_range_size_bytes(begin_key, end_key)
|
||||
|
||||
Get the estimated byte size of the given key range. Returns a :class:`Int64Future`.
|
||||
.. note:: The estimated size is calculated based on the sampling done by the FDB server. The sampling algorithm works roughly as follows: the larger a key-value pair is, the more likely it is to be sampled and the more accurate its sampled size will be. For this reason, it is recommended to use this API on large ranges to get accurate results. As a rough reference, if the returned size is larger than 3MB, the size can be considered accurate.
|
||||
|
||||
.. method:: Transaction.get_approximate_size() -> Int64Future
|
||||
|
||||
|
|
|
@ -10,38 +10,38 @@ macOS
|
|||
|
||||
The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
|
||||
|
||||
* `FoundationDB-6.3.3.pkg <https://www.foundationdb.org/downloads/6.3.3/macOS/installers/FoundationDB-6.3.3.pkg>`_
|
||||
* `FoundationDB-6.3.4.pkg <https://www.foundationdb.org/downloads/6.3.4/macOS/installers/FoundationDB-6.3.4.pkg>`_
|
||||
|
||||
Ubuntu
|
||||
------
|
||||
|
||||
The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
|
||||
|
||||
* `foundationdb-clients-6.3.3-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.3/ubuntu/installers/foundationdb-clients_6.3.3-1_amd64.deb>`_
|
||||
* `foundationdb-server-6.3.3-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.3/ubuntu/installers/foundationdb-server_6.3.3-1_amd64.deb>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.4-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.4/ubuntu/installers/foundationdb-clients_6.3.4-1_amd64.deb>`_
|
||||
* `foundationdb-server-6.3.4-1_amd64.deb <https://www.foundationdb.org/downloads/6.3.4/ubuntu/installers/foundationdb-server_6.3.4-1_amd64.deb>`_ (depends on the clients package)
|
||||
|
||||
RHEL/CentOS EL6
|
||||
---------------
|
||||
|
||||
The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
|
||||
|
||||
* `foundationdb-clients-6.3.3-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.3/rhel6/installers/foundationdb-clients-6.3.3-1.el6.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.3-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.3/rhel6/installers/foundationdb-server-6.3.3-1.el6.x86_64.rpm>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.4-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.4/rhel6/installers/foundationdb-clients-6.3.4-1.el6.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.4-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.4/rhel6/installers/foundationdb-server-6.3.4-1.el6.x86_64.rpm>`_ (depends on the clients package)
|
||||
|
||||
RHEL/CentOS EL7
|
||||
---------------
|
||||
|
||||
The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
|
||||
|
||||
* `foundationdb-clients-6.3.3-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.3/rhel7/installers/foundationdb-clients-6.3.3-1.el7.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.3-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.3/rhel7/installers/foundationdb-server-6.3.3-1.el7.x86_64.rpm>`_ (depends on the clients package)
|
||||
* `foundationdb-clients-6.3.4-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.4/rhel7/installers/foundationdb-clients-6.3.4-1.el7.x86_64.rpm>`_
|
||||
* `foundationdb-server-6.3.4-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.3.4/rhel7/installers/foundationdb-server-6.3.4-1.el7.x86_64.rpm>`_ (depends on the clients package)
|
||||
|
||||
Windows
|
||||
-------
|
||||
|
||||
The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
|
||||
|
||||
* `foundationdb-6.3.3-x64.msi <https://www.foundationdb.org/downloads/6.3.3/windows/installers/foundationdb-6.3.3-x64.msi>`_
|
||||
* `foundationdb-6.3.4-x64.msi <https://www.foundationdb.org/downloads/6.3.4/windows/installers/foundationdb-6.3.4-x64.msi>`_
|
||||
|
||||
API Language Bindings
|
||||
=====================
|
||||
|
@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
|
|||
|
||||
If you need to use the FoundationDB Python API from other Python installations or paths, use the Python package manager ``pip`` (``pip install foundationdb``) or download the Python package:
|
||||
|
||||
* `foundationdb-6.3.3.tar.gz <https://www.foundationdb.org/downloads/6.3.3/bindings/python/foundationdb-6.3.3.tar.gz>`_
|
||||
* `foundationdb-6.3.4.tar.gz <https://www.foundationdb.org/downloads/6.3.4/bindings/python/foundationdb-6.3.4.tar.gz>`_
|
||||
|
||||
Ruby 1.9.3/2.0.0+
|
||||
-----------------
|
||||
|
||||
* `fdb-6.3.3.gem <https://www.foundationdb.org/downloads/6.3.3/bindings/ruby/fdb-6.3.3.gem>`_
|
||||
* `fdb-6.3.4.gem <https://www.foundationdb.org/downloads/6.3.4/bindings/ruby/fdb-6.3.4.gem>`_
|
||||
|
||||
Java 8+
|
||||
-------
|
||||
|
||||
* `fdb-java-6.3.3.jar <https://www.foundationdb.org/downloads/6.3.3/bindings/java/fdb-java-6.3.3.jar>`_
|
||||
* `fdb-java-6.3.3-javadoc.jar <https://www.foundationdb.org/downloads/6.3.3/bindings/java/fdb-java-6.3.3-javadoc.jar>`_
|
||||
* `fdb-java-6.3.4.jar <https://www.foundationdb.org/downloads/6.3.4/bindings/java/fdb-java-6.3.4.jar>`_
|
||||
* `fdb-java-6.3.4-javadoc.jar <https://www.foundationdb.org/downloads/6.3.4/bindings/java/fdb-java-6.3.4-javadoc.jar>`_
|
||||
|
||||
Go 1.11+
|
||||
--------
|
||||
|
|
|
@ -2,6 +2,14 @@
|
|||
Release Notes
|
||||
#############
|
||||
|
||||
6.2.24
|
||||
======
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
* Added the ``suspend`` command to ``fdbcli`` which kills a process and prevents it from rejoining the cluster for a specified duration. `(PR #3550) <https://github.com/apple/foundationdb/pull/3550>`_
|
||||
|
||||
6.2.23
|
||||
======
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
Release Notes
|
||||
#############
|
||||
|
||||
6.3.3
|
||||
6.3.4
|
||||
=====
|
||||
|
||||
Features
|
||||
|
@ -36,6 +36,7 @@ Performance
|
|||
* Reduced the number of comparisons used by various map implementations. `(PR #2882) <https://github.com/apple/foundationdb/pull/2882>`_
|
||||
* Reduced the serialized size of empty strings. `(PR #3063) <https://github.com/apple/foundationdb/pull/3063>`_
|
||||
* Reduced the serialized size of various interfaces by 10x. `(PR #3068) <https://github.com/apple/foundationdb/pull/3068>`_
|
||||
* TLS handshakes can now be done in a background thread pool. `(PR #3403) <https://github.com/apple/foundationdb/pull/3403>`_
|
||||
|
||||
Reliability
|
||||
-----------
|
||||
|
@ -57,6 +58,8 @@ Fixes
|
|||
* Transaction logs configured to spill by reference had an unintended delay between each spilled batch. `(PR #3153) <https://github.com/apple/foundationdb/pull/3153>`_
|
||||
* Added guards to honor ``DISABLE_POSIX_KERNEL_AIO``. `(PR #2888) <https://github.com/apple/foundationdb/pull/2888>`_
|
||||
* Prevent blob upload timeout if request timeout is lower than expected request time. `(PR #3533) <https://github.com/apple/foundationdb/pull/3533>`_
|
||||
* In very rare scenarios, the data distributor process would crash when being shut down. `(PR #3530) <https://github.com/apple/foundationdb/pull/3530>`_
|
||||
* The master would die immediately if it did not have the correct cluster controller interface when recruited. [6.3.4] `(PR #3537) <https://github.com/apple/foundationdb/pull/3537>`_
|
||||
|
||||
Status
|
||||
------
|
||||
|
|
|
@ -233,7 +233,7 @@ struct MutationFilesReadProgress : public ReferenceCounted<MutationFilesReadProg
|
|||
|
||||
void dumpProgress(std::string msg) {
|
||||
std::cout << msg << "\n ";
|
||||
for (const auto fp : fileProgress) {
|
||||
for (const auto& fp : fileProgress) {
|
||||
std::cout << fp->fd->getFilename() << " " << fp->mutations.size() << " mutations";
|
||||
if (fp->mutations.size() > 0) {
|
||||
std::cout << ", range " << fp->mutations[0].version.toString() << " "
|
||||
|
|
|
@ -585,6 +585,7 @@ CSimpleOpt::SOption g_rgBackupListOptions[] = {
|
|||
SO_END_OF_OPTIONS
|
||||
};
|
||||
|
||||
// g_rgRestoreOptions is used by fdbrestore and fastrestore_tool
|
||||
CSimpleOpt::SOption g_rgRestoreOptions[] = {
|
||||
#ifdef _WIN32
|
||||
{ OPT_PARENTPID, "--parentpid", SO_REQ_SEP },
|
||||
|
@ -1022,9 +1023,9 @@ static void printRestoreUsage(bool devhelp ) {
|
|||
printf(" Prefix to add to the restored keys\n");
|
||||
printf(" -n, --dryrun Perform a trial run with no changes made.\n");
|
||||
printf(" --log Enables trace file logging for the CLI session.\n"
|
||||
" --logdir PATH Specifes the output directory for trace files. If\n"
|
||||
" unspecified, defaults to the current directory. Has\n"
|
||||
" no effect unless --log is specified.\n");
|
||||
" --logdir PATH Specifies the output directory for trace files. If\n"
|
||||
" unspecified, defaults to the current directory. Has\n"
|
||||
" no effect unless --log is specified.\n");
|
||||
printf(" --loggroup LOG_GROUP\n"
|
||||
" Sets the LogGroup field with the specified value for all\n"
|
||||
" events in the trace output (defaults to `default').\n");
|
||||
|
@ -1059,43 +1060,10 @@ static void printRestoreUsage(bool devhelp ) {
|
|||
}
|
||||
|
||||
static void printFastRestoreUsage(bool devhelp) {
|
||||
printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n");
|
||||
printf("Usage: %s (start | status | abort | wait) [OPTIONS]\n\n", exeRestore.toString().c_str());
|
||||
// printf(" FOLDERS Paths to folders containing the backup files.\n");
|
||||
printf("Options for all commands:\n\n");
|
||||
printf(" -C CONNFILE The path of a file containing the connection string for the\n"
|
||||
" FoundationDB cluster. The default is first the value of the\n"
|
||||
" FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n"
|
||||
" then `%s'.\n",
|
||||
platform::getDefaultClusterFilePath().c_str());
|
||||
printf(" -t TAGNAME The restore tag to act on. Default is 'default'\n");
|
||||
printf(" --tagname TAGNAME\n\n");
|
||||
printf(" Options for start:\n\n");
|
||||
printf(" -r URL The Backup URL for the restore to read from.\n");
|
||||
printBackupContainerInfo();
|
||||
printf(" -w Wait for the restore to complete before exiting. Prints progress updates.\n");
|
||||
printf(" --waitfordone\n");
|
||||
printf(" -k KEYS List of key ranges from the backup to restore\n");
|
||||
printf(" --remove_prefix PREFIX prefix to remove from the restored keys\n");
|
||||
printf(" --add_prefix PREFIX prefix to add to the restored keys\n");
|
||||
printf(" -n, --dry-run Perform a trial run with no changes made.\n");
|
||||
printf(" -v DBVERSION The version at which the database will be restored.\n");
|
||||
printf(" -h, --help Display this help and exit.\n");
|
||||
printf("NOTE: Fast restore is still under development. The options may not be fully supported.\n");
|
||||
|
||||
if (devhelp) {
|
||||
#ifdef _WIN32
|
||||
printf(" -q Disable error dialog on crash.\n");
|
||||
printf(" --parentpid PID\n");
|
||||
printf(" Specify a process after whose termination to exit.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
printf("\n"
|
||||
" KEYS FORMAT: \"<BEGINKEY> <ENDKEY>\" [...]\n");
|
||||
printf("\n");
|
||||
puts(BlobCredentialInfo);
|
||||
|
||||
printf(" NOTE: Fast restore aims to support the same fdbrestore option list.\n");
|
||||
printf(" But fast restore is still under development. The options may not be fully supported.\n");
|
||||
printf(" Supported options are: --dest_cluster_file, -r, --waitfordone, --logdir\n");
|
||||
printRestoreUsage(devhelp);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2210,6 +2178,11 @@ ACTOR Future<Void> runFastRestoreTool(Database db, std::string tagName, std::str
|
|||
|
||||
printf("[INFO] runFastRestoreTool: restore_ranges:%d first range:%s\n", ranges.size(),
|
||||
ranges.front().toString().c_str());
|
||||
TraceEvent ev("FastRestoreTool");
|
||||
ev.detail("RestoreRanges", ranges.size());
|
||||
for (int i = 0; i < ranges.size(); ++i) {
|
||||
ev.detail(format("Range%d", i), ranges[i]);
|
||||
}
|
||||
|
||||
if (performRestore) {
|
||||
if (dbVersion == invalidVersion) {
|
||||
|
@ -3335,7 +3308,7 @@ int main(int argc, char* argv[]) {
|
|||
break;
|
||||
|
||||
case EXE_FASTRESTORE_TOOL:
|
||||
fprintf(stderr, "ERROR: FDB Fast Restore Agent does not support argument value `%s'\n",
|
||||
fprintf(stderr, "ERROR: FDB Fast Restore Tool does not support argument value `%s'\n",
|
||||
args->File(argLoop));
|
||||
printHelpTeaser(argv[0]);
|
||||
return FDB_EXIT_ERROR;
|
||||
|
@ -3751,12 +3724,40 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
break;
|
||||
case EXE_FASTRESTORE_TOOL:
|
||||
// TODO: We have not implmented the code commented out in this case
|
||||
if (!initCluster()) return FDB_EXIT_ERROR;
|
||||
// Support --dest_cluster_file option as fdbrestore does
|
||||
if (dryRun) {
|
||||
if (restoreType != RESTORE_START) {
|
||||
fprintf(stderr, "Restore dry run only works for 'start' command\n");
|
||||
return FDB_EXIT_ERROR;
|
||||
}
|
||||
|
||||
// Must explicitly call trace file options handling if not calling Database::createDatabase()
|
||||
initTraceFile();
|
||||
} else {
|
||||
if (restoreClusterFileDest.empty()) {
|
||||
fprintf(stderr, "Restore destination cluster file must be specified explicitly.\n");
|
||||
return FDB_EXIT_ERROR;
|
||||
}
|
||||
|
||||
if (!fileExists(restoreClusterFileDest)) {
|
||||
fprintf(stderr, "Restore destination cluster file '%s' does not exist.\n",
|
||||
restoreClusterFileDest.c_str());
|
||||
return FDB_EXIT_ERROR;
|
||||
}
|
||||
|
||||
try {
|
||||
db = Database::createDatabase(restoreClusterFileDest, Database::API_VERSION_LATEST);
|
||||
} catch (Error& e) {
|
||||
fprintf(stderr, "Restore destination cluster file '%s' invalid: %s\n",
|
||||
restoreClusterFileDest.c_str(), e.what());
|
||||
return FDB_EXIT_ERROR;
|
||||
}
|
||||
}
|
||||
// TODO: We have not implemented the code commented out in this case
|
||||
switch (restoreType) {
|
||||
case RESTORE_START:
|
||||
f = stopAfter(runFastRestoreTool(db, tagName, restoreContainer, backupKeys, restoreVersion, !dryRun,
|
||||
!quietDisplay, waitForDone));
|
||||
!quietDisplay, waitForDone));
|
||||
break;
|
||||
case RESTORE_WAIT:
|
||||
printf("[TODO][ERROR] FastRestore does not support RESTORE_WAIT yet!\n");
|
||||
|
@ -3767,8 +3768,9 @@ int main(int argc, char* argv[]) {
|
|||
printf("[TODO][ERROR] FastRestore does not support RESTORE_ABORT yet!\n");
|
||||
throw restore_error();
|
||||
// f = stopAfter( map(ba.abortRestore(db, KeyRef(tagName)),
|
||||
//[tagName](FileBackupAgent::ERestoreState s) -> Void { printf("Tag: %s State: %s\n", tagName.c_str(),
|
||||
//FileBackupAgent::restoreStateText(s).toString().c_str()); return Void();
|
||||
//[tagName](FileBackupAgent::ERestoreState s) -> Void { printf("Tag: %s State: %s\n",
|
||||
//tagName.c_str(),
|
||||
// FileBackupAgent::restoreStateText(s).toString().c_str()); return Void();
|
||||
// }) );
|
||||
break;
|
||||
case RESTORE_STATUS:
|
||||
|
@ -3848,7 +3850,8 @@ int main(int argc, char* argv[]) {
|
|||
<< FastAllocator<1024>::pageCount << " "
|
||||
<< FastAllocator<2048>::pageCount << " "
|
||||
<< FastAllocator<4096>::pageCount << " "
|
||||
<< FastAllocator<8192>::pageCount << endl;
|
||||
<< FastAllocator<8192>::pageCount << " "
|
||||
<< FastAllocator<16384>::pageCount << endl;
|
||||
|
||||
vector< std::pair<std::string, const char*> > typeNames;
|
||||
for( auto i = allocInstr.begin(); i != allocInstr.end(); ++i ) {
|
||||
|
|
|
@ -556,6 +556,10 @@ void initHelp() {
|
|||
"kill all|list|<ADDRESS...>",
|
||||
"attempts to kill one or more processes in the cluster",
|
||||
"If no addresses are specified, populates the list of processes which can be killed. Processes cannot be killed before this list has been populated.\n\nIf `all' is specified, attempts to kill all known processes.\n\nIf `list' is specified, displays all known processes. This is only useful when the database is unresponsive.\n\nFor each IP:port pair in <ADDRESS ...>, attempt to kill the specified process.");
|
||||
helpMap["suspend"] = CommandHelp(
|
||||
"suspend <SECONDS> <ADDRESS...>",
|
||||
"attempts to suspend one or more processes in the cluster",
|
||||
"If no parameters are specified, populates the list of processes which can be suspended. Processes cannot be suspended before this list has been populated.\n\nFor each IP:port pair in <ADDRESS...>, attempt to suspend the processes for the specified SECONDS after which the process will die.");
|
||||
helpMap["profile"] = CommandHelp(
|
||||
"profile <client|list|flow|heap> <action> <ARGS>",
|
||||
"namespace for all the profiling-related commands.",
|
||||
|
@ -3391,6 +3395,59 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "suspend")) {
|
||||
getTransaction(db, tr, options, intrans);
|
||||
if (tokens.size() == 1) {
|
||||
Standalone<RangeResultRef> kvs = wait( makeInterruptable( tr->getRange(KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces"), LiteralStringRef("\xff\xff\xff")), 1) ) );
|
||||
Reference<FlowLock> connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM));
|
||||
std::vector<Future<Void>> addInterfs;
|
||||
for( auto it : kvs ) {
|
||||
addInterfs.push_back(addInterface(&address_interface, connectLock, it));
|
||||
}
|
||||
wait( waitForAll(addInterfs) );
|
||||
if(address_interface.size() == 0) {
|
||||
printf("\nNo addresses can be suspended.\n");
|
||||
} else if(address_interface.size() == 1) {
|
||||
printf("\nThe following address can be suspended:\n");
|
||||
} else {
|
||||
printf("\nThe following %zu addresses can be suspended:\n", address_interface.size());
|
||||
}
|
||||
for( auto it : address_interface ) {
|
||||
printf("%s\n", printable(it.first).c_str());
|
||||
}
|
||||
printf("\n");
|
||||
} else if(tokens.size() == 2) {
|
||||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
for(int i = 2; i < tokens.size(); i++) {
|
||||
if(!address_interface.count(tokens[i])) {
|
||||
printf("ERROR: process `%s' not recognized.\n", printable(tokens[i]).c_str());
|
||||
is_error = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!is_error) {
|
||||
double seconds;
|
||||
int n=0;
|
||||
auto secondsStr = tokens[1].toString();
|
||||
if (sscanf(secondsStr.c_str(), "%lf%n", &seconds, &n) != 1 || n != secondsStr.size()) {
|
||||
printUsage(tokens[0]);
|
||||
is_error = true;
|
||||
} else {
|
||||
int64_t timeout_ms = seconds*1000;
|
||||
tr->setOption(FDBTransactionOptions::TIMEOUT, StringRef((uint8_t *)&timeout_ms, sizeof(int64_t)));
|
||||
for(int i = 2; i < tokens.size(); i++) {
|
||||
tr->set(LiteralStringRef("\xff\xff/suspend_worker"), address_interface[tokens[i]].first);
|
||||
}
|
||||
printf("Attempted to suspend %zu processes\n", tokens.size() - 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tokencmp(tokens[0], "force_recovery_with_data_loss")) {
|
||||
if(tokens.size() != 2) {
|
||||
printUsage(tokens[0]);
|
||||
|
|
|
@ -59,7 +59,7 @@ public:
|
|||
virtual void delref() { ReferenceCounted<AsyncFileBlobStoreWrite>::delref(); }
|
||||
|
||||
struct Part : ReferenceCounted<Part> {
|
||||
Part(int n) : number(n), writer(content.getWriteBuffer(), NULL, Unversioned()), length(0) {
|
||||
Part(int n, int minSize) : number(n), writer(content.getWriteBuffer(minSize), NULL, Unversioned()), length(0) {
|
||||
etag = std::string();
|
||||
::MD5_Init(&content_md5_buf);
|
||||
}
|
||||
|
@ -231,7 +231,7 @@ private:
|
|||
|
||||
// Make a new part to write to
|
||||
if(startNew)
|
||||
f->m_parts.push_back(Reference<Part>(new Part(f->m_parts.size() + 1)));
|
||||
f->m_parts.push_back(Reference<Part>(new Part(f->m_parts.size() + 1, f->m_bstore->knobs.multipart_min_part_size)));
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
@ -247,7 +247,7 @@ public:
|
|||
: m_bstore(bstore), m_bucket(bucket), m_object(object), m_cursor(0), m_concurrentUploads(bstore->knobs.concurrent_writes_per_file) {
|
||||
|
||||
// Add first part
|
||||
m_parts.push_back(Reference<Part>(new Part(1)));
|
||||
m_parts.push_back(Reference<Part>(new Part(1, m_bstore->knobs.multipart_min_part_size)));
|
||||
}
|
||||
|
||||
};
|
||||
|
|
|
@ -908,8 +908,8 @@ struct StringRefReader {
|
|||
|
||||
// Functions for consuming big endian (network byte order) integers.
|
||||
// Consumes a big endian number, swaps it to little endian, and returns it.
|
||||
const int32_t consumeNetworkInt32() { return (int32_t)bigEndian32((uint32_t)consume<int32_t>()); }
|
||||
const uint32_t consumeNetworkUInt32() { return bigEndian32(consume<uint32_t>()); }
|
||||
int32_t consumeNetworkInt32() { return (int32_t)bigEndian32((uint32_t)consume<int32_t>()); }
|
||||
uint32_t consumeNetworkUInt32() { return bigEndian32(consume<uint32_t>()); }
|
||||
|
||||
// Converts a big endian value (e.g., encoded in a log file) into a little endian int64_t value.
|
||||
// Consumes an 8-byte big endian integer, swaps it to little endian, and returns it.
// The intermediate cast must stay 64 bits wide: narrowing to uint32_t would discard
// half of the consumed bytes before the byte swap.
int64_t consumeNetworkInt64() { return (int64_t)bigEndian64((uint64_t)consume<int64_t>()); }
|
||||
|
|
|
@ -842,10 +842,18 @@ public:
|
|||
|
||||
state std::vector<LogFile> logs;
|
||||
state std::vector<LogFile> plogs;
|
||||
TraceEvent("BackupContainerListFiles").detail("URL", bc->getURL());
|
||||
|
||||
wait(store(logs, bc->listLogFiles(scanBegin, scanEnd, false)) &&
|
||||
store(plogs, bc->listLogFiles(scanBegin, scanEnd, true)) &&
|
||||
store(desc.snapshots, bc->listKeyspaceSnapshots()));
|
||||
|
||||
TraceEvent("BackupContainerListFiles")
|
||||
.detail("URL", bc->getURL())
|
||||
.detail("LogFiles", logs.size())
|
||||
.detail("PLogsFiles", plogs.size())
|
||||
.detail("Snapshots", desc.snapshots.size());
|
||||
|
||||
if (plogs.size() > 0) {
|
||||
desc.partitioned = true;
|
||||
logs.swap(plogs);
|
||||
|
@ -1207,7 +1215,7 @@ public:
|
|||
}
|
||||
|
||||
// for each range in tags, check all tags from 1 are continuous
|
||||
for (const auto [beginEnd, count] : tags) {
|
||||
for (const auto& [beginEnd, count] : tags) {
|
||||
for (int i = 1; i < count; i++) {
|
||||
if (!isContinuous(files, tagIndices[i], beginEnd.first, std::min(beginEnd.second - 1, end), nullptr)) {
|
||||
TraceEvent(SevWarn, "BackupFileNotContinuous")
|
||||
|
@ -1310,7 +1318,7 @@ public:
|
|||
|
||||
// for each range in tags, check all partitions from 1 are continuous
|
||||
Version lastEnd = begin;
|
||||
for (const auto [beginEnd, count] : tags) {
|
||||
for (const auto& [beginEnd, count] : tags) {
|
||||
Version tagEnd = beginEnd.second; // This range's minimum continous partition version
|
||||
for (int i = 1; i < count; i++) {
|
||||
std::map<std::pair<Version, Version>, int> rangeTags;
|
||||
|
@ -1611,7 +1619,7 @@ public:
|
|||
std::string uniquePath = fullPath + "." + deterministicRandom()->randomUniqueID().toString() + ".lnk";
|
||||
unlink(uniquePath.c_str());
|
||||
ASSERT(symlink(basename(path).c_str(), uniquePath.c_str()) == 0);
|
||||
fullPath = uniquePath = uniquePath;
|
||||
fullPath = uniquePath;
|
||||
}
|
||||
// Opening cached mode forces read/write mode at a lower level, overriding the readonly request. So cached mode
|
||||
// can't be used because backup files are read-only. Cached mode can only help during restore task retries handled
|
||||
|
|
|
@ -1057,7 +1057,7 @@ ACTOR Future<Void> writeEntireFileFromBuffer_impl(Reference<BlobStoreEndpoint> b
|
|||
|
||||
ACTOR Future<Void> writeEntireFile_impl(Reference<BlobStoreEndpoint> bstore, std::string bucket, std::string object, std::string content) {
|
||||
state UnsentPacketQueue packets;
|
||||
PacketWriter pw(packets.getWriteBuffer(), NULL, Unversioned());
|
||||
PacketWriter pw(packets.getWriteBuffer(content.size()), NULL, Unversioned());
|
||||
pw.serializeBytes(content);
|
||||
if(content.size() > bstore->knobs.multipart_max_part_size)
|
||||
throw file_too_large();
|
||||
|
@ -1180,7 +1180,7 @@ ACTOR Future<Void> finishMultiPartUpload_impl(Reference<BlobStoreEndpoint> bstor
|
|||
|
||||
std::string resource = format("/%s/%s?uploadId=%s", bucket.c_str(), object.c_str(), uploadID.c_str());
|
||||
HTTP::Headers headers;
|
||||
PacketWriter pw(part_list.getWriteBuffer(), NULL, Unversioned());
|
||||
PacketWriter pw(part_list.getWriteBuffer(manifest.size()), NULL, Unversioned());
|
||||
pw.serializeBytes(manifest);
|
||||
Reference<HTTP::Response> r = wait(bstore->doRequest("POST", resource, headers, &part_list, manifest.size(), {200}));
|
||||
// TODO: In the event that the client times out just before the request completes (so the client is unaware) then the next retry
|
||||
|
|
|
@ -352,9 +352,6 @@ namespace HTTP {
|
|||
send_start = timer();
|
||||
|
||||
loop {
|
||||
wait(conn->onWritable());
|
||||
wait( delay( 0, TaskPriority::WriteSocket ) );
|
||||
|
||||
// If we already got a response, before finishing sending the request, then close the connection,
|
||||
// set the Connection header to "close" as a hint to the caller that this connection can't be used
|
||||
// again, and break out of the send loop.
|
||||
|
@ -375,6 +372,9 @@ namespace HTTP {
|
|||
pContent->sent(len);
|
||||
if(pContent->empty())
|
||||
break;
|
||||
|
||||
wait(conn->onWritable());
|
||||
wait(yield(TaskPriority::WriteSocket));
|
||||
}
|
||||
|
||||
wait(responseReading);
|
||||
|
|
|
@ -89,7 +89,7 @@ void ClientKnobs::initialize(bool randomize) {
|
|||
init( STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, 15.0 );
|
||||
init( AGGREGATE_HEALTH_METRICS_MAX_STALENESS, 0.5 );
|
||||
init( DETAILED_HEALTH_METRICS_MAX_STALENESS, 5.0 );
|
||||
init( TAG_ENCODE_KEY_SERVERS, false ); if( randomize && BUGGIFY ) TAG_ENCODE_KEY_SERVERS = true;
|
||||
init( TAG_ENCODE_KEY_SERVERS, true ); if( randomize && BUGGIFY ) TAG_ENCODE_KEY_SERVERS = false;
|
||||
|
||||
//KeyRangeMap
|
||||
init( KRM_GET_RANGE_LIMIT, 1e5 ); if( randomize && BUGGIFY ) KRM_GET_RANGE_LIMIT = 10;
|
||||
|
|
|
@ -435,7 +435,7 @@ struct ProxySnapRequest
|
|||
{
|
||||
constexpr static FileIdentifier file_identifier = 5427684;
|
||||
Arena arena;
|
||||
StringRef snapPayload;
|
||||
StringRef snapPayload; // command used to snapshot the data folder
|
||||
UID snapUID;
|
||||
ReplyPromise<Void> reply;
|
||||
Optional<UID> debugID;
|
||||
|
|
|
@ -767,15 +767,16 @@ void MultiVersionDatabase::Connector::connect() {
|
|||
}
|
||||
|
||||
tr = candidateDatabase->createTransaction();
|
||||
return ErrorOr<ThreadFuture<Void>>(mapThreadFuture<Version, Void>(tr->getReadVersion(), [this](ErrorOr<Version> v) {
|
||||
// If the version attempt returns an error, we regard that as a connection (except operation_cancelled)
|
||||
if(v.isError() && v.getError().code() == error_code_operation_cancelled) {
|
||||
return ErrorOr<Void>(v.getError());
|
||||
}
|
||||
else {
|
||||
return ErrorOr<Void>(Void());
|
||||
}
|
||||
}));
|
||||
return ErrorOr<ThreadFuture<Void>>(
|
||||
mapThreadFuture<Version, Void>(tr->getReadVersion(), [](ErrorOr<Version> v) {
|
||||
// If the version attempt returns an error, we regard that as a connection (except
|
||||
// operation_cancelled)
|
||||
if (v.isError() && v.getError().code() == error_code_operation_cancelled) {
|
||||
return ErrorOr<Void>(v.getError());
|
||||
} else {
|
||||
return ErrorOr<Void>(Void());
|
||||
}
|
||||
}));
|
||||
});
|
||||
|
||||
|
||||
|
@ -1045,7 +1046,7 @@ void MultiVersionApi::setSupportedClientVersions(Standalone<StringRef> versions)
|
|||
}, NULL);
|
||||
|
||||
if(!bypassMultiClientApi) {
|
||||
runOnExternalClients([this, versions](Reference<ClientInfo> client){
|
||||
runOnExternalClients([versions](Reference<ClientInfo> client) {
|
||||
client->api->setNetworkOption(FDBNetworkOptions::SUPPORTED_CLIENT_VERSIONS, versions);
|
||||
});
|
||||
}
|
||||
|
@ -1105,9 +1106,8 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
|
|||
|
||||
if(!bypassMultiClientApi) {
|
||||
if(networkSetup) {
|
||||
runOnExternalClients([this, option, value](Reference<ClientInfo> client) {
|
||||
client->api->setNetworkOption(option, value);
|
||||
});
|
||||
runOnExternalClients(
|
||||
[option, value](Reference<ClientInfo> client) { client->api->setNetworkOption(option, value); });
|
||||
}
|
||||
else {
|
||||
options.push_back(std::make_pair(option, value.castTo<Standalone<StringRef>>()));
|
||||
|
|
|
@ -1755,13 +1755,19 @@ void ReadYourWritesTransaction::atomicOp( const KeyRef& key, const ValueRef& ope
|
|||
}
|
||||
|
||||
void ReadYourWritesTransaction::set( const KeyRef& key, const ValueRef& value ) {
|
||||
if (key == LiteralStringRef("\xff\xff/reboot_worker")){
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(value, IncludeVersion()).reboot.send( RebootRequest() );
|
||||
return;
|
||||
}
|
||||
if (key == LiteralStringRef("\xff\xff/reboot_and_check_worker")){
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(value, IncludeVersion()).reboot.send( RebootRequest(false, true) );
|
||||
return;
|
||||
if (key.startsWith(systemKeys.end)) {
|
||||
if (key == LiteralStringRef("\xff\xff/reboot_worker")){
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(value, IncludeVersion()).reboot.send( RebootRequest() );
|
||||
return;
|
||||
}
|
||||
if (key == LiteralStringRef("\xff\xff/suspend_worker")){
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(value, IncludeVersion()).reboot.send( RebootRequest(false, false, options.timeoutInSeconds) );
|
||||
return;
|
||||
}
|
||||
if (key == LiteralStringRef("\xff\xff/reboot_and_check_worker")){
|
||||
BinaryReader::fromStringRef<ClientWorkerInterface>(value, IncludeVersion()).reboot.send( RebootRequest(false, true) );
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (key == metadataVersionKey) {
|
||||
throw client_invalid_operation();
|
||||
|
|
|
@ -27,18 +27,21 @@
|
|||
#elif !defined(FDBCLIENT_RUNTRANSACTION_ACTOR_H)
|
||||
#define FDBCLIENT_RUNTRANSACTION_ACTOR_H
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "flow/flow.h"
|
||||
#include "fdbclient/ReadYourWrites.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
ACTOR template < class Function >
|
||||
Future<decltype(fake<Function>()(Reference<ReadYourWritesTransaction>()).getValue())>
|
||||
runRYWTransaction(Database cx, Function func) {
|
||||
ACTOR template <class Function>
|
||||
Future<decltype(std::declval<Function>()(Reference<ReadYourWritesTransaction>()).getValue())> runRYWTransaction(
|
||||
Database cx, Function func) {
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
loop{
|
||||
try {
|
||||
// func should be idempotent; otherwise, a retry will produce an undefined result
|
||||
state decltype( fake<Function>()( Reference<ReadYourWritesTransaction>() ).getValue()) result = wait(func(tr));
|
||||
state decltype(std::declval<Function>()(Reference<ReadYourWritesTransaction>()).getValue()) result =
|
||||
wait(func(tr));
|
||||
wait(tr->commit());
|
||||
return result;
|
||||
}
|
||||
|
@ -48,13 +51,14 @@ runRYWTransaction(Database cx, Function func) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR template < class Function >
|
||||
Future<decltype(fake<Function>()(Reference<ReadYourWritesTransaction>()).getValue())>
|
||||
ACTOR template <class Function>
|
||||
Future<decltype(std::declval<Function>()(Reference<ReadYourWritesTransaction>()).getValue())>
|
||||
runRYWTransactionFailIfLocked(Database cx, Function func) {
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
loop{
|
||||
try {
|
||||
state decltype( fake<Function>()( Reference<ReadYourWritesTransaction>() ).getValue()) result = wait(func(tr));
|
||||
state decltype(std::declval<Function>()(Reference<ReadYourWritesTransaction>()).getValue()) result =
|
||||
wait(func(tr));
|
||||
wait(tr->commit());
|
||||
return result;
|
||||
}
|
||||
|
@ -66,11 +70,11 @@ runRYWTransactionFailIfLocked(Database cx, Function func) {
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR template < class Function >
|
||||
Future<decltype(fake<Function>()(Reference<ReadYourWritesTransaction>()).getValue())>
|
||||
runRYWTransactionNoRetry(Database cx, Function func) {
|
||||
ACTOR template <class Function>
|
||||
Future<decltype(std::declval<Function>()(Reference<ReadYourWritesTransaction>()).getValue())> runRYWTransactionNoRetry(
|
||||
Database cx, Function func) {
|
||||
state Reference<ReadYourWritesTransaction> tr(new ReadYourWritesTransaction(cx));
|
||||
state decltype(fake<Function>()(Reference<ReadYourWritesTransaction>()).getValue()) result = wait(func(tr));
|
||||
state decltype(std::declval<Function>()(Reference<ReadYourWritesTransaction>()).getValue()) result = wait(func(tr));
|
||||
wait(tr->commit());
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -338,13 +338,10 @@ Future<Standalone<RangeResultRef>> ConflictingKeysImpl::getRange(ReadYourWritesT
|
|||
if (beginIter->begin() != kr.begin) ++beginIter;
|
||||
auto endIter = krMapPtr->rangeContaining(kr.end);
|
||||
for (auto it = beginIter; it != endIter; ++it) {
|
||||
// it->begin() is stored in the CoalescedKeyRangeMap in TransactionInfo
|
||||
// it->value() is always constants in SystemData.cpp
|
||||
// Thus, push_back() can be used
|
||||
result.push_back(result.arena(), KeyValueRef(it->begin(), it->value()));
|
||||
result.push_back_deep(result.arena(), KeyValueRef(it->begin(), it->value()));
|
||||
}
|
||||
if (endIter->begin() != kr.end)
|
||||
result.push_back(result.arena(), KeyValueRef(endIter->begin(), endIter->value()));
|
||||
result.push_back_deep(result.arena(), KeyValueRef(endIter->begin(), endIter->value()));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -339,7 +339,7 @@ namespace ThrottleApi {
|
|||
loop {
|
||||
try {
|
||||
Optional<Value> value = wait(tr.get(tagThrottleAutoEnabledKey));
|
||||
if(!value.present() || (enabled && value.get() != LiteralStringRef("1") || (!enabled && value.get() != LiteralStringRef("0")))) {
|
||||
if (!value.present() || (enabled && value.get() != LiteralStringRef("1")) || (!enabled && value.get() != LiteralStringRef("0"))) {
|
||||
tr.set(tagThrottleAutoEnabledKey, LiteralStringRef(enabled ? "1" : "0"));
|
||||
signalThrottleChange(tr);
|
||||
|
||||
|
@ -352,4 +352,4 @@ namespace ThrottleApi {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -409,12 +409,16 @@ ACTOR Future<Void> connectionWriter( Reference<Peer> self, Reference<IConnection
|
|||
loop {
|
||||
lastWriteTime = now();
|
||||
|
||||
int sent = conn->write(self->unsent.getUnsent(), /* limit= */ FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
|
||||
if (sent) {
|
||||
int sent = conn->write(self->unsent.getUnsent(), FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
|
||||
|
||||
if (sent != 0) {
|
||||
self->transport->bytesSent += sent;
|
||||
self->unsent.sent(sent);
|
||||
}
|
||||
if (self->unsent.empty()) break;
|
||||
|
||||
if (self->unsent.empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
TEST(true); // We didn't write everything, so apparently the write buffer is full. Wait for it to be nonfull.
|
||||
wait( conn->onWritable() );
|
||||
|
|
|
@ -184,7 +184,7 @@ TEST_CASE("/BackupProgress/Unfinished") {
|
|||
std::map<std::tuple<LogEpoch, Version, int>, std::map<Tag, Version>> unfinished = progress.getUnfinishedBackup();
|
||||
|
||||
ASSERT(unfinished.size() == 1);
|
||||
for (const auto [epochVersionCount, tagVersion] : unfinished) {
|
||||
for (const auto& [epochVersionCount, tagVersion] : unfinished) {
|
||||
ASSERT(std::get<0>(epochVersionCount) == epoch1 && std::get<1>(epochVersionCount) == end1 &&
|
||||
std::get<2>(epochVersionCount) == 1);
|
||||
ASSERT(tagVersion.size() == 1 && tagVersion.begin()->first == tag1 && tagVersion.begin()->second == begin1);
|
||||
|
@ -195,11 +195,11 @@ TEST_CASE("/BackupProgress/Unfinished") {
|
|||
progress.addBackupStatus(status1);
|
||||
unfinished = progress.getUnfinishedBackup();
|
||||
ASSERT(unfinished.size() == 1);
|
||||
for (const auto [epochVersionCount, tagVersion] : unfinished) {
|
||||
for (const auto& [epochVersionCount, tagVersion] : unfinished) {
|
||||
ASSERT(std::get<0>(epochVersionCount) == epoch1 && std::get<1>(epochVersionCount) == end1 &&
|
||||
std::get<2>(epochVersionCount) == 1);
|
||||
ASSERT(tagVersion.size() == 1 && tagVersion.begin()->first == tag1 && tagVersion.begin()->second == saved1 + 1);
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,8 +47,8 @@ struct VersionedMessage {
|
|||
|
||||
VersionedMessage(LogMessageVersion v, StringRef m, const VectorRef<Tag>& t, const Arena& a)
|
||||
: version(v), message(m), tags(t), arena(a), bytes(a.getSize()) {}
|
||||
const Version getVersion() const { return version.version; }
|
||||
const uint32_t getSubVersion() const { return version.sub; }
|
||||
Version getVersion() const { return version.version; }
|
||||
uint32_t getSubVersion() const { return version.sub; }
|
||||
|
||||
// Returns true if the message is a mutation that should be backed up, i.e.,
|
||||
// either key is not in system key space or is not a metadataVersionKey.
|
||||
|
@ -369,7 +369,7 @@ struct BackupData {
|
|||
bool modified = false;
|
||||
bool minVersionChanged = false;
|
||||
Version minVersion = std::numeric_limits<Version>::max();
|
||||
for (const auto [uid, version] : uidVersions) {
|
||||
for (const auto& [uid, version] : uidVersions) {
|
||||
auto it = backups.find(uid);
|
||||
if (it == backups.end()) {
|
||||
modified = true;
|
||||
|
|
|
@ -142,6 +142,7 @@ set(FDBSERVER_SRCS
|
|||
workloads/DDMetricsExclude.actor.cpp
|
||||
workloads/DiskDurability.actor.cpp
|
||||
workloads/DiskDurabilityTest.actor.cpp
|
||||
workloads/Downgrade.actor.cpp
|
||||
workloads/DummyWorkload.actor.cpp
|
||||
workloads/ExternalWorkload.actor.cpp
|
||||
workloads/FastTriggeredWatches.actor.cpp
|
||||
|
|
|
@ -159,15 +159,16 @@ public:
|
|||
|
||||
// TeamCollection's server team info.
|
||||
class TCTeamInfo : public ReferenceCounted<TCTeamInfo>, public IDataDistributionTeam {
|
||||
public:
|
||||
vector< Reference<TCServerInfo> > servers;
|
||||
vector<UID> serverIDs;
|
||||
Reference<TCMachineTeamInfo> machineTeam;
|
||||
Future<Void> tracker;
|
||||
bool healthy;
|
||||
bool wrongConfiguration; //True if any of the servers in the team have the wrong configuration
|
||||
int priority;
|
||||
|
||||
public:
|
||||
Reference<TCMachineTeamInfo> machineTeam;
|
||||
Future<Void> tracker;
|
||||
|
||||
explicit TCTeamInfo(vector<Reference<TCServerInfo>> const& servers)
|
||||
: servers(servers), healthy(true), priority(SERVER_KNOBS->PRIORITY_TEAM_HEALTHY), wrongConfiguration(false) {
|
||||
if (servers.empty()) {
|
||||
|
@ -179,21 +180,19 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
virtual vector<StorageServerInterface> getLastKnownServerInterfaces() {
|
||||
vector<StorageServerInterface> v;
|
||||
v.reserve(servers.size());
|
||||
for(int i=0; i<servers.size(); i++)
|
||||
v.push_back(servers[i]->lastKnownInterface);
|
||||
// Returns a copy of the last known interface of every server in this team,
// in the same order as `servers`.
vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
	vector<StorageServerInterface> v;
	// Reserve capacity instead of constructing with a size: a sized constructor would
	// create servers.size() default elements and push_back would then append after them,
	// returning twice as many entries as there are servers.
	v.reserve(servers.size());
	for (const auto& server : servers) {
		v.push_back(server->lastKnownInterface);
	}
	return v;
}
|
||||
virtual int size() {
|
||||
int size() const override {
|
||||
ASSERT(servers.size() == serverIDs.size());
|
||||
return servers.size();
|
||||
}
|
||||
virtual vector<UID> const& getServerIDs() { return serverIDs; }
|
||||
vector<UID> const& getServerIDs() const override { return serverIDs; }
|
||||
const vector<Reference<TCServerInfo>>& getServers() { return servers; }
|
||||
|
||||
virtual std::string getServerIDsStr() {
|
||||
std::string getServerIDsStr() const {
|
||||
std::stringstream ss;
|
||||
|
||||
if (serverIDs.empty()) return "[unset]";
|
||||
|
@ -205,18 +204,18 @@ public:
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
virtual void addDataInFlightToTeam( int64_t delta ) {
|
||||
void addDataInFlightToTeam(int64_t delta) override {
|
||||
for(int i=0; i<servers.size(); i++)
|
||||
servers[i]->dataInFlightToServer += delta;
|
||||
}
|
||||
virtual int64_t getDataInFlightToTeam() {
|
||||
int64_t getDataInFlightToTeam() const override {
|
||||
int64_t dataInFlight = 0.0;
|
||||
for(int i=0; i<servers.size(); i++)
|
||||
dataInFlight += servers[i]->dataInFlightToServer;
|
||||
return dataInFlight;
|
||||
}
|
||||
|
||||
virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) {
|
||||
int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0) const override {
|
||||
int64_t physicalBytes = getLoadAverage();
|
||||
double minAvailableSpaceRatio = getMinAvailableSpaceRatio(includeInFlight);
|
||||
int64_t inFlightBytes = includeInFlight ? getDataInFlightToTeam() / servers.size() : 0;
|
||||
|
@ -233,18 +232,18 @@ public:
|
|||
return (physicalBytes + (inflightPenalty*inFlightBytes)) * availableSpaceMultiplier;
|
||||
}
|
||||
|
||||
virtual int64_t getMinAvailableSpace( bool includeInFlight = true ) {
|
||||
int64_t getMinAvailableSpace(bool includeInFlight = true) const override {
|
||||
int64_t minAvailableSpace = std::numeric_limits<int64_t>::max();
|
||||
for(int i=0; i<servers.size(); i++) {
|
||||
if( servers[i]->serverMetrics.present() ) {
|
||||
auto& replyValue = servers[i]->serverMetrics.get();
|
||||
for (const auto& server : servers) {
|
||||
if (server->serverMetrics.present()) {
|
||||
auto& replyValue = server->serverMetrics.get();
|
||||
|
||||
ASSERT(replyValue.available.bytes >= 0);
|
||||
ASSERT(replyValue.capacity.bytes >= 0);
|
||||
|
||||
int64_t bytesAvailable = replyValue.available.bytes;
|
||||
if(includeInFlight) {
|
||||
bytesAvailable -= servers[i]->dataInFlightToServer;
|
||||
bytesAvailable -= server->dataInFlightToServer;
|
||||
}
|
||||
|
||||
minAvailableSpace = std::min(bytesAvailable, minAvailableSpace);
|
||||
|
@ -254,18 +253,18 @@ public:
|
|||
return minAvailableSpace; // Could be negative
|
||||
}
|
||||
|
||||
virtual double getMinAvailableSpaceRatio( bool includeInFlight = true ) {
|
||||
double getMinAvailableSpaceRatio(bool includeInFlight = true) const override {
|
||||
double minRatio = 1.0;
|
||||
for(int i=0; i<servers.size(); i++) {
|
||||
if( servers[i]->serverMetrics.present() ) {
|
||||
auto& replyValue = servers[i]->serverMetrics.get();
|
||||
for (const auto& server : servers) {
|
||||
if (server->serverMetrics.present()) {
|
||||
auto& replyValue = server->serverMetrics.get();
|
||||
|
||||
ASSERT(replyValue.available.bytes >= 0);
|
||||
ASSERT(replyValue.capacity.bytes >= 0);
|
||||
|
||||
int64_t bytesAvailable = replyValue.available.bytes;
|
||||
if(includeInFlight) {
|
||||
bytesAvailable = std::max((int64_t)0, bytesAvailable - servers[i]->dataInFlightToServer);
|
||||
bytesAvailable = std::max((int64_t)0, bytesAvailable - server->dataInFlightToServer);
|
||||
}
|
||||
|
||||
if(replyValue.capacity.bytes == 0)
|
||||
|
@ -278,29 +277,27 @@ public:
|
|||
return minRatio;
|
||||
}
|
||||
|
||||
virtual bool hasHealthyAvailableSpace(double minRatio) {
|
||||
bool hasHealthyAvailableSpace(double minRatio) const override {
|
||||
return getMinAvailableSpaceRatio() >= minRatio && getMinAvailableSpace() > SERVER_KNOBS->MIN_AVAILABLE_SPACE;
|
||||
}
|
||||
|
||||
virtual Future<Void> updateStorageMetrics() {
|
||||
return doUpdateStorageMetrics( this );
|
||||
}
|
||||
Future<Void> updateStorageMetrics() override { return doUpdateStorageMetrics(this); }
|
||||
|
||||
virtual bool isOptimal() {
|
||||
for(int i=0; i<servers.size(); i++) {
|
||||
if( servers[i]->lastKnownClass.machineClassFitness( ProcessClass::Storage ) > ProcessClass::UnsetFit ) {
|
||||
bool isOptimal() const override {
|
||||
for (const auto& server : servers) {
|
||||
if (server->lastKnownClass.machineClassFitness(ProcessClass::Storage) > ProcessClass::UnsetFit) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool isWrongConfiguration() { return wrongConfiguration; }
|
||||
virtual void setWrongConfiguration(bool wrongConfiguration) { this->wrongConfiguration = wrongConfiguration; }
|
||||
virtual bool isHealthy() { return healthy; }
|
||||
virtual void setHealthy(bool h) { healthy = h; }
|
||||
virtual int getPriority() { return priority; }
|
||||
virtual void setPriority(int p) { priority = p; }
|
||||
bool isWrongConfiguration() const override { return wrongConfiguration; }
|
||||
void setWrongConfiguration(bool wrongConfiguration) override { this->wrongConfiguration = wrongConfiguration; }
|
||||
bool isHealthy() const override { return healthy; }
|
||||
void setHealthy(bool h) override { healthy = h; }
|
||||
int getPriority() const override { return priority; }
|
||||
void setPriority(int p) override { priority = p; }
|
||||
virtual void addref() { ReferenceCounted<TCTeamInfo>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<TCTeamInfo>::delref(); }
|
||||
|
||||
|
@ -313,7 +310,7 @@ public:
|
|||
|
||||
private:
|
||||
// Calculate an "average" of the metrics replies that we received. Penalize teams from which we did not receive all replies.
|
||||
int64_t getLoadAverage() {
|
||||
int64_t getLoadAverage() const {
|
||||
int64_t bytesSum = 0;
|
||||
int added = 0;
|
||||
for(int i=0; i<servers.size(); i++)
|
||||
|
|
|
@ -38,28 +38,28 @@ struct RelocateShard {
|
|||
};
|
||||
|
||||
struct IDataDistributionTeam {
|
||||
virtual vector<StorageServerInterface> getLastKnownServerInterfaces() = 0;
|
||||
virtual int size() = 0;
|
||||
virtual vector<UID> const& getServerIDs() = 0;
|
||||
virtual vector<StorageServerInterface> getLastKnownServerInterfaces() const = 0;
|
||||
virtual int size() const = 0;
|
||||
virtual vector<UID> const& getServerIDs() const = 0;
|
||||
virtual void addDataInFlightToTeam( int64_t delta ) = 0;
|
||||
virtual int64_t getDataInFlightToTeam() = 0;
|
||||
virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) = 0;
|
||||
virtual int64_t getMinAvailableSpace( bool includeInFlight = true ) = 0;
|
||||
virtual double getMinAvailableSpaceRatio( bool includeInFlight = true ) = 0;
|
||||
virtual bool hasHealthyAvailableSpace( double minRatio ) = 0;
|
||||
virtual int64_t getDataInFlightToTeam() const = 0;
|
||||
virtual int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0) const = 0;
|
||||
virtual int64_t getMinAvailableSpace(bool includeInFlight = true) const = 0;
|
||||
virtual double getMinAvailableSpaceRatio(bool includeInFlight = true) const = 0;
|
||||
virtual bool hasHealthyAvailableSpace(double minRatio) const = 0;
|
||||
virtual Future<Void> updateStorageMetrics() = 0;
|
||||
virtual void addref() = 0;
|
||||
virtual void delref() = 0;
|
||||
virtual bool isHealthy() = 0;
|
||||
virtual bool isHealthy() const = 0;
|
||||
virtual void setHealthy(bool) = 0;
|
||||
virtual int getPriority() = 0;
|
||||
virtual int getPriority() const = 0;
|
||||
virtual void setPriority(int) = 0;
|
||||
virtual bool isOptimal() = 0;
|
||||
virtual bool isWrongConfiguration() = 0;
|
||||
virtual bool isOptimal() const = 0;
|
||||
virtual bool isWrongConfiguration() const = 0;
|
||||
virtual void setWrongConfiguration(bool) = 0;
|
||||
virtual void addServers(const vector<UID> &servers) = 0;
|
||||
|
||||
std::string getDesc() {
|
||||
std::string getDesc() const {
|
||||
const auto& servers = getLastKnownServerInterfaces();
|
||||
std::string s = format("Size %d; ", servers.size());
|
||||
for(int i=0; i<servers.size(); i++) {
|
||||
|
|
|
@ -83,61 +83,53 @@ struct RelocateData {
|
|||
};
|
||||
|
||||
class ParallelTCInfo : public ReferenceCounted<ParallelTCInfo>, public IDataDistributionTeam {
|
||||
public:
|
||||
vector<Reference<IDataDistributionTeam>> teams;
|
||||
vector<UID> tempServerIDs;
|
||||
|
||||
ParallelTCInfo() { }
|
||||
|
||||
void addTeam(Reference<IDataDistributionTeam> team) {
|
||||
teams.push_back(team);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
teams.clear();
|
||||
}
|
||||
|
||||
int64_t sum(std::function<int64_t(Reference<IDataDistributionTeam>)> func) {
|
||||
int64_t sum(std::function<int64_t(IDataDistributionTeam const&)> func) const {
|
||||
int64_t result = 0;
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
result += func(*it);
|
||||
for (const auto& team : teams) {
|
||||
result += func(*team);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
vector<T> collect(std::function < vector<T>(Reference<IDataDistributionTeam>)> func) {
|
||||
vector<T> result;
|
||||
template <class T>
|
||||
vector<T> collect(std::function<vector<T>(IDataDistributionTeam const&)> func) const {
|
||||
vector<T> result(teams.size());
|
||||
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
vector<T> newItems = func(*it);
|
||||
for (const auto& team : teams) {
|
||||
vector<T> newItems = func(*team);
|
||||
result.insert(result.end(), newItems.begin(), newItems.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool any(std::function<bool(Reference<IDataDistributionTeam>)> func) {
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
if (func(*it)) {
|
||||
bool any(std::function<bool(IDataDistributionTeam const&)> func) const {
|
||||
for (const auto& team : teams) {
|
||||
if (func(*team)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool all(std::function<bool(Reference<IDataDistributionTeam>)> func) {
|
||||
return !any([func](Reference<IDataDistributionTeam> team) {
|
||||
return !func(team);
|
||||
});
|
||||
public:
|
||||
ParallelTCInfo() = default;
|
||||
|
||||
void addTeam(Reference<IDataDistributionTeam> team) { teams.push_back(team); }
|
||||
|
||||
void clear() { teams.clear(); }
|
||||
|
||||
bool all(std::function<bool(IDataDistributionTeam const&)> func) const {
|
||||
return !any([func](IDataDistributionTeam const& team) { return !func(team); });
|
||||
}
|
||||
|
||||
virtual vector<StorageServerInterface> getLastKnownServerInterfaces() {
|
||||
return collect<StorageServerInterface>([](Reference<IDataDistributionTeam> team) {
|
||||
return team->getLastKnownServerInterfaces();
|
||||
});
|
||||
vector<StorageServerInterface> getLastKnownServerInterfaces() const override {
|
||||
return collect<StorageServerInterface>(
|
||||
[](IDataDistributionTeam const& team) { return team.getLastKnownServerInterfaces(); });
|
||||
}
|
||||
|
||||
virtual int size() {
|
||||
int size() const override {
|
||||
int totalSize = 0;
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
totalSize += (*it)->size();
|
||||
|
@ -145,94 +137,85 @@ public:
|
|||
return totalSize;
|
||||
}
|
||||
|
||||
virtual vector<UID> const& getServerIDs() {
|
||||
vector<UID> const& getServerIDs() const override {
|
||||
static vector<UID> tempServerIDs;
|
||||
tempServerIDs.clear();
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
vector<UID> const& childIDs = (*it)->getServerIDs();
|
||||
for (const auto& team : teams) {
|
||||
vector<UID> const &childIDs = team->getServerIDs();
|
||||
tempServerIDs.insert(tempServerIDs.end(), childIDs.begin(), childIDs.end());
|
||||
}
|
||||
return tempServerIDs;
|
||||
}
|
||||
|
||||
virtual void addDataInFlightToTeam(int64_t delta) {
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
(*it)->addDataInFlightToTeam(delta);
|
||||
void addDataInFlightToTeam(int64_t delta) override {
|
||||
for (auto& team : teams) {
|
||||
team->addDataInFlightToTeam(delta);
|
||||
}
|
||||
}
|
||||
|
||||
virtual int64_t getDataInFlightToTeam() {
|
||||
return sum([](Reference<IDataDistributionTeam> team) {
|
||||
return team->getDataInFlightToTeam();
|
||||
int64_t getDataInFlightToTeam() const override {
|
||||
return sum([](IDataDistributionTeam const& team) { return team.getDataInFlightToTeam(); });
|
||||
}
|
||||
|
||||
int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0) const override {
|
||||
return sum([includeInFlight, inflightPenalty](IDataDistributionTeam const& team) {
|
||||
return team.getLoadBytes(includeInFlight, inflightPenalty);
|
||||
});
|
||||
}
|
||||
|
||||
virtual int64_t getLoadBytes(bool includeInFlight = true, double inflightPenalty = 1.0 ) {
|
||||
return sum([includeInFlight, inflightPenalty](Reference<IDataDistributionTeam> team) {
|
||||
return team->getLoadBytes(includeInFlight, inflightPenalty);
|
||||
});
|
||||
}
|
||||
|
||||
virtual int64_t getMinAvailableSpace(bool includeInFlight = true) {
|
||||
int64_t getMinAvailableSpace(bool includeInFlight = true) const override {
|
||||
int64_t result = std::numeric_limits<int64_t>::max();
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
result = std::min(result, (*it)->getMinAvailableSpace(includeInFlight));
|
||||
for (const auto& team : teams) {
|
||||
result = std::min(result, team->getMinAvailableSpace(includeInFlight));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual double getMinAvailableSpaceRatio(bool includeInFlight = true) {
|
||||
double getMinAvailableSpaceRatio(bool includeInFlight = true) const override {
|
||||
double result = std::numeric_limits<double>::max();
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
result = std::min(result, (*it)->getMinAvailableSpaceRatio(includeInFlight));
|
||||
for (const auto& team : teams) {
|
||||
result = std::min(result, team->getMinAvailableSpaceRatio(includeInFlight));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual bool hasHealthyAvailableSpace(double minRatio) {
|
||||
return all([minRatio](Reference<IDataDistributionTeam> team) {
|
||||
return team->hasHealthyAvailableSpace(minRatio);
|
||||
});
|
||||
bool hasHealthyAvailableSpace(double minRatio) const {
|
||||
return all([minRatio](IDataDistributionTeam const& team) { return team.hasHealthyAvailableSpace(minRatio); });
|
||||
}
|
||||
|
||||
virtual Future<Void> updateStorageMetrics() {
|
||||
vector<Future<Void>> futures;
|
||||
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
futures.push_back((*it)->updateStorageMetrics());
|
||||
for (auto& team : teams) {
|
||||
futures.push_back(team->updateStorageMetrics());
|
||||
}
|
||||
return waitForAll(futures);
|
||||
}
|
||||
|
||||
virtual bool isOptimal() {
|
||||
return all([](Reference<IDataDistributionTeam> team) {
|
||||
return team->isOptimal();
|
||||
});
|
||||
bool isOptimal() const override {
|
||||
return all([](IDataDistributionTeam const& team) { return team.isOptimal(); });
|
||||
}
|
||||
|
||||
virtual bool isWrongConfiguration() {
|
||||
return any([](Reference<IDataDistributionTeam> team) {
|
||||
return team->isWrongConfiguration();
|
||||
});
|
||||
bool isWrongConfiguration() const override {
|
||||
return any([](IDataDistributionTeam const& team) { return team.isWrongConfiguration(); });
|
||||
}
|
||||
virtual void setWrongConfiguration(bool wrongConfiguration) {
|
||||
void setWrongConfiguration(bool wrongConfiguration) override {
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
(*it)->setWrongConfiguration(wrongConfiguration);
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool isHealthy() {
|
||||
return all([](Reference<IDataDistributionTeam> team) {
|
||||
return team->isHealthy();
|
||||
});
|
||||
bool isHealthy() const override {
|
||||
return all([](IDataDistributionTeam const& team) { return team.isHealthy(); });
|
||||
}
|
||||
|
||||
virtual void setHealthy(bool h) {
|
||||
void setHealthy(bool h) override {
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
(*it)->setHealthy(h);
|
||||
}
|
||||
}
|
||||
|
||||
virtual int getPriority() {
|
||||
int getPriority() const override {
|
||||
int priority = 0;
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
priority = std::max(priority, (*it)->getPriority());
|
||||
|
@ -240,7 +223,7 @@ public:
|
|||
return priority;
|
||||
}
|
||||
|
||||
virtual void setPriority(int p) {
|
||||
void setPriority(int p) override {
|
||||
for (auto it = teams.begin(); it != teams.end(); it++) {
|
||||
(*it)->setPriority(p);
|
||||
}
|
||||
|
@ -248,7 +231,7 @@ public:
|
|||
virtual void addref() { ReferenceCounted<ParallelTCInfo>::addref(); }
|
||||
virtual void delref() { ReferenceCounted<ParallelTCInfo>::delref(); }
|
||||
|
||||
virtual void addServers(const std::vector<UID>& servers) {
|
||||
void addServers(const std::vector<UID>& servers) override {
|
||||
ASSERT(!teams.empty());
|
||||
teams[0]->addServers(servers);
|
||||
}
|
||||
|
|
|
@ -218,7 +218,7 @@ public:
|
|||
void dispose() { shutdown(this, true); }
|
||||
void close() { shutdown(this, false); }
|
||||
|
||||
StorageBytes getStorageBytes() {
|
||||
StorageBytes getStorageBytes() const {
|
||||
int64_t free;
|
||||
int64_t total;
|
||||
|
||||
|
@ -789,7 +789,7 @@ public:
|
|||
{
|
||||
}
|
||||
|
||||
virtual location push( StringRef contents ) {
|
||||
location push(StringRef contents) override {
|
||||
ASSERT( recovered );
|
||||
uint8_t const* begin = contents.begin();
|
||||
uint8_t const* end = contents.end();
|
||||
|
@ -807,7 +807,7 @@ public:
|
|||
return endLocation();
|
||||
}
|
||||
|
||||
virtual void pop( location upTo ) {
|
||||
void pop(location upTo) override {
|
||||
ASSERT( !upTo.hi );
|
||||
ASSERT( !recovered || upTo.lo <= endLocation() );
|
||||
|
||||
|
@ -829,14 +829,14 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
virtual Future<Standalone<StringRef>> read(location from, location to, CheckHashes ch) { return read(this, from, to, ch); }
|
||||
|
||||
int getMaxPayload() {
|
||||
return Page::maxPayload;
|
||||
Future<Standalone<StringRef>> read(location from, location to, CheckHashes ch) override {
|
||||
return read(this, from, to, ch);
|
||||
}
|
||||
|
||||
int getMaxPayload() const { return Page::maxPayload; }
|
||||
|
||||
// Always commit an entire page. Commit overhead is the unused space in a to-be-committed page
|
||||
virtual int getCommitOverhead() {
|
||||
int getCommitOverhead() const override {
|
||||
if(!pushedPageCount()) {
|
||||
if(!anyPopped)
|
||||
return 0;
|
||||
|
@ -849,7 +849,7 @@ public:
|
|||
return backPage().remainingCapacity();
|
||||
}
|
||||
|
||||
virtual Future<Void> commit() {
|
||||
Future<Void> commit() override {
|
||||
ASSERT( recovered );
|
||||
if (!pushedPageCount()) {
|
||||
if (!anyPopped) return Void();
|
||||
|
@ -887,30 +887,30 @@ public:
|
|||
rawQueue->stall();
|
||||
}
|
||||
|
||||
virtual Future<bool> initializeRecovery(location recoverAt) { return initializeRecovery( this, recoverAt ); }
|
||||
virtual Future<Standalone<StringRef>> readNext( int bytes ) { return readNext(this, bytes); }
|
||||
Future<bool> initializeRecovery(location recoverAt) override { return initializeRecovery(this, recoverAt); }
|
||||
Future<Standalone<StringRef>> readNext(int bytes) override { return readNext(this, bytes); }
|
||||
|
||||
// FIXME: getNextReadLocation should ASSERT( initialized ), but the memory storage engine needs
|
||||
// to be changed to understand the new initializeRecovery protocol.
|
||||
virtual location getNextReadLocation() { return nextReadLocation; }
|
||||
virtual location getNextCommitLocation() { ASSERT( initialized ); return lastCommittedSeq + sizeof(Page); }
|
||||
virtual location getNextPushLocation() { ASSERT( initialized ); return endLocation(); }
|
||||
location getNextReadLocation() const override { return nextReadLocation; }
|
||||
location getNextCommitLocation() const override {
|
||||
ASSERT(initialized);
|
||||
return lastCommittedSeq + sizeof(Page);
|
||||
}
|
||||
location getNextPushLocation() const override {
|
||||
ASSERT(initialized);
|
||||
return endLocation();
|
||||
}
|
||||
|
||||
virtual Future<Void> getError() { return rawQueue->getError(); }
|
||||
virtual Future<Void> onClosed() { return rawQueue->onClosed(); }
|
||||
Future<Void> getError() override { return rawQueue->getError(); }
|
||||
Future<Void> onClosed() override { return rawQueue->onClosed(); }
|
||||
|
||||
virtual void dispose() {
|
||||
void dispose() override {
|
||||
TraceEvent("DQDestroy", dbgid).detail("LastPoppedSeq", lastPoppedSeq).detail("PoppedSeq", poppedSeq).detail("NextPageSeq", nextPageSeq).detail("File0Name", rawQueue->files[0].dbgFilename);
|
||||
dispose(this);
|
||||
}
|
||||
ACTOR static void dispose(DiskQueue* self) {
|
||||
wait( self->onSafeToDestruct() );
|
||||
TraceEvent("DQDestroyDone", self->dbgid).detail("File0Name", self->rawQueue->files[0].dbgFilename);
|
||||
self->rawQueue->dispose();
|
||||
delete self;
|
||||
}
|
||||
|
||||
virtual void close() {
|
||||
void close() override {
|
||||
TraceEvent("DQClose", dbgid)
|
||||
.detail("LastPoppedSeq", lastPoppedSeq)
|
||||
.detail("PoppedSeq", poppedSeq)
|
||||
|
@ -919,6 +919,17 @@ public:
|
|||
.detail("File0Name", rawQueue->files[0].dbgFilename);
|
||||
close(this);
|
||||
}
|
||||
|
||||
StorageBytes getStorageBytes() const override { return rawQueue->getStorageBytes(); }
|
||||
|
||||
private:
|
||||
ACTOR static void dispose(DiskQueue* self) {
|
||||
wait(self->onSafeToDestruct());
|
||||
TraceEvent("DQDestroyDone", self->dbgid).detail("File0Name", self->rawQueue->files[0].dbgFilename);
|
||||
self->rawQueue->dispose();
|
||||
delete self;
|
||||
}
|
||||
|
||||
ACTOR static void close(DiskQueue* self) {
|
||||
wait( self->onSafeToDestruct() );
|
||||
TraceEvent("DQCloseDone", self->dbgid).detail("File0Name", self->rawQueue->files[0].dbgFilename);
|
||||
|
@ -926,11 +937,6 @@ public:
|
|||
delete self;
|
||||
}
|
||||
|
||||
virtual StorageBytes getStorageBytes() {
|
||||
return rawQueue->getStorageBytes();
|
||||
}
|
||||
|
||||
private:
|
||||
#pragma pack(push, 1)
|
||||
struct PageHeader {
|
||||
union {
|
||||
|
@ -1399,29 +1405,30 @@ public:
|
|||
Future<bool> initializeRecovery(location recoverAt) { return queue->initializeRecovery(recoverAt); }
|
||||
Future<Standalone<StringRef>> readNext( int bytes ) { return readNext(this, bytes); }
|
||||
|
||||
virtual location getNextReadLocation() { return queue->getNextReadLocation(); }
|
||||
location getNextReadLocation() const override { return queue->getNextReadLocation(); }
|
||||
|
||||
virtual Future<Standalone<StringRef>> read( location start, location end, CheckHashes ch ) { return queue->read( start, end, ch ); }
|
||||
virtual location getNextCommitLocation() { return queue->getNextCommitLocation(); }
|
||||
virtual location getNextPushLocation() { return queue->getNextPushLocation(); }
|
||||
Future<Standalone<StringRef>> read(location start, location end, CheckHashes ch) override {
|
||||
return queue->read(start, end, ch);
|
||||
}
|
||||
location getNextCommitLocation() const override { return queue->getNextCommitLocation(); }
|
||||
location getNextPushLocation() const override { return queue->getNextPushLocation(); }
|
||||
|
||||
|
||||
virtual location push( StringRef contents ) {
|
||||
location push(StringRef contents) override {
|
||||
pushed = queue->push(contents);
|
||||
return pushed;
|
||||
}
|
||||
|
||||
virtual void pop( location upTo ) {
|
||||
void pop(location upTo) override {
|
||||
popped = std::max(popped, upTo);
|
||||
ASSERT_WE_THINK(committed >= popped);
|
||||
queue->pop(std::min(committed, popped));
|
||||
}
|
||||
|
||||
virtual int getCommitOverhead() {
|
||||
int getCommitOverhead() const override {
|
||||
return queue->getCommitOverhead() + (popped > committed ? queue->getMaxPayload() : 0);
|
||||
}
|
||||
|
||||
Future<Void> commit() {
|
||||
Future<Void> commit() override {
|
||||
location pushLocation = pushed;
|
||||
location popLocation = popped;
|
||||
|
||||
|
@ -1444,7 +1451,7 @@ public:
|
|||
return commitFuture;
|
||||
}
|
||||
|
||||
virtual StorageBytes getStorageBytes() { return queue->getStorageBytes(); }
|
||||
StorageBytes getStorageBytes() const override { return queue->getStorageBytes(); }
|
||||
|
||||
private:
|
||||
DiskQueue *queue;
|
||||
|
|
|
@ -71,18 +71,23 @@ public:
|
|||
// Before calling push or commit, the caller *must* perform recovery by calling readNext() until it returns less than the requested number of bytes.
|
||||
// Thereafter it may not be called again.
|
||||
virtual Future<Standalone<StringRef>> readNext( int bytes ) = 0; // Return the next bytes in the queue (beginning, the first time called, with the first unpopped byte)
|
||||
virtual location getNextReadLocation() = 0; // Returns a location >= the location of all bytes previously returned by readNext(), and <= the location of all bytes subsequently returned
|
||||
virtual location getNextCommitLocation() = 0; // If commit() were to be called, all buffered writes would be written starting at `location`.
|
||||
virtual location getNextPushLocation() = 0; // If push() were to be called, the pushed data would be written starting at `location`.
|
||||
virtual location getNextReadLocation()
|
||||
const = 0; // Returns a location >= the location of all bytes previously returned by readNext(), and <= the
|
||||
// location of all bytes subsequently returned
|
||||
virtual location getNextCommitLocation()
|
||||
const = 0; // If commit() were to be called, all buffered writes would be written starting at `location`.
|
||||
virtual location getNextPushLocation()
|
||||
const = 0; // If push() were to be called, the pushed data would be written starting at `location`.
|
||||
|
||||
virtual Future<Standalone<StringRef>> read( location start, location end, CheckHashes vc ) = 0;
|
||||
virtual location push( StringRef contents ) = 0; // Appends the given bytes to the byte stream. Returns a location token representing the *end* of the contents.
|
||||
virtual void pop( location upTo ) = 0; // Removes all bytes before the given location token from the byte stream.
|
||||
virtual Future<Void> commit() = 0; // returns when all prior pushes and pops are durable. If commit does not return (due to close or a crash), any prefix of the pushed bytes and any prefix of the popped bytes may be durable.
|
||||
|
||||
virtual int getCommitOverhead() = 0; // returns the amount of unused space that would be written by a commit that immediately followed this call
|
||||
virtual int getCommitOverhead() const = 0; // returns the amount of unused space that would be written by a commit
|
||||
// that immediately followed this call
|
||||
|
||||
virtual StorageBytes getStorageBytes() = 0;
|
||||
virtual StorageBytes getStorageBytes() const = 0;
|
||||
};
|
||||
|
||||
template<>
|
||||
|
|
|
@ -364,7 +364,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( MAX_PROXY_COMPUTE, 2.0 );
|
||||
init( PROXY_COMPUTE_BUCKETS, 20000 );
|
||||
init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );
|
||||
init( TXN_STATE_SEND_AMOUNT, 2 );
|
||||
init( TXN_STATE_SEND_AMOUNT, 4 );
|
||||
init( ASK_READ_VERSION_FROM_MASTER, true );
|
||||
|
||||
// Master Server
|
||||
|
@ -382,7 +382,8 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( PROVISIONAL_START_DELAY, 1.0 );
|
||||
init( PROVISIONAL_MAX_DELAY, 60.0 );
|
||||
init( PROVISIONAL_DELAY_GROWTH, 1.5 );
|
||||
init( SECONDS_BEFORE_RECRUIT_BACKUP_WORKER, 4.0 ); if( randomize && BUGGIFY ) SECONDS_BEFORE_RECRUIT_BACKUP_WORKER = deterministicRandom()->random01() * 8;
|
||||
init( SECONDS_BEFORE_RECRUIT_BACKUP_WORKER, 4.0 ); if( randomize && BUGGIFY ) SECONDS_BEFORE_RECRUIT_BACKUP_WORKER = deterministicRandom()->random01() * 8;
|
||||
init( CC_INTERFACE_TIMEOUT, 10.0 ); if( randomize && BUGGIFY ) CC_INTERFACE_TIMEOUT = 0.0;
|
||||
|
||||
// Resolver
|
||||
init( SAMPLE_OFFSET_PER_KEY, 100 );
|
||||
|
@ -558,7 +559,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( MIN_TAG_PAGES_READ_RATE, 1.0e4 ); if( randomize && BUGGIFY ) MIN_TAG_PAGES_READ_RATE = 0;
|
||||
init( READ_TAG_MEASUREMENT_INTERVAL, 30.0 ); if( randomize && BUGGIFY ) READ_TAG_MEASUREMENT_INTERVAL = 1.0;
|
||||
init( OPERATION_COST_BYTE_FACTOR, 16384 ); if( randomize && BUGGIFY ) OPERATION_COST_BYTE_FACTOR = 4096;
|
||||
init( PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS, false ); if( randomize && BUGGIFY ) PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS = true;
|
||||
init( PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS, true ); if( randomize && BUGGIFY ) PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS = false;
|
||||
|
||||
//Wait Failure
|
||||
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
|
||||
|
@ -611,7 +612,7 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( FASTRESTORE_NUM_LOADERS, 2 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_LOADERS = deterministicRandom()->random01() * 10 + 1; }
|
||||
init( FASTRESTORE_NUM_APPLIERS, 3 ); if( randomize && BUGGIFY ) { FASTRESTORE_NUM_APPLIERS = deterministicRandom()->random01() * 10 + 1; }
|
||||
init( FASTRESTORE_TXN_BATCH_MAX_BYTES, 1048576.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_TXN_BATCH_MAX_BYTES = deterministicRandom()->random01() * 1024.0 * 1024.0 + 1.0; }
|
||||
init( FASTRESTORE_VERSIONBATCH_MAX_BYTES, 10.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VERSIONBATCH_MAX_BYTES = deterministicRandom()->random01() * 10.0 * 1024.0 * 1024.0 * 1024.0; }
|
||||
init( FASTRESTORE_VERSIONBATCH_MAX_BYTES, 2.0 * 1024.0 * 1024.0 ); if( randomize && BUGGIFY ) { FASTRESTORE_VERSIONBATCH_MAX_BYTES = deterministicRandom()->random01() * 10.0 * 1024.0 * 1024.0 * 1024.0; }
|
||||
init( FASTRESTORE_VB_PARALLELISM, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_PARALLELISM = deterministicRandom()->random01() * 20 + 1; }
|
||||
init( FASTRESTORE_VB_MONITOR_DELAY, 30 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_MONITOR_DELAY = deterministicRandom()->random01() * 20 + 1; }
|
||||
init( FASTRESTORE_VB_LAUNCH_DELAY, 5 ); if( randomize && BUGGIFY ) { FASTRESTORE_VB_LAUNCH_DELAY = deterministicRandom()->random01() * 60 + 1; }
|
||||
|
@ -643,9 +644,8 @@ void ServerKnobs::initialize(bool randomize, ClientKnobs* clientKnobs, bool isSi
|
|||
init( REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES, 10 );
|
||||
init( REDWOOD_LAZY_CLEAR_MIN_PAGES, 0 );
|
||||
init( REDWOOD_LAZY_CLEAR_MAX_PAGES, 1e6 );
|
||||
init( REDWOOD_REMAP_CLEANUP_BATCH_SIZE, 5000 );
|
||||
init( REDWOOD_REMAP_CLEANUP_VERSION_LAG_MIN, 4 );
|
||||
init( REDWOOD_REMAP_CLEANUP_VERSION_LAG_MAX, 15 );
|
||||
init( REDWOOD_REMAP_CLEANUP_WINDOW, 50 );
|
||||
init( REDWOOD_REMAP_CLEANUP_LAG, 0.1 );
|
||||
init( REDWOOD_LOGGING_INTERVAL, 5.0 );
|
||||
|
||||
// Server request latency measurement
|
||||
|
|
|
@ -309,6 +309,7 @@ public:
|
|||
double PROVISIONAL_DELAY_GROWTH;
|
||||
double PROVISIONAL_MAX_DELAY;
|
||||
double SECONDS_BEFORE_RECRUIT_BACKUP_WORKER;
|
||||
double CC_INTERFACE_TIMEOUT;
|
||||
|
||||
// Resolver
|
||||
int64_t KEY_BYTES_PER_SAMPLE;
|
||||
|
@ -575,9 +576,8 @@ public:
|
|||
int REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES; // Number of pages to try to pop from the lazy delete queue and process at once
|
||||
int REDWOOD_LAZY_CLEAR_MIN_PAGES; // Minimum number of pages to free before ending a lazy clear cycle, unless the queue is empty
|
||||
int REDWOOD_LAZY_CLEAR_MAX_PAGES; // Maximum number of pages to free before ending a lazy clear cycle, unless the queue is empty
|
||||
int REDWOOD_REMAP_CLEANUP_BATCH_SIZE; // Number of queue entries for remap cleanup to process and potentially coalesce at once.
|
||||
int REDWOOD_REMAP_CLEANUP_VERSION_LAG_MIN; // Number of versions between head of remap queue and oldest retained version before remap cleanup starts
|
||||
int REDWOOD_REMAP_CLEANUP_VERSION_LAG_MAX; // Number of versions between head of remap queue and oldest retained version before remap cleanup may stop
|
||||
int64_t REDWOOD_REMAP_CLEANUP_WINDOW; // Remap remover lag interval in which to coalesce page writes
|
||||
double REDWOOD_REMAP_CLEANUP_LAG; // Maximum allowed remap remover lag behind the cleanup window as a multiple of the window size
|
||||
double REDWOOD_LOGGING_INTERVAL;
|
||||
|
||||
// Server request latency measurement
|
||||
|
|
|
@ -136,7 +136,7 @@ Future<Standalone<StringRef>> LogSystemDiskQueueAdapter::readNext( int bytes ) {
|
|||
return LogSystemDiskQueueAdapterImpl::readNext(this, bytes);
|
||||
}
|
||||
|
||||
IDiskQueue::location LogSystemDiskQueueAdapter::getNextReadLocation() {
|
||||
IDiskQueue::location LogSystemDiskQueueAdapter::getNextReadLocation() const {
|
||||
return IDiskQueue::location( 0, recoveryQueueLoc );
|
||||
}
|
||||
|
||||
|
|
|
@ -74,23 +74,35 @@ public:
|
|||
Future<CommitMessage> getCommitMessage();
|
||||
|
||||
// IClosable interface
|
||||
virtual Future<Void> getError();
|
||||
virtual Future<Void> onClosed();
|
||||
virtual void dispose();
|
||||
virtual void close();
|
||||
Future<Void> getError() override;
|
||||
Future<Void> onClosed() override;
|
||||
void dispose() override;
|
||||
void close() override;
|
||||
|
||||
// IDiskQueue interface
|
||||
virtual Future<bool> initializeRecovery(location recoverAt) { return false; }
|
||||
virtual Future<Standalone<StringRef>> readNext( int bytes );
|
||||
virtual IDiskQueue::location getNextReadLocation();
|
||||
virtual IDiskQueue::location getNextCommitLocation() { ASSERT(false); throw internal_error(); }
|
||||
virtual IDiskQueue::location getNextPushLocation() { ASSERT(false); throw internal_error(); }
|
||||
virtual Future<Standalone<StringRef>> read( location start, location end, CheckHashes ch ) { ASSERT(false); throw internal_error(); }
|
||||
virtual IDiskQueue::location push( StringRef contents );
|
||||
virtual void pop( IDiskQueue::location upTo );
|
||||
virtual Future<Void> commit();
|
||||
virtual StorageBytes getStorageBytes() { ASSERT(false); throw internal_error(); }
|
||||
virtual int getCommitOverhead() { return 0; } //SOMEDAY: could this be more accurate?
|
||||
Future<bool> initializeRecovery(location recoverAt) override { return false; }
|
||||
Future<Standalone<StringRef>> readNext(int bytes) override;
|
||||
IDiskQueue::location getNextReadLocation() const override;
|
||||
IDiskQueue::location getNextCommitLocation() const override {
|
||||
ASSERT(false);
|
||||
throw internal_error();
|
||||
}
|
||||
IDiskQueue::location getNextPushLocation() const override {
|
||||
ASSERT(false);
|
||||
throw internal_error();
|
||||
}
|
||||
Future<Standalone<StringRef>> read(location start, location end, CheckHashes ch) override {
|
||||
ASSERT(false);
|
||||
throw internal_error();
|
||||
}
|
||||
IDiskQueue::location push(StringRef contents) override;
|
||||
void pop(IDiskQueue::location upTo) override;
|
||||
Future<Void> commit() override;
|
||||
StorageBytes getStorageBytes() const override {
|
||||
ASSERT(false);
|
||||
throw internal_error();
|
||||
}
|
||||
int getCommitOverhead() const override { return 0; } // SOMEDAY: could this be more accurate?
|
||||
|
||||
private:
|
||||
Reference<AsyncVar<PeekTxsInfo>> peekLocality;
|
||||
|
|
|
@ -468,7 +468,7 @@ struct ProxyCommitData {
|
|||
return tags;
|
||||
}
|
||||
|
||||
const bool needsCacheTag(KeyRangeRef range) {
|
||||
bool needsCacheTag(KeyRangeRef range) {
|
||||
auto ranges = cacheInfo.intersectingRanges(range);
|
||||
for(auto r : ranges) {
|
||||
if(r.value()) {
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "fdbserver/RestoreApplier.actor.h"
|
||||
#include "fdbserver/RestoreLoader.actor.h"
|
||||
|
||||
#include "flow/Platform.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
ACTOR static Future<Void> clearDB(Database cx);
|
||||
|
@ -716,6 +717,8 @@ ACTOR static Future<Version> collectBackupFiles(Reference<IBackupContainer> bc,
|
|||
|
||||
std::set<RestoreFileFR> uniqueRangeFiles;
|
||||
std::set<RestoreFileFR> uniqueLogFiles;
|
||||
double rangeSize = 0;
|
||||
double logSize = 0;
|
||||
*minRangeVersion = MAX_VERSION;
|
||||
for (const RangeFile& f : restorable.get().ranges) {
|
||||
TraceEvent(SevFRDebugInfo, "FastRestoreControllerPhaseCollectBackupFiles").detail("RangeFile", f.toString());
|
||||
|
@ -726,6 +729,7 @@ ACTOR static Future<Version> collectBackupFiles(Reference<IBackupContainer> bc,
|
|||
TraceEvent(SevFRDebugInfo, "FastRestoreControllerPhaseCollectBackupFiles")
|
||||
.detail("RangeFileFR", file.toString());
|
||||
uniqueRangeFiles.insert(file);
|
||||
rangeSize += file.fileSize;
|
||||
*minRangeVersion = std::min(*minRangeVersion, file.version);
|
||||
}
|
||||
for (const LogFile& f : restorable.get().logs) {
|
||||
|
@ -737,6 +741,7 @@ ACTOR static Future<Version> collectBackupFiles(Reference<IBackupContainer> bc,
|
|||
TraceEvent(SevFRDebugInfo, "FastRestoreControllerPhaseCollectBackupFiles").detail("LogFileFR", file.toString());
|
||||
logFiles->push_back(file);
|
||||
uniqueLogFiles.insert(file);
|
||||
logSize += file.fileSize;
|
||||
}
|
||||
// Assign unique range files and log files to output
|
||||
rangeFiles->assign(uniqueRangeFiles.begin(), uniqueRangeFiles.end());
|
||||
|
@ -745,7 +750,9 @@ ACTOR static Future<Version> collectBackupFiles(Reference<IBackupContainer> bc,
|
|||
TraceEvent("FastRestoreControllerPhaseCollectBackupFilesDone")
|
||||
.detail("BackupDesc", desc.toString())
|
||||
.detail("RangeFiles", rangeFiles->size())
|
||||
.detail("LogFiles", logFiles->size());
|
||||
.detail("LogFiles", logFiles->size())
|
||||
.detail("RangeFileBytes", rangeSize)
|
||||
.detail("LogFileBytes", logSize);
|
||||
return request.targetVersion;
|
||||
}
|
||||
|
||||
|
|
|
@ -424,7 +424,7 @@ struct RestoreControllerData : RestoreRoleData, public ReferenceCounted<RestoreC
|
|||
if (bcUrl == url && bc.isValid()) {
|
||||
return;
|
||||
}
|
||||
printf("initBackupContainer, url:%s\n", url.toString().c_str());
|
||||
TraceEvent("FastRestoreControllerInitBackupContainer").detail("URL", url);
|
||||
bcUrl = url;
|
||||
bc = IBackupContainer::openContainer(url.toString());
|
||||
}
|
||||
|
|
|
@ -123,9 +123,13 @@ struct CacheRangeInfo : ReferenceCounted<CacheRangeInfo>, NonCopyable {
|
|||
}
|
||||
};
|
||||
|
||||
const int VERSION_OVERHEAD = 64 + sizeof(Version) + sizeof(Standalone<VersionUpdateRef>) + //mutationLog, 64b overhead for map
|
||||
2 * (64 + sizeof(Version) + sizeof(Reference<VersionedMap<KeyRef,
|
||||
ValueOrClearToRef>::PTreeT>)); //versioned map [ x2 for createNewVersion(version+1) ], 64b overhead for map
|
||||
const int VERSION_OVERHEAD =
|
||||
64 + sizeof(Version) + sizeof(Standalone<VerUpdateRef>) + // mutationLog, 64b overhead for map
|
||||
2 * (64 + sizeof(Version) +
|
||||
sizeof(
|
||||
Reference<VersionedMap<KeyRef,
|
||||
ValueOrClearToRef>::PTreeT>)); // versioned map [ x2 for createNewVersion(version+1)
|
||||
// ], 64b overhead for map
|
||||
static int mvccStorageBytes( MutationRef const& m ) { return VersionedMap<KeyRef, ValueOrClearToRef>::overheadPerItem * 2 + (MutationRef::OVERHEAD_BYTES + m.param1.size() + m.param2.size()) * 2; }
|
||||
|
||||
struct FetchInjectionInfo {
|
||||
|
@ -141,7 +145,7 @@ private:
|
|||
VersionedData versionedData;
|
||||
// in-memory mutationLog that the versionedData contains references to
|
||||
// TODO change it to a deque, already contains mutations in version order
|
||||
std::map<Version, Standalone<VersionUpdateRef>> mutationLog; // versions (durableVersion, version]
|
||||
std::map<Version, Standalone<VerUpdateRef>> mutationLog; // versions (durableVersion, version]
|
||||
|
||||
public:
|
||||
UID thisServerID; // unique id
|
||||
|
@ -297,12 +301,12 @@ public:
|
|||
}
|
||||
|
||||
Arena lastArena;
|
||||
std::map<Version, Standalone<VersionUpdateRef>> const& getMutationLog() const { return mutationLog; }
|
||||
std::map<Version, Standalone<VersionUpdateRef>>& getMutableMutationLog() { return mutationLog; }
|
||||
std::map<Version, Standalone<VerUpdateRef>> const& getMutationLog() const { return mutationLog; }
|
||||
std::map<Version, Standalone<VerUpdateRef>>& getMutableMutationLog() { return mutationLog; }
|
||||
VersionedData const& data() const { return versionedData; }
|
||||
VersionedData& mutableData() { return versionedData; }
|
||||
|
||||
Standalone<VersionUpdateRef>& addVersionToMutationLog(Version v) {
|
||||
Standalone<VerUpdateRef>& addVersionToMutationLog(Version v) {
|
||||
// return existing version...
|
||||
auto m = mutationLog.find(v);
|
||||
if (m != mutationLog.end())
|
||||
|
@ -317,11 +321,12 @@ public:
|
|||
return u;
|
||||
}
|
||||
|
||||
MutationRef addMutationToMutationLog(Standalone<VersionUpdateRef> &mLV, MutationRef const& m){
|
||||
MutationRef addMutationToMutationLog(Standalone<VerUpdateRef>& mLV, MutationRef const& m){
|
||||
//TODO find out more
|
||||
//byteSampleApplyMutation(m, mLV.version);
|
||||
counters.bytesInput += mvccStorageBytes(m);
|
||||
return mLV.mutations.push_back_deep( mLV.arena(), m );
|
||||
return mLV.push_back_deep(mLV.arena(), m);
|
||||
}
|
||||
|
||||
};
|
||||
void applyMutation( StorageCacheUpdater* updater, StorageCacheData *data, MutationRef const& mutation, Version version );
|
||||
|
||||
|
@ -1600,7 +1605,6 @@ public:
|
|||
private:
|
||||
Version fromVersion;
|
||||
KeyRef cacheStartKey;
|
||||
bool nowAssigned;
|
||||
bool processedCacheStartKey;
|
||||
|
||||
// Applies private mutations, as the name suggests. It basically establishes the key-ranges
|
||||
|
|
|
@ -135,22 +135,6 @@ struct TLogConfirmRunningRequest {
|
|||
}
|
||||
};
|
||||
|
||||
// A version number bundled with a MutationListRef of mutations and a private-data flag.
// Sizes are delegated to the mutation list; all three fields are serialized.
struct VersionUpdateRef {
|
||||
// Version associated with `mutations` (invalidVersion when default-constructed).
Version version;
|
||||
MutationListRef mutations;
|
||||
// False by default; copied verbatim by the arena-copy constructor.
bool isPrivateData;
|
||||
|
||||
// Default state: invalidVersion, not private data, default-constructed mutation list.
VersionUpdateRef() : isPrivateData(false), version(invalidVersion) {}
|
||||
// Copy `from`, constructing the mutation list from from.mutations with arena `to`.
VersionUpdateRef( Arena& to, const VersionUpdateRef& from ) : version(from.version), mutations( to, from.mutations ), isPrivateData( from.isPrivateData ) {}
|
||||
// Both size queries forward to the mutation list; version and isPrivateData are not counted.
int totalSize() const { return mutations.totalSize(); }
|
||||
int expectedSize() const { return mutations.expectedSize(); }
|
||||
|
||||
// Serializes version, mutations and isPrivateData, in that order.
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, version, mutations, isPrivateData);
|
||||
}
|
||||
};
|
||||
|
||||
struct VerUpdateRef {
|
||||
Version version;
|
||||
VectorRef<MutationRef> mutations;
|
||||
|
@ -160,6 +144,11 @@ struct VerUpdateRef {
|
|||
VerUpdateRef( Arena& to, const VerUpdateRef& from ) : version(from.version), mutations( to, from.mutations ), isPrivateData( from.isPrivateData ) {}
|
||||
int expectedSize() const { return mutations.expectedSize(); }
|
||||
|
||||
// Appends m to `mutations` via push_back_deep with the supplied arena, then returns the last
// element of `mutations` by value.
// NOTE(review): presumably push_back_deep copies m's data into `arena` — confirm against VectorRef.
MutationRef push_back_deep(Arena& arena, const MutationRef& m) {
|
||||
mutations.push_back_deep(arena, m);
|
||||
return mutations.back();
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
void serialize( Ar& ar ) {
|
||||
serializer(ar, version, mutations, isPrivateData);
|
||||
|
|
|
@ -1108,6 +1108,8 @@ class DWALPager : public IPager2 {
|
|||
public:
|
||||
typedef FastAllocatedPage Page;
|
||||
typedef FIFOQueue<LogicalPageID> LogicalPageQueueT;
|
||||
typedef std::map<Version, LogicalPageID> VersionToPageMapT;
|
||||
typedef std::unordered_map<LogicalPageID, VersionToPageMapT> PageToVersionedMapT;
|
||||
|
||||
#pragma pack(push, 1)
|
||||
struct DelayedFreePage {
|
||||
|
@ -1129,6 +1131,10 @@ public:
|
|||
LogicalPageID originalPageID;
|
||||
LogicalPageID newPageID;
|
||||
|
||||
// True if this entry records a free of originalPageID rather than a remap, which is encoded
// as newPageID == invalidLogicalPageID.
bool isFree() const {
|
||||
return newPageID == invalidLogicalPageID;
|
||||
}
|
||||
|
||||
bool operator<(const RemappedPage& rhs) { return version < rhs.version; }
|
||||
|
||||
std::string toString() const {
|
||||
|
@ -1145,19 +1151,14 @@ public:
|
|||
// If the file already exists, pageSize might be different than desiredPageSize
|
||||
// Use pageCacheSizeBytes == 0 to use default from flow knobs
|
||||
// If filename is empty, the pager will exist only in memory and once the cache is full writes will fail.
|
||||
DWALPager(int desiredPageSize, std::string filename, int64_t pageCacheSizeBytes, bool memoryOnly = false)
|
||||
DWALPager(int desiredPageSize, std::string filename, int64_t pageCacheSizeBytes, Version remapCleanupWindow, bool memoryOnly = false)
|
||||
: desiredPageSize(desiredPageSize), filename(filename), pHeader(nullptr), pageCacheBytes(pageCacheSizeBytes),
|
||||
memoryOnly(memoryOnly) {
|
||||
memoryOnly(memoryOnly), remapCleanupWindow(remapCleanupWindow) {
|
||||
|
||||
if (!g_redwoodMetricsActor.isValid()) {
|
||||
g_redwoodMetricsActor = redwoodMetricsLogger();
|
||||
}
|
||||
|
||||
if (pageCacheBytes == 0) {
|
||||
pageCacheBytes = g_network->isSimulated()
|
||||
? (BUGGIFY ? FLOW_KNOBS->BUGGIFY_SIM_PAGE_CACHE_4K : FLOW_KNOBS->SIM_PAGE_CACHE_4K)
|
||||
: FLOW_KNOBS->PAGE_CACHE_4K;
|
||||
}
|
||||
commitFuture = Void();
|
||||
recoverFuture = forwardError(recover(this), errorPromise);
|
||||
}
|
||||
|
@ -1263,9 +1264,7 @@ public:
|
|||
|
||||
Standalone<VectorRef<RemappedPage>> remaps = wait(self->remapQueue.peekAll());
|
||||
for (auto& r : remaps) {
|
||||
if (r.newPageID != invalidLogicalPageID) {
|
||||
self->remappedPages[r.originalPageID][r.version] = r.newPageID;
|
||||
}
|
||||
self->remappedPages[r.originalPageID][r.version] = r.newPageID;
|
||||
}
|
||||
|
||||
// If the header was recovered from the backup at Page 1 then write and sync it to Page 0 before continuing.
|
||||
|
@ -1488,10 +1487,12 @@ public:
|
|||
void freePage(LogicalPageID pageID, Version v) override {
|
||||
// If pageID has been remapped, then it can't be freed until all existing remaps for that page have been undone,
|
||||
// so queue it for later deletion
|
||||
if (remappedPages.find(pageID) != remappedPages.end()) {
|
||||
auto i = remappedPages.find(pageID);
|
||||
if (i != remappedPages.end()) {
|
||||
debug_printf("DWALPager(%s) op=freeRemapped %s @%" PRId64 " oldestVersion=%" PRId64 "\n", filename.c_str(),
|
||||
toString(pageID).c_str(), v, pLastCommittedHeader->oldestVersion);
|
||||
remapQueue.pushBack(RemappedPage{ v, pageID, invalidLogicalPageID });
|
||||
i->second[v] = invalidLogicalPageID;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1590,6 +1591,7 @@ public:
|
|||
debug_printf("DWALPager(%s) read %s @%" PRId64 " -> %s\n", filename.c_str(), toString(pageID).c_str(),
|
||||
v, toString(j->second).c_str());
|
||||
pageID = j->second;
|
||||
ASSERT(pageID != invalidLogicalPageID);
|
||||
}
|
||||
} else {
|
||||
debug_printf("DWALPager(%s) read %s @%" PRId64 " (not remapped)\n", filename.c_str(),
|
||||
|
@ -1621,128 +1623,97 @@ public:
|
|||
return std::min(pLastCommittedHeader->oldestVersion, snapshots.front().version);
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> remapCopyAndFree(DWALPager* self, RemappedPage m) {
|
||||
debug_printf("DWALPager(%s) remapCleanup copyAndFree %s\n", self->filename.c_str(), m.toString().c_str());
|
||||
ACTOR static Future<Void> remapCopyAndFree(DWALPager* self, RemappedPage p, VersionToPageMapT *m, VersionToPageMapT::iterator i) {
|
||||
debug_printf("DWALPager(%s) remapCleanup copyAndFree %s\n", self->filename.c_str(), p.toString().c_str());
|
||||
|
||||
// Read the data from the page that the original was mapped to
|
||||
Reference<IPage> data = wait(self->readPage(m.newPageID, false));
|
||||
Reference<IPage> data = wait(self->readPage(p.newPageID, false));
|
||||
|
||||
// Write the data to the original page so it can be read using its original pageID
|
||||
self->updatePage(m.originalPageID, data);
|
||||
self->updatePage(p.originalPageID, data);
|
||||
++g_redwoodMetrics.pagerRemapCopy;
|
||||
|
||||
// Remove all remaps for the original page ID up through version
|
||||
auto i = self->remappedPages.find(m.originalPageID);
|
||||
i->second.erase(i->second.begin(), i->second.upper_bound(m.version));
|
||||
// If the version map for this page is now empty, erase it
|
||||
if (i->second.empty()) {
|
||||
self->remappedPages.erase(i);
|
||||
}
|
||||
|
||||
// Now that the remap has been undone nothing will read this page so it can be freed as of the next
|
||||
// commit.
|
||||
self->freeUnmappedPage(m.newPageID, 0);
|
||||
// Now that the page data has been copied to the original page, the versioned page map entry is no longer
|
||||
// needed and the new page ID can be freed as of the next commit.
|
||||
m->erase(i);
|
||||
self->freeUnmappedPage(p.newPageID, 0);
|
||||
++g_redwoodMetrics.pagerRemapFree;
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
// Returns effectiveOldestVersion() minus the version of the entry at the head of the remap queue,
// or 0 when peek() yields no entry.
ACTOR static Future<Version> getRemapLag(DWALPager* self) {
|
||||
// Peek only: the head entry is inspected, not removed from the queue.
Optional<RemappedPage> head = wait(self->remapQueue.peek());
|
||||
if (head.present()) {
|
||||
return self->effectiveOldestVersion() - head.get().version;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> remapCleanup(DWALPager* self) {
|
||||
state ActorCollection copies(true);
|
||||
state Promise<Void> signal;
|
||||
copies.add(signal.getFuture());
|
||||
|
||||
self->remapCleanupStop = false;
|
||||
|
||||
// The oldest retained version cannot change during the cleanup run as this would allow multiple read/copy
|
||||
// operations with the same original page ID destination to be started and they could complete out of order.
|
||||
state Version oldestRetainedVersion = self->effectiveOldestVersion();
|
||||
|
||||
// Cutoff is the version we can pop to
|
||||
state RemappedPage cutoff;
|
||||
cutoff.version = self->effectiveOldestVersion();
|
||||
cutoff.version = oldestRetainedVersion - self->remapCleanupWindow;
|
||||
|
||||
// Each page is only updated at most once per version, so in order to coalesce multiple updates
|
||||
// to the same page and skip some page writes we have to accumulate multiple versions worth of
|
||||
// poppable entries.
|
||||
Version lag = wait(getRemapLag(self));
|
||||
debug_printf("DWALPager(%s) remapCleanup versionLag=%" PRId64 "\n", self->filename.c_str(), lag);
|
||||
if (lag < SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_VERSION_LAG_MIN) {
|
||||
debug_printf("DWALPager(%s) not starting, lag too low\n", self->filename.c_str());
|
||||
return Void();
|
||||
}
|
||||
// Minimum version we must pop to before obeying stop command.
|
||||
state Version minStopVersion = cutoff.version - (self->remapCleanupWindow * SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_LAG);
|
||||
|
||||
loop {
|
||||
// Pop up to the pop size limit from the queue, but only keep the latest remap queue entry per
|
||||
// original page ID. This will coalesce multiple remaps of the same LogicalPageID within the
|
||||
// interval of pages being unmapped to a single page copy.
|
||||
state int toPop = SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_BATCH_SIZE;
|
||||
state std::unordered_map<LogicalPageID, RemappedPage> toCopy;
|
||||
toCopy.reserve(toPop);
|
||||
|
||||
// Take up to batch size pages from front of queue
|
||||
while (toPop > 0) {
|
||||
state Optional<RemappedPage> p = wait(self->remapQueue.pop(cutoff));
|
||||
debug_printf("DWALPager(%s) remapCleanup popped %s\n", self->filename.c_str(), ::toString(p).c_str());
|
||||
if (!p.present()) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Get the existing remap entry for the original page, which could be newly initialized
|
||||
auto& m = toCopy[p.get().originalPageID];
|
||||
// If version is invalid then this is a newly constructed RemappedPage, so copy p.get() over it
|
||||
if (m.version != invalidVersion) {
|
||||
ASSERT(m.version < p.get().version);
|
||||
ASSERT(m.newPageID != invalidLogicalPageID);
|
||||
// We're replacing a previously popped item so we can avoid copying it over the original.
|
||||
debug_printf("DWALPager(%s) remapCleanup elided %s\n", self->filename.c_str(),
|
||||
m.toString().c_str());
|
||||
// The remapped pages entries will be cleaned up below.
|
||||
self->freeUnmappedPage(m.newPageID, 0);
|
||||
++g_redwoodMetrics.pagerRemapFree;
|
||||
++g_redwoodMetrics.pagerRemapSkip;
|
||||
}
|
||||
m = p.get();
|
||||
|
||||
--toPop;
|
||||
}
|
||||
|
||||
std::vector<Future<Void>> copies;
|
||||
|
||||
for (auto& e : toCopy) {
|
||||
const RemappedPage& m = e.second;
|
||||
// If newPageID is invalid, originalPageID page was freed at version, not remapped
|
||||
if (m.newPageID == invalidLogicalPageID) {
|
||||
debug_printf("DWALPager(%s) remapCleanup freeNoCopy %s\n", self->filename.c_str(),
|
||||
m.toString().c_str());
|
||||
self->remappedPages.erase(m.originalPageID);
|
||||
self->freeUnmappedPage(m.originalPageID, 0);
|
||||
++g_redwoodMetrics.pagerRemapFree;
|
||||
} else {
|
||||
copies.push_back(remapCopyAndFree(self, m));
|
||||
}
|
||||
}
|
||||
|
||||
wait(waitForAll(copies));
|
||||
|
||||
// Stop if there was nothing more that could be popped
|
||||
if (toPop > 0) {
|
||||
state Optional<RemappedPage> p = wait(self->remapQueue.pop(cutoff));
|
||||
debug_printf("DWALPager(%s) remapCleanup popped %s\n", self->filename.c_str(), ::toString(p).c_str());
|
||||
if (!p.present()) {
|
||||
break;
|
||||
}
|
||||
|
||||
// If the stop flag is set then stop but only if the remap lag is below the maximum allowed
|
||||
if (self->remapCleanupStop) {
|
||||
Version lag = wait(getRemapLag(self));
|
||||
if (lag <= SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_VERSION_LAG_MAX) {
|
||||
break;
|
||||
} else {
|
||||
debug_printf("DWALPager(%s) remapCleanup refusing to stop, versionLag=%" PRId64 "\n",
|
||||
self->filename.c_str(), lag);
|
||||
// Get iterator to the versioned page map entry for the original page
|
||||
auto iPageMapPair = self->remappedPages.find(p.get().originalPageID);
|
||||
// The iterator must be valid and not empty and its first page map entry must match p's version
|
||||
ASSERT(iPageMapPair != self->remappedPages.end());
|
||||
ASSERT(!iPageMapPair->second.empty());
|
||||
auto iVersionPagePair = iPageMapPair->second.begin();
|
||||
ASSERT(iVersionPagePair->first == p.get().version);
|
||||
|
||||
// If this is a free page entry then free the original page ID
|
||||
if(p.get().isFree()) {
|
||||
debug_printf("DWALPager(%s) remapCleanup free %s\n", self->filename.c_str(),
|
||||
p.get().toString().c_str());
|
||||
self->freeUnmappedPage(p.get().originalPageID, 0);
|
||||
++g_redwoodMetrics.pagerRemapFree;
|
||||
|
||||
// There can't be any more entries in the page map after this one so verify that
|
||||
// the map size is 1 and erase the map for p's original page ID.
|
||||
ASSERT(iPageMapPair->second.size() == 1);
|
||||
self->remappedPages.erase(iPageMapPair);
|
||||
}
|
||||
else {
|
||||
// If there is no next page map entry or there is but it is after the oldest retained version
|
||||
// then p must be copied to unmap it.
|
||||
auto iNextVersionPagePair = iVersionPagePair;
|
||||
++iNextVersionPagePair;
|
||||
if(iNextVersionPagePair == iPageMapPair->second.end() || iNextVersionPagePair->first > oldestRetainedVersion) {
|
||||
// Copy the remapped page to the original so it can be freed.
|
||||
copies.add(remapCopyAndFree(self, p.get(), &iPageMapPair->second, iVersionPagePair));
|
||||
}
|
||||
else {
|
||||
debug_printf("DWALPager(%s) remapCleanup skipAndFree %s\n", self->filename.c_str(), p.get().toString().c_str());
|
||||
self->freeUnmappedPage(p.get().newPageID, 0);
|
||||
++g_redwoodMetrics.pagerRemapFree;
|
||||
++g_redwoodMetrics.pagerRemapSkip;
|
||||
iPageMapPair->second.erase(iVersionPagePair);
|
||||
}
|
||||
}
|
||||
|
||||
// If the stop flag is set and we've reached the minimum stop version according to the allowed lag, then stop.
|
||||
if (self->remapCleanupStop && p.get().version >= minStopVersion) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
debug_printf("DWALPager(%s) remapCleanup stopped (stop=%d)\n", self->filename.c_str(), self->remapCleanupStop);
|
||||
signal.send(Void());
|
||||
wait(copies.getResult());
|
||||
return Void();
|
||||
}
|
||||
|
||||
|
@ -1918,7 +1889,9 @@ public:
|
|||
Future<int64_t> getUserPageCount() override {
|
||||
return map(getUserPageCount_cleanup(this), [=](Void) {
|
||||
int64_t userPages = pHeader->pageCount - 2 - freeList.numPages - freeList.numEntries -
|
||||
delayedFreeList.numPages - delayedFreeList.numEntries - remapQueue.numPages;
|
||||
delayedFreeList.numPages - delayedFreeList.numEntries - remapQueue.numPages
|
||||
- remapQueue.numEntries;
|
||||
|
||||
debug_printf("DWALPager(%s) userPages=%" PRId64 " totalPageCount=%" PRId64 " freeQueuePages=%" PRId64
|
||||
" freeQueueCount=%" PRId64 " delayedFreeQueuePages=%" PRId64 " delayedFreeQueueCount=%" PRId64
|
||||
" remapQueuePages=%" PRId64 " remapQueueCount=%" PRId64 "\n",
|
||||
|
@ -2029,6 +2002,7 @@ private:
|
|||
DelayedFreePageQueueT delayedFreeList;
|
||||
|
||||
RemapQueueT remapQueue;
|
||||
Version remapCleanupWindow;
|
||||
|
||||
struct SnapshotEntry {
|
||||
Version version;
|
||||
|
@ -2043,7 +2017,7 @@ private:
|
|||
};
|
||||
|
||||
// TODO: Better data structure
|
||||
std::unordered_map<LogicalPageID, std::map<Version, LogicalPageID>> remappedPages;
|
||||
PageToVersionedMapT remappedPages;
|
||||
|
||||
std::deque<SnapshotEntry> snapshots;
|
||||
};
|
||||
|
@ -2249,7 +2223,7 @@ struct RedwoodRecordRef {
|
|||
inline RedwoodRecordRef withoutValue() const { return RedwoodRecordRef(key, version); }
|
||||
|
||||
inline RedwoodRecordRef withMaxPageID() const {
|
||||
return RedwoodRecordRef(key, version, StringRef((uint8_t *)&maxPageID, sizeof(maxPageID)));
|
||||
return RedwoodRecordRef(key, version, StringRef((uint8_t*)&maxPageID, sizeof(maxPageID)));
|
||||
}
|
||||
|
||||
// Truncate (key, version, part) tuple to len bytes.
|
||||
|
@ -3175,11 +3149,6 @@ public:
|
|||
ACTOR static Future<Void> destroyAndCheckSanity_impl(VersionedBTree* self) {
|
||||
ASSERT(g_network->isSimulated());
|
||||
|
||||
// This isn't pretty but remap cleanup is controlled by knobs and for this test we need the entire remap queue
|
||||
// to be processed.
|
||||
const_cast<ServerKnobs*>(SERVER_KNOBS)->REDWOOD_REMAP_CLEANUP_VERSION_LAG_MIN = 0;
|
||||
const_cast<ServerKnobs*>(SERVER_KNOBS)->REDWOOD_REMAP_CLEANUP_VERSION_LAG_MAX = 0;
|
||||
|
||||
debug_printf("Clearing tree.\n");
|
||||
self->setWriteVersion(self->getLatestVersion() + 1);
|
||||
self->clear(KeyRangeRef(dbBegin.key, dbEnd.key));
|
||||
|
@ -5168,7 +5137,7 @@ public:
|
|||
debug_printf("move%s() first loop cursor=%s\n", forward ? "Next" : "Prev", self->toString().c_str());
|
||||
auto& entry = self->path.back();
|
||||
bool success;
|
||||
if(entry.cursor.valid()) {
|
||||
if (entry.cursor.valid()) {
|
||||
success = forward ? entry.cursor.moveNext() : entry.cursor.movePrev();
|
||||
} else {
|
||||
success = forward ? entry.cursor.moveFirst() : false;
|
||||
|
@ -5443,7 +5412,13 @@ public:
|
|||
KeyValueStoreRedwoodUnversioned(std::string filePrefix, UID logID)
|
||||
: m_filePrefix(filePrefix), m_concurrentReads(new FlowLock(SERVER_KNOBS->REDWOOD_KVSTORE_CONCURRENT_READS)) {
|
||||
// TODO: This constructor should really just take an IVersionedStore
|
||||
IPager2* pager = new DWALPager(SERVER_KNOBS->REDWOOD_DEFAULT_PAGE_SIZE, filePrefix, 0);
|
||||
|
||||
int64_t pageCacheBytes = g_network->isSimulated()
|
||||
? (BUGGIFY ? FLOW_KNOBS->BUGGIFY_SIM_PAGE_CACHE_4K : FLOW_KNOBS->SIM_PAGE_CACHE_4K)
|
||||
: FLOW_KNOBS->PAGE_CACHE_4K;
|
||||
Version remapCleanupWindow = BUGGIFY ? deterministicRandom()->randomInt64(0, 1000) : SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW;
|
||||
|
||||
IPager2* pager = new DWALPager(SERVER_KNOBS->REDWOOD_DEFAULT_PAGE_SIZE, filePrefix, pageCacheBytes, remapCleanupWindow);
|
||||
m_tree = new VersionedBTree(pager, filePrefix);
|
||||
m_init = catchError(init_impl(this));
|
||||
}
|
||||
|
@ -5540,9 +5515,9 @@ public:
|
|||
// Read page contents without using waits
|
||||
bool isRoot = cur.inRoot();
|
||||
BTreePage::BinaryTree::Cursor leafCursor = cur.popPath();
|
||||
while(leafCursor.valid()) {
|
||||
while (leafCursor.valid()) {
|
||||
KeyValueRef kv = leafCursor.get().toKeyValueRef();
|
||||
if(kv.key >= keys.end) {
|
||||
if (kv.key >= keys.end) {
|
||||
break;
|
||||
}
|
||||
accumulatedBytes += kv.expectedSize();
|
||||
|
@ -5554,7 +5529,7 @@ public:
|
|||
}
|
||||
// Stop if the leaf cursor is still valid which means we hit a key or size limit or
|
||||
// if we started in the root page
|
||||
if(leafCursor.valid() || isRoot) {
|
||||
if (leafCursor.valid() || isRoot) {
|
||||
break;
|
||||
}
|
||||
wait(cur.moveNext());
|
||||
|
@ -5565,9 +5540,9 @@ public:
|
|||
// Read page contents without using waits
|
||||
bool isRoot = cur.inRoot();
|
||||
BTreePage::BinaryTree::Cursor leafCursor = cur.popPath();
|
||||
while(leafCursor.valid()) {
|
||||
while (leafCursor.valid()) {
|
||||
KeyValueRef kv = leafCursor.get().toKeyValueRef();
|
||||
if(kv.key < keys.begin) {
|
||||
if (kv.key < keys.begin) {
|
||||
break;
|
||||
}
|
||||
accumulatedBytes += kv.expectedSize();
|
||||
|
@ -5579,7 +5554,7 @@ public:
|
|||
}
|
||||
// Stop if the leaf cursor is still valid which means we hit a key or size limit or
|
||||
// if we started in the root page
|
||||
if(leafCursor.valid() || isRoot) {
|
||||
if (leafCursor.valid() || isRoot) {
|
||||
break;
|
||||
}
|
||||
wait(cur.movePrev());
|
||||
|
@ -6045,7 +6020,8 @@ ACTOR Future<int> seekAll(VersionedBTree* btree, Version v,
|
|||
|
||||
// Verify the result of point reads for every set or cleared key at the given version
|
||||
ACTOR Future<int> seekAllBTreeCursor(VersionedBTree* btree, Version v,
|
||||
std::map<std::pair<std::string, Version>, Optional<std::string>>* written, int* pErrorCount) {
|
||||
std::map<std::pair<std::string, Version>, Optional<std::string>>* written,
|
||||
int* pErrorCount) {
|
||||
state std::map<std::pair<std::string, Version>, Optional<std::string>>::const_iterator i = written->cbegin();
|
||||
state std::map<std::pair<std::string, Version>, Optional<std::string>>::const_iterator iEnd = written->cend();
|
||||
state int errors = 0;
|
||||
|
@ -6072,22 +6048,19 @@ ACTOR Future<int> seekAllBTreeCursor(VersionedBTree* btree, Version v,
|
|||
if (!foundKey) {
|
||||
printf("Verify ERROR: key_not_found: '%s' -> '%s' @%" PRId64 "\n", key.c_str(),
|
||||
val.get().c_str(), ver);
|
||||
}
|
||||
else if (!hasValue) {
|
||||
} else if (!hasValue) {
|
||||
printf("Verify ERROR: value_not_found: '%s' -> '%s' @%" PRId64 "\n", key.c_str(),
|
||||
val.get().c_str(), ver);
|
||||
}
|
||||
else if (!valueMatch) {
|
||||
} else if (!valueMatch) {
|
||||
printf("Verify ERROR: value_incorrect: for '%s' found '%s' expected '%s' @%" PRId64 "\n",
|
||||
key.c_str(), cur.get().value.get().toString().c_str(), val.get().c_str(),
|
||||
ver);
|
||||
key.c_str(), cur.get().value.get().toString().c_str(), val.get().c_str(), ver);
|
||||
}
|
||||
}
|
||||
} else if (foundKey && hasValue) {
|
||||
++errors;
|
||||
++*pErrorCount;
|
||||
printf("Verify ERROR: cleared_key_found: '%s' -> '%s' @%" PRId64 "\n", key.c_str(),
|
||||
cur.get().value.get().toString().c_str(), ver);
|
||||
cur.get().value.get().toString().c_str(), ver);
|
||||
}
|
||||
}
|
||||
++i;
|
||||
|
@ -6125,12 +6098,12 @@ ACTOR Future<Void> verify(VersionedBTree* btree, FutureStream<Version> vStream,
|
|||
state Reference<IStoreCursor> cur = btree->readAtVersion(v);
|
||||
|
||||
debug_printf("Verifying entire key range at version %" PRId64 "\n", v);
|
||||
if(deterministicRandom()->coinflip()) {
|
||||
fRangeAll = verifyRange(btree, LiteralStringRef(""), LiteralStringRef("\xff\xff"), v, written,
|
||||
pErrorCount);
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
fRangeAll =
|
||||
verifyRange(btree, LiteralStringRef(""), LiteralStringRef("\xff\xff"), v, written, pErrorCount);
|
||||
} else {
|
||||
fRangeAll = verifyRangeBTreeCursor(btree, LiteralStringRef(""), LiteralStringRef("\xff\xff"), v, written,
|
||||
pErrorCount);
|
||||
fRangeAll = verifyRangeBTreeCursor(btree, LiteralStringRef(""), LiteralStringRef("\xff\xff"), v,
|
||||
written, pErrorCount);
|
||||
}
|
||||
if (serial) {
|
||||
wait(success(fRangeAll));
|
||||
|
@ -6140,7 +6113,7 @@ ACTOR Future<Void> verify(VersionedBTree* btree, FutureStream<Version> vStream,
|
|||
Key end = randomKV().key;
|
||||
debug_printf("Verifying range (%s, %s) at version %" PRId64 "\n", toString(begin).c_str(),
|
||||
toString(end).c_str(), v);
|
||||
if(deterministicRandom()->coinflip()) {
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
fRangeRandom = verifyRange(btree, begin, end, v, written, pErrorCount);
|
||||
} else {
|
||||
fRangeRandom = verifyRangeBTreeCursor(btree, begin, end, v, written, pErrorCount);
|
||||
|
@ -6150,7 +6123,7 @@ ACTOR Future<Void> verify(VersionedBTree* btree, FutureStream<Version> vStream,
|
|||
}
|
||||
|
||||
debug_printf("Verifying seeks to each changed key at version %" PRId64 "\n", v);
|
||||
if(deterministicRandom()->coinflip()) {
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
fSeekAll = seekAll(btree, v, written, pErrorCount);
|
||||
} else {
|
||||
fSeekAll = seekAllBTreeCursor(btree, v, written, pErrorCount);
|
||||
|
@ -6991,7 +6964,8 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
state int maxKeySize = deterministicRandom()->randomInt(1, pageSize * 2);
|
||||
state int maxValueSize = randomSize(pageSize * 25);
|
||||
state int maxCommitSize = shortTest ? 1000 : randomSize(std::min<int>((maxKeySize + maxValueSize) * 20000, 10e6));
|
||||
state int mutationBytesTarget = shortTest ? 100000 : randomSize(std::min<int>(maxCommitSize * 100, pageSize * 100000));
|
||||
state int mutationBytesTarget =
|
||||
shortTest ? 100000 : randomSize(std::min<int>(maxCommitSize * 100, pageSize * 100000));
|
||||
state double clearProbability = deterministicRandom()->random01() * .1;
|
||||
state double clearSingleKeyProbability = deterministicRandom()->random01();
|
||||
state double clearPostSetProbability = deterministicRandom()->random01() * .1;
|
||||
|
@ -7000,6 +6974,8 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
state double maxDuration = 60;
|
||||
state int64_t cacheSizeBytes =
|
||||
pagerMemoryOnly ? 2e9 : (BUGGIFY ? deterministicRandom()->randomInt(1, 10 * pageSize) : 0);
|
||||
state Version versionIncrement = deterministicRandom()->randomInt64(1, 1e8);
|
||||
state Version remapCleanupWindow = deterministicRandom()->randomInt64(0, versionIncrement * 50);
|
||||
|
||||
printf("\n");
|
||||
printf("pagerMemoryOnly: %d\n", pagerMemoryOnly);
|
||||
|
@ -7016,6 +6992,8 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
printf("coldStartProbability: %f\n", coldStartProbability);
|
||||
printf("advanceOldVersionProbability: %f\n", advanceOldVersionProbability);
|
||||
printf("cacheSizeBytes: %s\n", cacheSizeBytes == 0 ? "default" : format("%" PRId64, cacheSizeBytes).c_str());
|
||||
printf("versionIncrement: %" PRId64 "\n", versionIncrement);
|
||||
printf("remapCleanupWindow: %" PRId64 "\n", remapCleanupWindow);
|
||||
printf("\n");
|
||||
|
||||
printf("Deleting existing test data...\n");
|
||||
|
@ -7024,7 +7002,7 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
printf("Initializing...\n");
|
||||
state double startTime = now();
|
||||
|
||||
pager = new DWALPager(pageSize, pagerFile, cacheSizeBytes, pagerMemoryOnly);
|
||||
pager = new DWALPager(pageSize, pagerFile, cacheSizeBytes, remapCleanupWindow, pagerMemoryOnly);
|
||||
state VersionedBTree* btree = new VersionedBTree(pager, pagerFile);
|
||||
wait(btree->init());
|
||||
|
||||
|
@ -7058,7 +7036,7 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
mutationBytesTarget = mutationBytes.get();
|
||||
}
|
||||
|
||||
// Sometimes advance the version
|
||||
// Sometimes increment the version
|
||||
if (deterministicRandom()->random01() < 0.10) {
|
||||
++version;
|
||||
btree->setWriteVersion(version);
|
||||
|
@ -7169,7 +7147,7 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
// amount.
|
||||
if (deterministicRandom()->random01() < advanceOldVersionProbability) {
|
||||
btree->setOldestVersion(btree->getLastCommittedVersion() -
|
||||
deterministicRandom()->randomInt(0, btree->getLastCommittedVersion() -
|
||||
deterministicRandom()->randomInt64(0, btree->getLastCommittedVersion() -
|
||||
btree->getOldestVersion() + 1));
|
||||
}
|
||||
|
||||
|
@ -7212,7 +7190,7 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
wait(closedFuture);
|
||||
|
||||
printf("Reopening btree from disk.\n");
|
||||
IPager2* pager = new DWALPager(pageSize, pagerFile, 0);
|
||||
IPager2* pager = new DWALPager(pageSize, pagerFile, cacheSizeBytes, remapCleanupWindow);
|
||||
btree = new VersionedBTree(pager, pagerFile);
|
||||
wait(btree->init());
|
||||
|
||||
|
@ -7226,7 +7204,7 @@ TEST_CASE("!/redwood/correctness/btree") {
|
|||
randomTask = randomReader(btree) || btree->getError();
|
||||
}
|
||||
|
||||
++version;
|
||||
version += versionIncrement;
|
||||
btree->setWriteVersion(version);
|
||||
}
|
||||
|
||||
|
@ -7305,7 +7283,7 @@ TEST_CASE("!/redwood/correctness/pager/cow") {
|
|||
deleteFile(pagerFile);
|
||||
|
||||
int pageSize = 4096;
|
||||
state IPager2* pager = new DWALPager(pageSize, pagerFile, 0);
|
||||
state IPager2* pager = new DWALPager(pageSize, pagerFile, 0, 0);
|
||||
|
||||
wait(success(pager->init()));
|
||||
state LogicalPageID id = wait(pager->newPageID());
|
||||
|
@ -7343,10 +7321,6 @@ TEST_CASE("!/redwood/performance/set") {
|
|||
|
||||
state int pageSize = SERVER_KNOBS->REDWOOD_DEFAULT_PAGE_SIZE;
|
||||
state int64_t pageCacheBytes = FLOW_KNOBS->PAGE_CACHE_4K;
|
||||
DWALPager* pager = new DWALPager(pageSize, pagerFile, pageCacheBytes);
|
||||
state VersionedBTree* btree = new VersionedBTree(pager, pagerFile);
|
||||
wait(btree->init());
|
||||
|
||||
state int nodeCount = 1e9;
|
||||
state int maxRecordsPerCommit = 20000;
|
||||
state int maxKVBytesPerCommit = 20e6;
|
||||
|
@ -7359,6 +7333,7 @@ TEST_CASE("!/redwood/performance/set") {
|
|||
state int maxConsecutiveRun = 10;
|
||||
state char firstKeyChar = 'a';
|
||||
state char lastKeyChar = 'm';
|
||||
state Version remapCleanupWindow = SERVER_KNOBS->REDWOOD_REMAP_CLEANUP_WINDOW;
|
||||
|
||||
printf("pageSize: %d\n", pageSize);
|
||||
printf("pageCacheBytes: %" PRId64 "\n", pageCacheBytes);
|
||||
|
@ -7373,6 +7348,11 @@ TEST_CASE("!/redwood/performance/set") {
|
|||
printf("maxCommitSize: %d\n", maxKVBytesPerCommit);
|
||||
printf("kvBytesTarget: %" PRId64 "\n", kvBytesTarget);
|
||||
printf("KeyLexicon '%c' to '%c'\n", firstKeyChar, lastKeyChar);
|
||||
printf("remapCleanupWindow: %" PRId64 "\n", remapCleanupWindow);
|
||||
|
||||
DWALPager* pager = new DWALPager(pageSize, pagerFile, pageCacheBytes, remapCleanupWindow);
|
||||
state VersionedBTree* btree = new VersionedBTree(pager, pagerFile);
|
||||
wait(btree->init());
|
||||
|
||||
state int64_t kvBytesThisCommit = 0;
|
||||
state int64_t kvBytesTotal = 0;
|
||||
|
|
|
@ -716,7 +716,7 @@ ACTOR Future<Void> storageServer(IKeyValueStore* persistentData, StorageServerIn
|
|||
Reference<AsyncVar<ServerDBInfo>> db, std::string folder,
|
||||
Promise<Void> recovered,
|
||||
Reference<ClusterConnectionFile> connFile ); // changes pssi->id() to be the recovered ID); // changes pssi->id() to be the recovered ID
|
||||
ACTOR Future<Void> masterServer(MasterInterface mi, Reference<AsyncVar<ServerDBInfo>> db,
|
||||
ACTOR Future<Void> masterServer(MasterInterface mi, Reference<AsyncVar<ServerDBInfo>> db, Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface,
|
||||
ServerCoordinators serverCoordinators, LifetimeToken lifetime, bool forceRecovery);
|
||||
ACTOR Future<Void> masterProxyServer(MasterProxyInterface proxy, InitializeMasterProxyRequest req,
|
||||
Reference<AsyncVar<ServerDBInfo>> db, std::string whitelistBinPaths);
|
||||
|
|
|
@ -1968,7 +1968,8 @@ int main(int argc, char* argv[]) {
|
|||
<< FastAllocator<1024>::pageCount << " "
|
||||
<< FastAllocator<2048>::pageCount << " "
|
||||
<< FastAllocator<4096>::pageCount << " "
|
||||
<< FastAllocator<8192>::pageCount << std::endl;
|
||||
<< FastAllocator<8192>::pageCount << " "
|
||||
<< FastAllocator<16384>::pageCount << std::endl;
|
||||
|
||||
vector< std::pair<std::string, const char*> > typeNames;
|
||||
for( auto i = allocInstr.begin(); i != allocInstr.end(); ++i ) {
|
||||
|
|
|
@ -1650,8 +1650,19 @@ ACTOR Future<Void> masterCore( Reference<MasterData> self ) {
|
|||
throw internal_error();
|
||||
}
|
||||
|
||||
ACTOR Future<Void> masterServer( MasterInterface mi, Reference<AsyncVar<ServerDBInfo>> db, ServerCoordinators coordinators, LifetimeToken lifetime, bool forceRecovery )
|
||||
ACTOR Future<Void> masterServer( MasterInterface mi, Reference<AsyncVar<ServerDBInfo>> db, Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface, ServerCoordinators coordinators, LifetimeToken lifetime, bool forceRecovery )
|
||||
{
|
||||
state Future<Void> ccTimeout = delay(SERVER_KNOBS->CC_INTERFACE_TIMEOUT);
|
||||
while(!ccInterface->get().present() || db->get().clusterInterface != ccInterface->get().get()) {
|
||||
wait(ccInterface->onChange() || db->onChange() || ccTimeout);
|
||||
if(ccTimeout.isReady()) {
|
||||
TraceEvent("MasterTerminated", mi.id()).detail("Reason", "Timeout")
|
||||
.detail("CCInterface", ccInterface->get().present() ? ccInterface->get().get().id() : UID())
|
||||
.detail("DBInfoInterface", db->get().clusterInterface.id());
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
|
||||
state Future<Void> onDBChange = Void();
|
||||
state PromiseStream<Future<Void>> addActor;
|
||||
state Reference<MasterData> self( new MasterData( db, mi, coordinators, db->get().clusterInterface, LiteralStringRef(""), addActor, forceRecovery ) );
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
#include "fdbserver/NetworkTest.h"
#include "flow/Knobs.h"
#include "flow/ActorCollection.h"
#include "flow/UnitTest.h"
#include <inttypes.h>
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
UID WLTOKEN_NETWORKTEST( -1, 2 );
|
||||
|
||||
|
@ -233,3 +236,338 @@ ACTOR Future<Void> networkTestClient( std:: string testServers ) {
|
|||
wait( waitForAll( clients ) );
|
||||
return Void();
|
||||
}
|
||||
|
||||
struct RandomIntRange {
|
||||
int min;
|
||||
int max;
|
||||
|
||||
RandomIntRange(int low = 0, int high = 0) : min(low), max(high) {
|
||||
}
|
||||
|
||||
// Accepts strings of the form "min:max" or "N"
|
||||
// where N will be used for both min and max
|
||||
RandomIntRange(std::string str) {
|
||||
StringRef high = str;
|
||||
StringRef low = high.eat(":");
|
||||
if(high.size() == 0) {
|
||||
high = low;
|
||||
}
|
||||
min = low.size() == 0 ? 0 : atol(low.toString().c_str());
|
||||
max = high.size() == 0 ? 0 : atol(high.toString().c_str());
|
||||
if(min > max) {
|
||||
std::swap(min, max);
|
||||
}
|
||||
}
|
||||
|
||||
int get() const {
|
||||
return (max == 0) ? 0 : nondeterministicRandom()->randomInt(min, max + 1);
|
||||
}
|
||||
|
||||
std::string toString() const {
|
||||
return format("%d:%d", min, max);
|
||||
}
|
||||
};
|
||||
|
||||
struct P2PNetworkTest {
|
||||
// Addresses to listen on
|
||||
std::vector<Reference<IListener>> listeners;
|
||||
// Addresses to randomly connect to
|
||||
std::vector<NetworkAddress> remotes;
|
||||
// Number of outgoing connections to maintain
|
||||
int connectionsOut;
|
||||
// Message size range to send on outgoing established connections
|
||||
RandomIntRange requestBytes;
|
||||
// Message size to reply with on incoming established connections
|
||||
RandomIntRange replyBytes;
|
||||
// Number of requests/replies per session
|
||||
RandomIntRange requests;
|
||||
// Delay after message send and receive are complete before closing connection
|
||||
RandomIntRange idleMilliseconds;
|
||||
// Random delay before socket reads
|
||||
RandomIntRange waitReadMilliseconds;
|
||||
// Random delay before socket writes
|
||||
RandomIntRange waitWriteMilliseconds;
|
||||
|
||||
double startTime;
|
||||
int64_t bytesSent;
|
||||
int64_t bytesReceived;
|
||||
int sessionsIn;
|
||||
int sessionsOut;
|
||||
int connectErrors;
|
||||
int acceptErrors;
|
||||
int sessionErrors;
|
||||
|
||||
Standalone<StringRef> msgBuffer;
|
||||
|
||||
std::string statsString() {
|
||||
double elapsed = now() - startTime;
|
||||
std::string s = format("%.2f MB/s bytes in %.2f MB/s bytes out %.2f/s completed sessions in %.2f/s completed sessions out ",
|
||||
bytesReceived / elapsed / 1e6, bytesSent / elapsed / 1e6, sessionsIn / elapsed, sessionsOut / elapsed);
|
||||
s += format("Total Errors %d connect=%d accept=%d session=%d",
|
||||
connectErrors + acceptErrors + sessionErrors, connectErrors, acceptErrors, sessionErrors);
|
||||
bytesSent = 0;
|
||||
bytesReceived = 0;
|
||||
sessionsIn = 0;
|
||||
sessionsOut = 0;
|
||||
startTime = now();
|
||||
return s;
|
||||
}
|
||||
|
||||
P2PNetworkTest() {}
|
||||
|
||||
P2PNetworkTest(std::string listenerAddresses, std::string remoteAddresses, int connectionsOut, RandomIntRange sendMsgBytes, RandomIntRange recvMsgBytes, RandomIntRange requests, RandomIntRange idleMilliseconds, RandomIntRange waitReadMilliseconds, RandomIntRange waitWriteMilliseconds)
|
||||
: connectionsOut(connectionsOut), requestBytes(sendMsgBytes), replyBytes(recvMsgBytes), requests(requests), idleMilliseconds(idleMilliseconds), waitReadMilliseconds(waitReadMilliseconds), waitWriteMilliseconds(waitWriteMilliseconds) {
|
||||
bytesSent = 0;
|
||||
bytesReceived = 0;
|
||||
sessionsIn = 0;
|
||||
sessionsOut = 0;
|
||||
connectErrors = 0;
|
||||
acceptErrors = 0;
|
||||
sessionErrors = 0;
|
||||
msgBuffer = makeString(std::max(sendMsgBytes.max, recvMsgBytes.max));
|
||||
|
||||
if(!remoteAddresses.empty()) {
|
||||
remotes = NetworkAddress::parseList(remoteAddresses);
|
||||
}
|
||||
|
||||
if(!listenerAddresses.empty()) {
|
||||
for(auto a : NetworkAddress::parseList(listenerAddresses)) {
|
||||
listeners.push_back(INetworkConnections::net()->listen(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NetworkAddress randomRemote() {
|
||||
return remotes[nondeterministicRandom()->randomInt(0, remotes.size())];
|
||||
}
|
||||
|
||||
ACTOR static Future<Standalone<StringRef>> readMsg(P2PNetworkTest *self, Reference<IConnection> conn) {
|
||||
state Standalone<StringRef> buffer = makeString(sizeof(int));
|
||||
state int writeOffset = 0;
|
||||
state bool gotHeader = false;
|
||||
|
||||
// Fill buffer sequentially until the initial bytesToRead is read (or more), then read
|
||||
// intended message size and add it to bytesToRead, continue if needed until bytesToRead is 0.
|
||||
loop {
|
||||
int stutter = self->waitReadMilliseconds.get();
|
||||
if(stutter > 0) {
|
||||
wait(delay(stutter / 1e3));
|
||||
}
|
||||
|
||||
int len = conn->read((uint8_t *)buffer.begin() + writeOffset, (uint8_t *)buffer.end());
|
||||
writeOffset += len;
|
||||
self->bytesReceived += len;
|
||||
|
||||
// If buffer is complete, either process it as a header or return it
|
||||
if(writeOffset == buffer.size()) {
|
||||
if(gotHeader) {
|
||||
return buffer;
|
||||
} else {
|
||||
gotHeader = true;
|
||||
int msgSize = *(int *)buffer.begin();
|
||||
if(msgSize == 0) {
|
||||
return Standalone<StringRef>();
|
||||
}
|
||||
buffer = makeString(msgSize);
|
||||
writeOffset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if(len == 0) {
|
||||
wait(conn->onReadable());
|
||||
wait( delay( 0, TaskPriority::ReadSocket ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> writeMsg(P2PNetworkTest *self, Reference<IConnection> conn, StringRef msg) {
|
||||
state UnsentPacketQueue packets;
|
||||
PacketWriter writer(packets.getWriteBuffer(msg.size()), nullptr, Unversioned());
|
||||
writer.serializeBinaryItem((int)msg.size());
|
||||
writer.serializeBytes(msg);
|
||||
|
||||
loop {
|
||||
int stutter = self->waitWriteMilliseconds.get();
|
||||
if(stutter > 0) {
|
||||
wait(delay(stutter / 1e3));
|
||||
}
|
||||
int sent = conn->write(packets.getUnsent(), FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
|
||||
|
||||
if(sent != 0) {
|
||||
self->bytesSent += sent;
|
||||
packets.sent(sent);
|
||||
}
|
||||
|
||||
if(packets.empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
wait(conn->onWritable());
|
||||
wait(yield(TaskPriority::WriteSocket));
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> doSession(P2PNetworkTest *self, Reference<IConnection> conn, bool incoming) {
|
||||
state int numRequests;
|
||||
|
||||
try {
|
||||
if(incoming) {
|
||||
wait(conn->acceptHandshake());
|
||||
|
||||
// Read the number of requests for the session
|
||||
Standalone<StringRef> buf = wait(readMsg(self, conn));
|
||||
ASSERT(buf.size() == sizeof(int));
|
||||
numRequests = *(int *)buf.begin();
|
||||
} else {
|
||||
wait(conn->connectHandshake());
|
||||
|
||||
// Pick the number of requests for the session and send it to remote
|
||||
numRequests = self->requests.get();
|
||||
wait(writeMsg(self, conn, StringRef((const uint8_t *)&numRequests, sizeof(int))));
|
||||
}
|
||||
|
||||
while(numRequests > 0) {
|
||||
if(incoming) {
|
||||
// Wait for a request
|
||||
wait(success(readMsg(self, conn)));
|
||||
// Send a reply
|
||||
wait(writeMsg(self, conn, self->msgBuffer.substr(0, self->replyBytes.get())));
|
||||
}
|
||||
else {
|
||||
// Send a request
|
||||
wait(writeMsg(self, conn, self->msgBuffer.substr(0, self->requestBytes.get())));
|
||||
// Wait for a reply
|
||||
wait(success(readMsg(self, conn)));
|
||||
}
|
||||
|
||||
if(--numRequests == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
wait(delay(self->idleMilliseconds.get() / 1e3));
|
||||
conn->close();
|
||||
|
||||
if(incoming) {
|
||||
++self->sessionsIn;
|
||||
} else {
|
||||
++self->sessionsOut;
|
||||
}
|
||||
} catch(Error &e) {
|
||||
++self->sessionErrors;
|
||||
TraceEvent(SevError, incoming ? "P2PIncomingSessionError" : "P2POutgoingSessionError")
|
||||
.detail("Remote", conn->getPeerAddress())
|
||||
.error(e);
|
||||
}
|
||||
|
||||
return Void();
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> outgoing(P2PNetworkTest *self) {
|
||||
loop {
|
||||
wait(delay(0, TaskPriority::WriteSocket));
|
||||
state NetworkAddress remote = self->randomRemote();
|
||||
|
||||
try {
|
||||
state Reference<IConnection> conn = wait(INetworkConnections::net()->connect(remote));
|
||||
//printf("Connected to %s\n", remote.toString().c_str());
|
||||
wait(doSession(self, conn, false));
|
||||
} catch(Error &e) {
|
||||
++self->connectErrors;
|
||||
TraceEvent(SevError, "P2POutgoingError")
|
||||
.detail("Remote", remote)
|
||||
.error(e);
|
||||
wait(delay(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> incoming(P2PNetworkTest *self, Reference<IListener> listener) {
|
||||
state ActorCollection sessions(false);
|
||||
|
||||
loop {
|
||||
wait(delay(0, TaskPriority::AcceptSocket));
|
||||
|
||||
try {
|
||||
state Reference<IConnection> conn = wait(listener->accept());
|
||||
//printf("Connected from %s\n", conn->getPeerAddress().toString().c_str());
|
||||
sessions.add(doSession(self, conn, true));
|
||||
} catch(Error &e) {
|
||||
++self->acceptErrors;
|
||||
TraceEvent(SevError, "P2PIncomingError")
|
||||
.detail("Listener", listener->getListenAddress())
|
||||
.error(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> run_impl(P2PNetworkTest *self) {
|
||||
state ActorCollection actors(false);
|
||||
|
||||
self->startTime = now();
|
||||
|
||||
printf("%d listeners, %d remotes, %d outgoing connections\n", self->listeners.size(), self->remotes.size(), self->connectionsOut);
|
||||
printf("Request size: %s\n", self->requestBytes.toString().c_str());
|
||||
printf("Response size: %s\n", self->replyBytes.toString().c_str());
|
||||
printf("Requests per outgoing session: %d\n", self->requests.toString().c_str());
|
||||
printf("Delay before socket read: %s\n", self->waitReadMilliseconds.toString().c_str());
|
||||
printf("Delay before socket write: %s\n", self->waitWriteMilliseconds.toString().c_str());
|
||||
printf("Delay before session close: %s\n", self->idleMilliseconds.toString().c_str());
|
||||
printf("Send/Recv size %d bytes\n", FLOW_KNOBS->MAX_PACKET_SEND_BYTES);
|
||||
|
||||
for(auto n : self->remotes) {
|
||||
printf("Remote: %s\n", n.toString().c_str());
|
||||
}
|
||||
|
||||
for(auto el : self->listeners) {
|
||||
printf("Listener: %s\n", el->getListenAddress().toString().c_str());
|
||||
actors.add(incoming(self, el));
|
||||
}
|
||||
|
||||
if(!self->remotes.empty()) {
|
||||
for(int i = 0; i < self->connectionsOut; ++i) {
|
||||
actors.add(outgoing(self));
|
||||
}
|
||||
}
|
||||
|
||||
loop {
|
||||
wait(delay(1.0, TaskPriority::Max));
|
||||
printf("%s\n", self->statsString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
Future<Void> run() {
|
||||
return run_impl(this);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Returns the environment variable `name` parsed with atol(), or `defaultValue`
// when the variable is not set.
int getEnvInt(const char *name, int defaultValue = 0) {
	const char *raw = getenv(name);
	if (raw == nullptr) {
		return defaultValue;
	}
	return atol(raw);
}
|
||||
|
||||
// Returns the value of the environment variable `name`, or `defaultValue`
// when the variable is not set.
std::string getEnvStr(const char *name, std::string defaultValue = "") {
	const char *raw = getenv(name);
	if (raw == nullptr) {
		return defaultValue;
	}
	return std::string(raw);
}
|
||||
|
||||
// TODO: Remove this hacky thing and make a "networkp2ptest" role in fdbserver
|
||||
TEST_CASE("!p2ptest") {
|
||||
state P2PNetworkTest p2p(
|
||||
getEnvStr("listenerAddresses", ""),
|
||||
getEnvStr("remoteAddresses", ""),
|
||||
getEnvInt("connectionsOut", 0),
|
||||
getEnvStr("requestBytes", "0"),
|
||||
getEnvStr("replyBytes", "0"),
|
||||
getEnvStr("requests", "0"),
|
||||
getEnvStr("idleMilliseconds", "0"),
|
||||
getEnvStr("waitReadMilliseconds", "0"),
|
||||
getEnvStr("waitWriteMilliseconds", "0")
|
||||
);
|
||||
|
||||
wait(p2p.run());
|
||||
return Void();
|
||||
}
|
||||
|
|
|
@ -182,7 +182,7 @@ private:
|
|||
struct StorageServer* data;
|
||||
IKeyValueStore* storage;
|
||||
|
||||
void writeMutations( MutationListRef mutations, Version debugVersion, const char* debugContext );
|
||||
void writeMutations(const VectorRef<MutationRef>& mutations, Version debugVersion, const char* debugContext);
|
||||
|
||||
ACTOR static Future<Key> readFirstKey( IKeyValueStore* storage, KeyRangeRef range ) {
|
||||
Standalone<RangeResultRef> r = wait( storage->readRange( range, 1 ) );
|
||||
|
@ -246,8 +246,12 @@ struct UpdateEagerReadInfo {
|
|||
}
|
||||
};
|
||||
|
||||
const int VERSION_OVERHEAD = 64 + sizeof(Version) + sizeof(Standalone<VersionUpdateRef>) + //mutationLog, 64b overhead for map
|
||||
2 * (64 + sizeof(Version) + sizeof(Reference<VersionedMap<KeyRef, ValueOrClearToRef>::PTreeT>)); //versioned map [ x2 for createNewVersion(version+1) ], 64b overhead for map
|
||||
const int VERSION_OVERHEAD =
|
||||
64 + sizeof(Version) + sizeof(Standalone<VerUpdateRef>) + // mutationLog, 64b overhead for map
|
||||
2 * (64 + sizeof(Version) +
|
||||
sizeof(Reference<VersionedMap<KeyRef, ValueOrClearToRef>::PTreeT>)); // versioned map [ x2 for
|
||||
// createNewVersion(version+1) ], 64b
|
||||
// overhead for map
|
||||
// Byte estimate charged for one mutation: two versioned-map item overheads plus
// twice the mutation's own footprint (fixed overhead and both parameter sizes).
static int mvccStorageBytes(MutationRef const& m) {
	const auto mutationBytes = MutationRef::OVERHEAD_BYTES + m.param1.size() + m.param2.size();
	return VersionedMap<KeyRef, ValueOrClearToRef>::overheadPerItem * 2 + mutationBytes * 2;
}
|
||||
|
||||
struct FetchInjectionInfo {
|
||||
|
@ -281,7 +285,7 @@ private:
|
|||
// at older versions may contain older items which are also in storage (this is OK because of idempotency)
|
||||
|
||||
VersionedData versionedData;
|
||||
std::map<Version, Standalone<VersionUpdateRef>> mutationLog; // versions (durableVersion, version]
|
||||
std::map<Version, Standalone<VerUpdateRef>> mutationLog; // versions (durableVersion, version]
|
||||
|
||||
public:
|
||||
Tag tag;
|
||||
|
@ -293,8 +297,8 @@ public:
|
|||
double cpuUsage;
|
||||
double diskUsage;
|
||||
|
||||
std::map<Version, Standalone<VersionUpdateRef>> const& getMutationLog() const { return mutationLog; }
|
||||
std::map<Version, Standalone<VersionUpdateRef>>& getMutableMutationLog() { return mutationLog; }
|
||||
std::map<Version, Standalone<VerUpdateRef>> const& getMutationLog() const { return mutationLog; }
|
||||
std::map<Version, Standalone<VerUpdateRef>>& getMutableMutationLog() { return mutationLog; }
|
||||
VersionedData const& data() const { return versionedData; }
|
||||
VersionedData& mutableData() { return versionedData; }
|
||||
|
||||
|
@ -352,7 +356,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
Standalone<VersionUpdateRef>& addVersionToMutationLog(Version v) {
|
||||
Standalone<VerUpdateRef>& addVersionToMutationLog(Version v) {
|
||||
// return existing version...
|
||||
auto m = mutationLog.find(v);
|
||||
if (m != mutationLog.end())
|
||||
|
@ -367,10 +371,10 @@ public:
|
|||
return u;
|
||||
}
|
||||
|
||||
MutationRef addMutationToMutationLog(Standalone<VersionUpdateRef> &mLV, MutationRef const& m){
|
||||
MutationRef addMutationToMutationLog(Standalone<VerUpdateRef>& mLV, MutationRef const& m) {
|
||||
byteSampleApplyMutation(m, mLV.version);
|
||||
counters.bytesInput += mvccStorageBytes(m);
|
||||
return mLV.mutations.push_back_deep( mLV.arena(), m );
|
||||
return mLV.push_back_deep(mLV.arena(), m);
|
||||
}
|
||||
|
||||
StorageServerDisk storage;
|
||||
|
@ -1782,18 +1786,18 @@ bool changeDurableVersion( StorageServer* data, Version desiredDurableVersion )
|
|||
verData.createNewVersion( data->version.get()+1 );
|
||||
|
||||
int64_t bytesDurable = VERSION_OVERHEAD;
|
||||
for(auto m = v.mutations.begin(); m; ++m) {
|
||||
bytesDurable += mvccStorageBytes(*m);
|
||||
auto i = verData.atLatest().find(m->param1);
|
||||
for (const auto& m : v.mutations) {
|
||||
bytesDurable += mvccStorageBytes(m);
|
||||
auto i = verData.atLatest().find(m.param1);
|
||||
if (i) {
|
||||
ASSERT( i.key() == m->param1 );
|
||||
ASSERT(i.key() == m.param1);
|
||||
ASSERT( i.insertVersion() >= nextDurableVersion );
|
||||
if (i.insertVersion() == nextDurableVersion)
|
||||
verData.erase(i);
|
||||
}
|
||||
if (m->type == MutationRef::SetValue) {
|
||||
if (m.type == MutationRef::SetValue) {
|
||||
// A set can split a clear, so there might be another entry immediately after this one that should also be cleaned up
|
||||
i = verData.atLatest().upper_bound(m->param1);
|
||||
i = verData.atLatest().upper_bound(m.param1);
|
||||
if (i) {
|
||||
ASSERT( i.insertVersion() >= nextDurableVersion );
|
||||
if (i.insertVersion() == nextDurableVersion)
|
||||
|
@ -1965,7 +1969,8 @@ void applyMutation( StorageServer *self, MutationRef const& m, Arena& arena, Sto
|
|||
|
||||
}
|
||||
|
||||
void removeDataRange( StorageServer *ss, Standalone<VersionUpdateRef> &mLV, KeyRangeMap<Reference<ShardInfo>>& shards, KeyRangeRef range ) {
|
||||
void removeDataRange(StorageServer* ss, Standalone<VerUpdateRef>& mLV, KeyRangeMap<Reference<ShardInfo>>& shards,
|
||||
KeyRangeRef range) {
|
||||
// modify the latest version of data to remove all sets and trim all clears to exclude range.
|
||||
// Add a clear to mLV (mutationLog[data.getLatestVersion()]) that ensures all keys in range are removed from the disk when this latest version becomes durable
|
||||
// mLV is also modified if necessary to ensure that split clears can be forgotten
|
||||
|
@ -2610,7 +2615,7 @@ void StorageServer::addMutation(Version version, MutationRef const& mutation, Ke
|
|||
}
|
||||
|
||||
struct OrderByVersion {
|
||||
bool operator()( const VersionUpdateRef& a, const VersionUpdateRef& b ) {
|
||||
bool operator()(const VerUpdateRef& a, const VerUpdateRef& b) {
|
||||
if (a.version != b.version) return a.version < b.version;
|
||||
if (a.isPrivateData != b.isPrivateData) return a.isPrivateData;
|
||||
return false;
|
||||
|
@ -3209,13 +3214,14 @@ void StorageServerDisk::writeMutation( MutationRef mutation ) {
|
|||
ASSERT(false);
|
||||
}
|
||||
|
||||
void StorageServerDisk::writeMutations( MutationListRef mutations, Version debugVersion, const char* debugContext ) {
|
||||
for(auto m = mutations.begin(); m; ++m) {
|
||||
DEBUG_MUTATION(debugContext, debugVersion, *m).detail("UID", data->thisServerID);
|
||||
if (m->type == MutationRef::SetValue) {
|
||||
storage->set( KeyValueRef(m->param1, m->param2) );
|
||||
} else if (m->type == MutationRef::ClearRange) {
|
||||
storage->clear( KeyRangeRef(m->param1, m->param2) );
|
||||
void StorageServerDisk::writeMutations(const VectorRef<MutationRef>& mutations, Version debugVersion,
|
||||
const char* debugContext) {
|
||||
for (const auto& m : mutations) {
|
||||
DEBUG_MUTATION(debugContext, debugVersion, m).detail("UID", data->thisServerID);
|
||||
if (m.type == MutationRef::SetValue) {
|
||||
storage->set(KeyValueRef(m.param1, m.param2));
|
||||
} else if (m.type == MutationRef::ClearRange) {
|
||||
storage->clear(KeyRangeRef(m.param1, m.param2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3226,13 +3232,12 @@ bool StorageServerDisk::makeVersionMutationsDurable( Version& prevStorageVersion
|
|||
// Apply mutations from the mutationLog
|
||||
auto u = data->getMutationLog().upper_bound(prevStorageVersion);
|
||||
if (u != data->getMutationLog().end() && u->first <= newStorageVersion) {
|
||||
VersionUpdateRef const& v = u->second;
|
||||
VerUpdateRef const& v = u->second;
|
||||
ASSERT( v.version > prevStorageVersion && v.version <= newStorageVersion );
|
||||
// TODO(alexmiller): Update to version tracking.
|
||||
DEBUG_KEY_RANGE("makeVersionMutationsDurable", v.version, KeyRangeRef());
|
||||
writeMutations(v.mutations, v.version, "makeVersionDurable");
|
||||
for(auto m=v.mutations.begin(); m; ++m)
|
||||
bytesLeft -= mvccStorageBytes(*m);
|
||||
for (const auto& m : v.mutations) bytesLeft -= mvccStorageBytes(m);
|
||||
prevStorageVersion = v.version;
|
||||
return false;
|
||||
} else {
|
||||
|
|
|
@ -1221,7 +1221,7 @@ ACTOR Future<Void> workerServer(
|
|||
DUMPTOKEN( recruited.notifyBackupWorkerDone);
|
||||
|
||||
//printf("Recruited as masterServer\n");
|
||||
Future<Void> masterProcess = masterServer( recruited, dbInfo, ServerCoordinators( connFile ), req.lifetime, req.forceRecovery );
|
||||
Future<Void> masterProcess = masterServer( recruited, dbInfo, ccInterface, ServerCoordinators( connFile ), req.lifetime, req.forceRecovery );
|
||||
errorForwarders.add( zombie(recruited, forwardError( errors, Role::MASTER, recruited.id(), masterProcess )) );
|
||||
req.reply.send(recruited);
|
||||
}
|
||||
|
|
|
@ -27,15 +27,7 @@
|
|||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
//An enum of API operation types used in the random test
|
||||
enum OperationType {
|
||||
SET,
|
||||
GET,
|
||||
GET_RANGE,
|
||||
GET_RANGE_SELECTOR,
|
||||
GET_KEY,
|
||||
CLEAR,
|
||||
CLEAR_RANGE
|
||||
};
|
||||
enum OperationType { SET, GET, GET_RANGE, GET_RANGE_SELECTOR, GET_KEY, CLEAR, CLEAR_RANGE, UNINITIALIZED };
|
||||
|
||||
//A workload that executes the NativeAPIs functions and verifies that their outcomes are correct
|
||||
struct ApiCorrectnessWorkload : ApiWorkload {
|
||||
|
@ -230,7 +222,7 @@ public:
|
|||
int pdfArray[] = { 0, (int)(100 * setProbability), 100, 50, 50, 20, (int)(100 * (1 - setProbability)), (int)(10 * (1 - setProbability)) };
|
||||
vector<int> pdf = vector<int>(pdfArray, pdfArray + 8);
|
||||
|
||||
OperationType operation;
|
||||
OperationType operation = UNINITIALIZED;
|
||||
|
||||
//Choose a random operation type (SET, GET, GET_RANGE, GET_RANGE_SELECTOR, GET_KEY, CLEAR, CLEAR_RANGE).
|
||||
int totalDensity = 0;
|
||||
|
@ -247,6 +239,7 @@ public:
|
|||
|
||||
cumulativeDensity += pdf[i];
|
||||
}
|
||||
ASSERT(operation != UNINITIALIZED);
|
||||
|
||||
++self->numRandomOperations;
|
||||
|
||||
|
|
|
@ -300,18 +300,14 @@ struct AsyncFileCorrectnessWorkload : public AsyncFileWorkload
|
|||
{
|
||||
int64_t maxOffset;
|
||||
|
||||
//Reads should not exceed the extent of written data
|
||||
if(info.operation == READ)
|
||||
{
|
||||
// Reads should not exceed the extent of written data
|
||||
if (info.operation == READ) {
|
||||
maxOffset = fileSize - 1;
|
||||
if(maxOffset < 0)
|
||||
info.operation = WRITE;
|
||||
if (maxOffset < 0) info.operation = WRITE;
|
||||
// Only allow reads once the file has gotten large enough (to prevent blocking on locks)
|
||||
if (maxOffset < targetFileSize / 2) info.operation = WRITE;
|
||||
}
|
||||
|
||||
//Only allow reads once the file has gotten large enough (to prevent blocking on locks)
|
||||
if(maxOffset < targetFileSize / 2)
|
||||
info.operation = WRITE;
|
||||
|
||||
//Writes can be up to the target file size or the current file size (the current file size could be larger than the target as a result of a truncate)
|
||||
if(info.operation == WRITE)
|
||||
maxOffset = std::max(fileSize, targetFileSize) - 1;
|
||||
|
|
|
@ -495,6 +495,9 @@ struct BackupToDBCorrectnessWorkload : TestWorkload {
|
|||
state Transaction tr3(cx);
|
||||
loop {
|
||||
try {
|
||||
// Run on the first proxy to ensure data is cleared
|
||||
// when submitting the backup request below.
|
||||
tr3.setOption(FDBTransactionOptions::COMMIT_ON_FIRST_PROXY);
|
||||
for (auto r : self->backupRanges) {
|
||||
if(!r.empty()) {
|
||||
tr3.addReadConflictRange(r);
|
||||
|
|
|
@ -0,0 +1,170 @@
|
|||
/*
|
||||
* Downgrade.actor.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/NativeAPI.actor.h"
|
||||
#include "fdbserver/TesterInterface.actor.h"
|
||||
#include "fdbserver/workloads/workloads.actor.h"
|
||||
#include "flow/serialize.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
struct DowngradeWorkload : TestWorkload {
|
||||
|
||||
static constexpr const char* NAME = "Downgrade";
|
||||
Key oldKey, newKey;
|
||||
int numObjects;
|
||||
|
||||
DowngradeWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) {
|
||||
oldKey = getOption(options, LiteralStringRef("oldKey"), LiteralStringRef("oldKey"));
|
||||
newKey = getOption(options, LiteralStringRef("newKey"), LiteralStringRef("newKey"));
|
||||
numObjects = getOption(options, LiteralStringRef("numOptions"), deterministicRandom()->randomInt(0,100));
|
||||
}
|
||||
|
||||
struct _Struct {
|
||||
static constexpr FileIdentifier file_identifier = 2340487;
|
||||
int oldField = 0;
|
||||
};
|
||||
|
||||
struct OldStruct : public _Struct {
|
||||
void setFields() { oldField = 1; }
|
||||
bool isSet() const { return oldField == 1; }
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive& ar) {
|
||||
serializer(ar, oldField);
|
||||
}
|
||||
};
|
||||
|
||||
struct NewStruct : public _Struct {
|
||||
int newField = 0;
|
||||
|
||||
bool isSet() const {
|
||||
return oldField == 1 && newField == 2;
|
||||
}
|
||||
void setFields() {
|
||||
oldField = 1;
|
||||
newField = 2;
|
||||
}
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive& ar) {
|
||||
serializer(ar, oldField, newField);
|
||||
}
|
||||
};
|
||||
|
||||
ACTOR static Future<Void> writeOld(Database cx, int numObjects, Key key) {
|
||||
BinaryWriter writer(IncludeVersion(currentProtocolVersion));
|
||||
std::vector<OldStruct> data(numObjects);
|
||||
for (auto& oldObject : data) {
|
||||
oldObject.setFields();
|
||||
}
|
||||
writer << data;
|
||||
state Value value = writer.toValue();
|
||||
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
try {
|
||||
tr.set(key, value);
|
||||
wait(tr.commit());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> writeNew(Database cx, int numObjects, Key key) {
|
||||
ProtocolVersion protocolVersion = currentProtocolVersion;
|
||||
protocolVersion.addObjectSerializerFlag();
|
||||
ObjectWriter writer(IncludeVersion(protocolVersion));
|
||||
std::vector<NewStruct> data(numObjects);
|
||||
for (auto& newObject : data) {
|
||||
newObject.setFields();
|
||||
}
|
||||
writer.serialize(data);
|
||||
state Value value = writer.toStringRef();
|
||||
|
||||
state Transaction tr(cx);
|
||||
loop {
|
||||
try {
|
||||
tr.set(key, value);
|
||||
wait(tr.commit());
|
||||
return Void();
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR static Future<Void> readData(Database cx, int numObjects, Key key) {
|
||||
state Transaction tr(cx);
|
||||
state Value value;
|
||||
|
||||
loop {
|
||||
try {
|
||||
Optional<Value> _value = wait(tr.get(key));
|
||||
ASSERT(_value.present());
|
||||
value = _value.get();
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// use BinaryReader
|
||||
BinaryReader reader(value, IncludeVersion());
|
||||
std::vector<OldStruct> data;
|
||||
reader >> data;
|
||||
ASSERT(data.size() == numObjects);
|
||||
for (const auto& oldObject : data) {
|
||||
ASSERT(oldObject.isSet());
|
||||
}
|
||||
}
|
||||
{
|
||||
// use ArenaReader
|
||||
ArenaReader reader(Arena(), value, IncludeVersion());
|
||||
std::vector<OldStruct> data;
|
||||
reader >> data;
|
||||
ASSERT(data.size() == numObjects);
|
||||
for (const auto& oldObject : data) {
|
||||
ASSERT(oldObject.isSet());
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
}
|
||||
|
||||
std::string description() override { return NAME; }
|
||||
|
||||
Future<Void> setup(Database const& cx) override {
|
||||
return clientId ? Void() : (writeOld(cx, numObjects, oldKey) && writeNew(cx, numObjects, newKey));
|
||||
}
|
||||
|
||||
Future<Void> start(Database const& cx) override {
|
||||
return clientId ? Void() : (readData(cx, numObjects, oldKey) && readData(cx, numObjects, newKey));
|
||||
}
|
||||
|
||||
Future<bool> check(Database const& cx) override {
|
||||
// Failures are checked with assertions
|
||||
return true;
|
||||
}
|
||||
void getMetrics(vector<PerfMetric>& m) override {}
|
||||
};
|
||||
|
||||
// Global factory object for DowngradeWorkload, constructed with DowngradeWorkload::NAME
// ("Downgrade"); presumably this is what makes the workload selectable by that name in
// test specifications -- confirm against WorkloadFactory.
WorkloadFactory<DowngradeWorkload> DowngradeWorkloadFactory(DowngradeWorkload::NAME);
|
|
@ -123,7 +123,7 @@ struct ReportConflictingKeysWorkload : TestWorkload {
|
|||
} while (deterministicRandom()->random01() < addWriteConflictRangeProb);
|
||||
}
|
||||
|
||||
void emptyConflictingKeysTest(Reference<ReadYourWritesTransaction> ryw) {
|
||||
void emptyConflictingKeysTest(const Reference<ReadYourWritesTransaction>& ryw) {
|
||||
// This test is called when you want to make sure there is no conflictingKeys,
|
||||
// which means you will get an empty result from getRange(\xff\xff/transaction/conflicting_keys/,
|
||||
// \xff\xff/transaction/conflicting_keys0)
|
||||
|
@ -134,42 +134,42 @@ struct ReportConflictingKeysWorkload : TestWorkload {
|
|||
|
||||
ACTOR Future<Void> conflictingClient(Database cx, ReportConflictingKeysWorkload* self) {
|
||||
|
||||
state ReadYourWritesTransaction tr1(cx);
|
||||
state ReadYourWritesTransaction tr2(cx);
|
||||
state Reference<ReadYourWritesTransaction> tr1(new ReadYourWritesTransaction(cx));
|
||||
state Reference<ReadYourWritesTransaction> tr2(new ReadYourWritesTransaction(cx));
|
||||
state std::vector<KeyRange> readConflictRanges;
|
||||
state std::vector<KeyRange> writeConflictRanges;
|
||||
|
||||
loop {
|
||||
try {
|
||||
// set the flag for empty key range testing
|
||||
tr1.setOption(FDBTransactionOptions::REPORT_CONFLICTING_KEYS);
|
||||
tr1->setOption(FDBTransactionOptions::REPORT_CONFLICTING_KEYS);
|
||||
// tr1 should never have conflicting keys, the result should always be empty
|
||||
self->emptyConflictingKeysTest(Reference<ReadYourWritesTransaction>::addRef(&tr1));
|
||||
self->emptyConflictingKeysTest(tr1);
|
||||
|
||||
tr2.setOption(FDBTransactionOptions::REPORT_CONFLICTING_KEYS);
|
||||
tr2->setOption(FDBTransactionOptions::REPORT_CONFLICTING_KEYS);
|
||||
// If READ_YOUR_WRITES_DISABLE set, it behaves like native transaction object
|
||||
// where overlapped conflict ranges are not merged.
|
||||
if (deterministicRandom()->coinflip()) tr1.setOption(FDBTransactionOptions::READ_YOUR_WRITES_DISABLE);
|
||||
if (deterministicRandom()->coinflip()) tr2.setOption(FDBTransactionOptions::READ_YOUR_WRITES_DISABLE);
|
||||
if (deterministicRandom()->coinflip()) tr1->setOption(FDBTransactionOptions::READ_YOUR_WRITES_DISABLE);
|
||||
if (deterministicRandom()->coinflip()) tr2->setOption(FDBTransactionOptions::READ_YOUR_WRITES_DISABLE);
|
||||
// We have the two tx with same grv, then commit the first
|
||||
// If the second one is not able to commit due to conflicts, verify the returned conflicting keys
|
||||
// Otherwise, there is no conflicts between tr1's writeConflictRange and tr2's readConflictRange
|
||||
Version readVersion = wait(tr1.getReadVersion());
|
||||
tr2.setVersion(readVersion);
|
||||
self->addRandomReadConflictRange(&tr1, nullptr);
|
||||
self->addRandomWriteConflictRange(&tr1, &writeConflictRanges);
|
||||
Version readVersion = wait(tr1->getReadVersion());
|
||||
tr2->setVersion(readVersion);
|
||||
self->addRandomReadConflictRange(tr1.getPtr(), nullptr);
|
||||
self->addRandomWriteConflictRange(tr1.getPtr(), &writeConflictRanges);
|
||||
++self->commits;
|
||||
wait(tr1.commit());
|
||||
wait(tr1->commit());
|
||||
++self->xacts;
|
||||
// tr1 should never have conflicting keys, test again after the commit
|
||||
self->emptyConflictingKeysTest(Reference<ReadYourWritesTransaction>::addRef(&tr1));
|
||||
self->emptyConflictingKeysTest(tr1);
|
||||
|
||||
state bool foundConflict = false;
|
||||
try {
|
||||
self->addRandomReadConflictRange(&tr2, &readConflictRanges);
|
||||
self->addRandomWriteConflictRange(&tr2, nullptr);
|
||||
self->addRandomReadConflictRange(tr2.getPtr(), &readConflictRanges);
|
||||
self->addRandomWriteConflictRange(tr2.getPtr(), nullptr);
|
||||
++self->commits;
|
||||
wait(tr2.commit());
|
||||
wait(tr2->commit());
|
||||
++self->xacts;
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_not_committed) throw e;
|
||||
|
@ -188,8 +188,11 @@ struct ReportConflictingKeysWorkload : TestWorkload {
|
|||
// The getRange here using the special key prefix "\xff\xff/transaction/conflicting_keys/" happens
|
||||
// locally Thus, the error handling is not needed here
|
||||
Future<Standalone<RangeResultRef>> conflictingKeyRangesFuture =
|
||||
tr2.getRange(ckr, CLIENT_KNOBS->TOO_MANY);
|
||||
tr2->getRange(ckr, CLIENT_KNOBS->TOO_MANY);
|
||||
ASSERT(conflictingKeyRangesFuture.isReady());
|
||||
|
||||
tr2 = Reference<ReadYourWritesTransaction>(new ReadYourWritesTransaction(cx));
|
||||
|
||||
const Standalone<RangeResultRef> conflictingKeyRanges = conflictingKeyRangesFuture.get();
|
||||
ASSERT(conflictingKeyRanges.size() &&
|
||||
(conflictingKeyRanges.size() <= readConflictRanges.size() * 2));
|
||||
|
@ -275,13 +278,13 @@ struct ReportConflictingKeysWorkload : TestWorkload {
|
|||
}
|
||||
} catch (Error& e) {
|
||||
state Error e2 = e;
|
||||
wait(tr1.onError(e2));
|
||||
wait(tr2.onError(e2));
|
||||
wait(tr1->onError(e2));
|
||||
wait(tr2->onError(e2));
|
||||
}
|
||||
readConflictRanges.clear();
|
||||
writeConflictRanges.clear();
|
||||
tr1.reset();
|
||||
tr2.reset();
|
||||
tr1->reset();
|
||||
tr2->reset();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -55,7 +55,6 @@ private:
|
|||
#ifdef __linux__
|
||||
class EventFD : public IEventFD {
|
||||
int fd;
|
||||
ASIOReactor* reactor;
|
||||
boost::asio::posix::stream_descriptor sd;
|
||||
int64_t fdVal;
|
||||
|
||||
|
@ -66,7 +65,7 @@ private:
|
|||
}
|
||||
|
||||
public:
|
||||
EventFD(ASIOReactor* reactor) : reactor(reactor), sd(reactor->ios, open()) {}
|
||||
EventFD(ASIOReactor* reactor) : sd(reactor->ios, open()) {}
|
||||
~EventFD() {
|
||||
sd.close(); // Also closes the fd, I assume...
|
||||
}
|
||||
|
|
|
@ -530,6 +530,7 @@ void releaseAllThreadMagazines() {
|
|||
FastAllocator<2048>::releaseThreadMagazines();
|
||||
FastAllocator<4096>::releaseThreadMagazines();
|
||||
FastAllocator<8192>::releaseThreadMagazines();
|
||||
FastAllocator<16384>::releaseThreadMagazines();
|
||||
}
|
||||
|
||||
int64_t getTotalUnusedAllocatedMemory() {
|
||||
|
@ -546,6 +547,7 @@ int64_t getTotalUnusedAllocatedMemory() {
|
|||
unusedMemory += FastAllocator<2048>::getApproximateMemoryUnused();
|
||||
unusedMemory += FastAllocator<4096>::getApproximateMemoryUnused();
|
||||
unusedMemory += FastAllocator<8192>::getApproximateMemoryUnused();
|
||||
unusedMemory += FastAllocator<16384>::getApproximateMemoryUnused();
|
||||
|
||||
return unusedMemory;
|
||||
}
|
||||
|
@ -561,3 +563,4 @@ template class FastAllocator<1024>;
|
|||
template class FastAllocator<2048>;
|
||||
template class FastAllocator<4096>;
|
||||
template class FastAllocator<8192>;
|
||||
template class FastAllocator<16384>;
|
||||
|
|
|
@ -216,6 +216,7 @@ public:
|
|||
if (size <= 2048) return FastAllocator<2048>::allocate();
|
||||
if (size <= 4096) return FastAllocator<4096>::allocate();
|
||||
if (size <= 8192) return FastAllocator<8192>::allocate();
|
||||
if (size <= 16384) return FastAllocator<16384>::allocate();
|
||||
return new uint8_t[size];
|
||||
}
|
||||
|
||||
|
@ -231,6 +232,7 @@ inline void freeFast(int size, void* ptr) {
|
|||
if (size <= 2048) return FastAllocator<2048>::release(ptr);
|
||||
if (size <= 4096) return FastAllocator<4096>::release(ptr);
|
||||
if (size <= 8192) return FastAllocator<8192>::release(ptr);
|
||||
if (size <= 16384) return FastAllocator<16384>::release(ptr);
|
||||
delete[](uint8_t*)ptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include <unordered_map>
|
||||
#endif
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
||||
// Until we move to C++20, we'll need something to take the place of operator<=>.
|
||||
// This is as good a place as any, I guess.
|
||||
|
@ -140,7 +141,9 @@ public:
|
|||
|
||||
// The following functions have fixed implementations for now:
|
||||
// Returns the element c[i] for an index i obtained from randomInt(0, (int)c.size()).
// The return type is whatever operator[] yields when applied to a const C.
// NOTE(review): for an empty container this calls randomInt(0, 0) and then indexes c —
// confirm callers never pass an empty container.
template <class C>
|
||||
decltype((fake<const C>()[0])) randomChoice( const C& c ) { return c[randomInt(0,(int)c.size())]; }
|
||||
decltype((std::declval<const C>()[0])) randomChoice(const C& c) {
|
||||
return c[randomInt(0, (int)c.size())];
|
||||
}
|
||||
|
||||
template <class C>
|
||||
void randomShuffle( C& container ) {
|
||||
|
@ -161,13 +164,13 @@ extern FILE* randLog;
|
|||
// Sets the seed for the deterministic random number generator on the current thread
|
||||
void setThreadLocalDeterministicRandomSeed(uint32_t seed);
|
||||
|
||||
// Returns the random number generator that can be seeded. This generator should only
|
||||
// Returns the random number generator that can be seeded. This generator should only
|
||||
// be used in contexts where the choice to call it is deterministic.
|
||||
//
|
||||
// This generator is only deterministic if given a seed using setThreadLocalDeterministicRandomSeed
|
||||
Reference<IRandom> deterministicRandom();
|
||||
|
||||
// A random number generator that cannot be manually seeded and may be called in
|
||||
// A random number generator that cannot be manually seeded and may be called in
|
||||
// non-deterministic contexts.
|
||||
Reference<IRandom> nondeterministicRandom();
|
||||
|
||||
|
|
|
@ -43,11 +43,12 @@ class ThreadPool : public IThreadPool, public ReferenceCounted<ThreadPool> {
|
|||
threadUserObject = userObject;
|
||||
try {
|
||||
userObject->init();
|
||||
while (pool->ios.run_one() && !pool->mode);
|
||||
while (pool->ios.run_one() && (pool->mode == Mode::Run));
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevError, "ThreadPoolError").error(e);
|
||||
}
|
||||
delete userObject; userObject = 0;
|
||||
delete userObject;
|
||||
userObject = nullptr;
|
||||
stopped.set();
|
||||
}
|
||||
static void dispatch( PThreadAction action ) {
|
||||
|
@ -64,6 +65,7 @@ class ThreadPool : public IThreadPool, public ReferenceCounted<ThreadPool> {
|
|||
boost::asio::io_service::work dontstop;
|
||||
enum Mode { Run=0, Shutdown=2 };
|
||||
volatile int mode;
|
||||
int stackSize;
|
||||
|
||||
struct ActionWrapper {
|
||||
PThreadAction action;
|
||||
|
@ -76,7 +78,7 @@ class ThreadPool : public IThreadPool, public ReferenceCounted<ThreadPool> {
|
|||
ActionWrapper &operator=(ActionWrapper const&);
|
||||
};
|
||||
public:
|
||||
ThreadPool() : dontstop(ios), mode(Run) {}
|
||||
ThreadPool(int stackSize) : dontstop(ios), mode(Run), stackSize(stackSize) {}
|
||||
~ThreadPool() {}
|
||||
Future<Void> stop(Error const& e = success()) {
|
||||
if (mode == Shutdown) return Void();
|
||||
|
@ -95,7 +97,7 @@ public:
|
|||
virtual void delref() { if (ReferenceCounted<ThreadPool>::delref_no_destroy()) stop(); }
|
||||
// Wraps userData in a new Thread object, records it in `threads`, and launches it through
// startThread() with `start` as the entry point. The three-argument call also passes this
// pool's stackSize.
void addThread( IThreadPoolReceiver* userData ) {
|
||||
threads.push_back(new Thread(this, userData));
|
||||
startThread(start, threads.back());
|
||||
startThread(start, threads.back(), stackSize);
|
||||
}
|
||||
void post( PThreadAction action ) {
|
||||
ios.post( ActionWrapper( action ) );
|
||||
|
@ -103,9 +105,9 @@ public:
|
|||
};
|
||||
|
||||
|
||||
// Factory returning a new ThreadPool behind the IThreadPool interface.
// stackSize is handed to the ThreadPool constructor.
Reference<IThreadPool> createGenericThreadPool()
|
||||
Reference<IThreadPool> createGenericThreadPool(int stackSize)
|
||||
{
|
||||
return Reference<IThreadPool>( new ThreadPool );
|
||||
return Reference<IThreadPool>( new ThreadPool(stackSize) );
|
||||
}
|
||||
|
||||
thread_local IThreadPoolReceiver* ThreadPool::Thread::threadUserObject;
|
||||
|
|
|
@ -107,7 +107,7 @@ private:
|
|||
Promise<T> promise;
|
||||
};
|
||||
|
||||
Reference<IThreadPool> createGenericThreadPool();
|
||||
Reference<IThreadPool> createGenericThreadPool(int stackSize = 0);
|
||||
|
||||
class DummyThreadPool : public IThreadPool, ReferenceCounted<DummyThreadPool> {
|
||||
public:
|
||||
|
|
|
@ -86,7 +86,12 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
|
|||
init( TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT, 9.0 );
|
||||
init( TLS_CLIENT_CONNECTION_THROTTLE_TIMEOUT, 11.0 );
|
||||
init( TLS_SERVER_CONNECTION_THROTTLE_ATTEMPTS, 1 );
|
||||
init( TLS_CLIENT_CONNECTION_THROTTLE_ATTEMPTS, 0 );
|
||||
init( TLS_CLIENT_CONNECTION_THROTTLE_ATTEMPTS, 1 );
|
||||
init( TLS_CLIENT_HANDSHAKE_THREADS, 0 );
|
||||
init( TLS_SERVER_HANDSHAKE_THREADS, 1000 );
|
||||
init( TLS_HANDSHAKE_THREAD_STACKSIZE, 64 * 1024 );
|
||||
init( TLS_MALLOC_ARENA_MAX, 6 );
|
||||
init( TLS_HANDSHAKE_LIMIT, 1000 );
|
||||
|
||||
init( NETWORK_TEST_CLIENT_COUNT, 30 );
|
||||
init( NETWORK_TEST_REPLY_SIZE, 600e3 );
|
||||
|
@ -140,13 +145,11 @@ void FlowKnobs::initialize(bool randomize, bool isSimulated) {
|
|||
init( PACKET_LIMIT, 100LL<<20 );
|
||||
init( PACKET_WARNING, 2LL<<20 ); // 2MB packet warning quietly allows for 1MB system messages
|
||||
init( TIME_OFFSET_LOGGING_INTERVAL, 60.0 );
|
||||
init( MAX_PACKET_SEND_BYTES, 256 * 1024 );
|
||||
init( MAX_PACKET_SEND_BYTES, 128 * 1024 );
|
||||
init( MIN_PACKET_BUFFER_BYTES, 4 * 1024 );
|
||||
init( MIN_PACKET_BUFFER_FREE_BYTES, 256 );
|
||||
init( FLOW_TCP_NODELAY, 1 );
|
||||
init( FLOW_TCP_QUICKACK, 0 );
|
||||
init( UNRESTRICTED_HANDSHAKE_LIMIT, 15 );
|
||||
init( BOUNDED_HANDSHAKE_LIMIT, 400 );
|
||||
|
||||
//Sim2
|
||||
init( MIN_OPEN_TIME, 0.0002 );
|
||||
|
|
|
@ -101,6 +101,11 @@ public:
|
|||
double TLS_CLIENT_CONNECTION_THROTTLE_TIMEOUT;
|
||||
int TLS_SERVER_CONNECTION_THROTTLE_ATTEMPTS;
|
||||
int TLS_CLIENT_CONNECTION_THROTTLE_ATTEMPTS;
|
||||
int TLS_CLIENT_HANDSHAKE_THREADS;
|
||||
int TLS_SERVER_HANDSHAKE_THREADS;
|
||||
int TLS_HANDSHAKE_THREAD_STACKSIZE;
|
||||
int TLS_MALLOC_ARENA_MAX;
|
||||
int TLS_HANDSHAKE_LIMIT;
|
||||
|
||||
int NETWORK_TEST_CLIENT_COUNT;
|
||||
int NETWORK_TEST_REPLY_SIZE;
|
||||
|
@ -163,8 +168,6 @@ public:
|
|||
int MIN_PACKET_BUFFER_FREE_BYTES;
|
||||
int FLOW_TCP_NODELAY;
|
||||
int FLOW_TCP_QUICKACK;
|
||||
int UNRESTRICTED_HANDSHAKE_LIMIT;
|
||||
int BOUNDED_HANDSHAKE_LIMIT;
|
||||
|
||||
//Sim2
|
||||
//FIXME: more parameters could be factored out
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
#include "flow/ProtocolVersion.h"
|
||||
#include "flow/TLSConfig.actor.h"
|
||||
#include "flow/genericactors.actor.h"
|
||||
#include "flow/Util.h"
|
||||
|
||||
// See the comment in TLSConfig.actor.h for the explanation of why this module breaking include was done.
|
||||
#include "fdbrpc/IAsyncFile.h"
|
||||
|
@ -118,7 +119,7 @@ class Net2 sealed : public INetwork, public INetworkConnections {
|
|||
|
||||
public:
|
||||
Net2(const TLSConfig& tlsConfig, bool useThreadPool, bool useMetrics);
|
||||
void initTLS();
|
||||
void initTLS(ETLSInitState targetState);
|
||||
void run();
|
||||
void initMetrics();
|
||||
|
||||
|
@ -168,15 +169,19 @@ public:
|
|||
virtual bool checkRunnable() override;
|
||||
|
||||
bool useThreadPool;
|
||||
|
||||
//private:
|
||||
|
||||
ASIOReactor reactor;
|
||||
#ifndef TLS_DISABLED
|
||||
AsyncVar<Reference<ReferencedObject<boost::asio::ssl::context>>> sslContextVar;
|
||||
Reference<IThreadPool> sslHandshakerPool;
|
||||
int sslHandshakerThreadsStarted;
|
||||
int sslPoolHandshakesInProgress;
|
||||
#endif
|
||||
TLSConfig tlsConfig;
|
||||
Future<Void> backgroundCertRefresh;
|
||||
bool tlsInitialized;
|
||||
ETLSInitState tlsInitializedState;
|
||||
|
||||
INetworkConnections *network; // initially this, but can be changed
|
||||
|
||||
|
@ -528,6 +533,47 @@ private:
|
|||
#ifndef TLS_DISABLED
|
||||
typedef boost::asio::ssl::stream<boost::asio::ip::tcp::socket&> ssl_socket;
|
||||
|
||||
// Thread-pool receiver whose only action is to run a TLS handshake with the synchronous
// ssl_socket::handshake() call and report the outcome through a ThreadReturnPromise.
struct SSLHandshakerThread : IThreadPoolReceiver {
|
||||
SSLHandshakerThread() {}
|
||||
virtual void init() {}
|
||||
|
||||
// One handshake request: the socket to operate on, the handshake role (client or server),
// the promise that is fulfilled when action() finishes, and the error code filled in by
// the socket calls made in action().
struct Handshake : TypedAction<SSLHandshakerThread,Handshake> {
|
||||
Handshake(ssl_socket &socket, ssl_socket::handshake_type type) : socket(socket), type(type) {
|
||||
}
|
||||
virtual double getTimeEstimate() { return 0.001; }
|
||||
|
||||
ThreadReturnPromise<Void> done;
|
||||
// Held by reference: the Handshake does not own the socket.
ssl_socket &socket;
|
||||
ssl_socket::handshake_type type;
|
||||
boost::system::error_code err;
|
||||
};
|
||||
|
||||
// Performs the handshake described by h. Every socket call reports into h.err, and each
// later step is skipped once h.err indicates failure. On failure (or on any exception) a
// SevWarn trace event is logged and h.done is failed with connection_failed(); on success
// h.done receives Void().
void action( Handshake &h) {
|
||||
try {
|
||||
// Switch the underlying socket to blocking mode for the synchronous handshake.
h.socket.next_layer().non_blocking(false, h.err);
|
||||
if(!h.err.failed()) {
|
||||
h.socket.handshake(h.type, h.err);
|
||||
}
|
||||
// NOTE(review): non-blocking mode is only restored when the handshake succeeded, so a
// socket whose handshake failed is left in blocking mode — confirm that is acceptable
// for a connection that is about to be reported as failed.
if(!h.err.failed()) {
|
||||
h.socket.next_layer().non_blocking(true, h.err);
|
||||
}
|
||||
if(h.err.failed()) {
|
||||
TraceEvent(SevWarn, h.type == ssl_socket::handshake_type::client ? "N2_ConnectHandshakeError" : "N2_AcceptHandshakeError")
|
||||
.detail("ErrorCode", h.err.value())
|
||||
.detail("ErrorMsg", h.err.message().c_str())
|
||||
.detail("BackgroundThread", true);
|
||||
h.done.sendError(connection_failed());
|
||||
} else {
|
||||
h.done.send(Void());
|
||||
}
|
||||
} catch(...) {
|
||||
// Any exception is reported the same way as a handshake error; nothing is rethrown.
TraceEvent(SevWarn, h.type == ssl_socket::handshake_type::client ? "N2_ConnectHandshakeUnknownError" : "N2_AcceptHandshakeUnknownError")
|
||||
.detail("BackgroundThread", true);
|
||||
h.done.sendError(connection_failed());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class SSLConnection : public IConnection, ReferenceCounted<SSLConnection> {
|
||||
public:
|
||||
virtual void addref() { ReferenceCounted<SSLConnection>::addref(); }
|
||||
|
@ -584,8 +630,11 @@ public:
|
|||
}
|
||||
|
||||
ACTOR static void doAcceptHandshake( Reference<SSLConnection> self, Promise<Void> connected) {
|
||||
state std::pair<IPAddress,uint16_t> peerIP;
|
||||
state Hold<int> holder;
|
||||
|
||||
try {
|
||||
state std::pair<IPAddress,uint16_t> peerIP = std::make_pair(self->getPeerAddress().ip, static_cast<uint16_t>(0));
|
||||
peerIP = std::make_pair(self->getPeerAddress().ip, static_cast<uint16_t>(0));
|
||||
auto iter(g_network->networkInfo.serverTLSConnectionThrottler.find(peerIP));
|
||||
if(iter != g_network->networkInfo.serverTLSConnectionThrottler.end()) {
|
||||
if (now() < iter->second.second) {
|
||||
|
@ -601,12 +650,24 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
int64_t permitNumber = wait(g_network->networkInfo.handshakeLock->take());
|
||||
state BoundedFlowLock::Releaser releaser(g_network->networkInfo.handshakeLock, permitNumber);
|
||||
wait(g_network->networkInfo.handshakeLock->take());
|
||||
state FlowLock::Releaser releaser(*g_network->networkInfo.handshakeLock);
|
||||
|
||||
BindPromise p("N2_AcceptHandshakeError", UID());
|
||||
auto onHandshook = p.getFuture();
|
||||
self->getSSLSocket().async_handshake( boost::asio::ssl::stream_base::server, std::move(p) );
|
||||
Future<Void> onHandshook;
|
||||
|
||||
// If the background handshakers are not all busy, use one
|
||||
if(N2::g_net2->sslPoolHandshakesInProgress < N2::g_net2->sslHandshakerThreadsStarted) {
|
||||
holder = Hold(&N2::g_net2->sslPoolHandshakesInProgress);
|
||||
auto handshake = new SSLHandshakerThread::Handshake(self->ssl_sock, boost::asio::ssl::stream_base::server);
|
||||
onHandshook = handshake->done.getFuture();
|
||||
N2::g_net2->sslHandshakerPool->post(handshake);
|
||||
}
|
||||
else {
|
||||
// Otherwise use flow network thread
|
||||
BindPromise p("N2_AcceptHandshakeError", UID());
|
||||
onHandshook = p.getFuture();
|
||||
self->ssl_sock.async_handshake( boost::asio::ssl::stream_base::server, std::move(p) );
|
||||
}
|
||||
wait( onHandshook );
|
||||
wait(delay(0, TaskPriority::Handshake));
|
||||
connected.send(Void());
|
||||
|
@ -640,13 +701,26 @@ public:
|
|||
}
|
||||
|
||||
ACTOR static void doConnectHandshake( Reference<SSLConnection> self, Promise<Void> connected) {
|
||||
try {
|
||||
int64_t permitNumber = wait(g_network->networkInfo.handshakeLock->take());
|
||||
state BoundedFlowLock::Releaser releaser(g_network->networkInfo.handshakeLock, permitNumber);
|
||||
state Hold<int> holder;
|
||||
|
||||
BindPromise p("N2_ConnectHandshakeError", self->id);
|
||||
Future<Void> onHandshook = p.getFuture();
|
||||
self->ssl_sock.async_handshake( boost::asio::ssl::stream_base::client, std::move(p) );
|
||||
try {
|
||||
wait(g_network->networkInfo.handshakeLock->take());
|
||||
state FlowLock::Releaser releaser(*g_network->networkInfo.handshakeLock);
|
||||
|
||||
Future<Void> onHandshook;
|
||||
// If the background handshakers are not all busy, use one
|
||||
if(N2::g_net2->sslPoolHandshakesInProgress < N2::g_net2->sslHandshakerThreadsStarted) {
|
||||
holder = Hold(&N2::g_net2->sslPoolHandshakesInProgress);
|
||||
auto handshake = new SSLHandshakerThread::Handshake(self->ssl_sock, boost::asio::ssl::stream_base::client);
|
||||
onHandshook = handshake->done.getFuture();
|
||||
N2::g_net2->sslHandshakerPool->post(handshake);
|
||||
}
|
||||
else {
|
||||
// Otherwise use flow network thread
|
||||
BindPromise p("N2_ConnectHandshakeError", self->id);
|
||||
onHandshook = p.getFuture();
|
||||
self->ssl_sock.async_handshake( boost::asio::ssl::stream_base::client, std::move(p) );
|
||||
}
|
||||
wait( onHandshook );
|
||||
wait(delay(0, TaskPriority::Handshake));
|
||||
connected.send(Void());
|
||||
|
@ -861,11 +935,12 @@ Net2::Net2(const TLSConfig& tlsConfig, bool useThreadPool, bool useMetrics)
|
|||
tscBegin(0), tscEnd(0), taskBegin(0), currentTaskID(TaskPriority::DefaultYield),
|
||||
numYields(0),
|
||||
lastPriorityStats(nullptr),
|
||||
tlsInitialized(false),
|
||||
tlsInitializedState(ETLSInitState::NONE),
|
||||
tlsConfig(tlsConfig),
|
||||
started(false)
|
||||
#ifndef TLS_DISABLED
|
||||
,sslContextVar({ReferencedObject<boost::asio::ssl::context>::from(boost::asio::ssl::context(boost::asio::ssl::context::tls))})
|
||||
,sslContextVar({ReferencedObject<boost::asio::ssl::context>::from(boost::asio::ssl::context(boost::asio::ssl::context::tls))}),
|
||||
sslPoolHandshakesInProgress(0), sslHandshakerThreadsStarted(0)
|
||||
#endif
|
||||
|
||||
{
|
||||
|
@ -962,29 +1037,66 @@ ACTOR static Future<Void> reloadCertificatesOnChange( TLSConfig config, std::fun
|
|||
}
|
||||
#endif
|
||||
|
||||
// Brings TLS initialization up to targetState; returns immediately when
// tlsInitializedState is already >= targetState.
//  - When the current state is ETLSInitState::NONE: loads the TLS config, builds a new ssl
//    context, publishes it through sslContextVar and starts reloadCertificatesOnChange().
//    An Error raised while loading is traced as Net2TLSInitError and not rethrown.
//  - When targetState is above ETLSInitState::CONFIG: starts handshaker threads until
//    sslHandshakerThreadsStarted reaches TLS_CLIENT_HANDSHAKE_THREADS (CONNECT) or
//    TLS_SERVER_HANDSHAKE_THREADS (LISTEN).
// Finally records targetState in tlsInitializedState.
void Net2::initTLS() {
|
||||
if(tlsInitialized) {
|
||||
void Net2::initTLS(ETLSInitState targetState) {
|
||||
if(tlsInitializedState >= targetState) {
|
||||
return;
|
||||
}
|
||||
#ifndef TLS_DISABLED
|
||||
auto onPolicyFailure = [this]() { this->countTLSPolicyFailures++; };
|
||||
try {
|
||||
boost::asio::ssl::context newContext(boost::asio::ssl::context::tls);
|
||||
const LoadedTLSConfig& loaded = tlsConfig.loadSync();
|
||||
TraceEvent("Net2TLSConfig")
|
||||
.detail("CAPath", tlsConfig.getCAPathSync())
|
||||
.detail("CertificatePath", tlsConfig.getCertificatePathSync())
|
||||
.detail("KeyPath", tlsConfig.getKeyPathSync())
|
||||
.detail("HasPassword", !loaded.getPassword().empty())
|
||||
.detail("VerifyPeers", boost::algorithm::join(loaded.getVerifyPeers(), "|"));
|
||||
ConfigureSSLContext( tlsConfig.loadSync(), &newContext, onPolicyFailure );
|
||||
sslContextVar.set(ReferencedObject<boost::asio::ssl::context>::from(std::move(newContext)));
|
||||
} catch (Error& e) {
|
||||
TraceEvent("Net2TLSInitError").error(e);
|
||||
// Any target state must be higher than NONE so if the current state is NONE
|
||||
// then initialize the TLS config
|
||||
if(tlsInitializedState == ETLSInitState::NONE) {
|
||||
// Callback given to ConfigureSSLContext and reloadCertificatesOnChange; it increments
// countTLSPolicyFailures.
auto onPolicyFailure = [this]() { this->countTLSPolicyFailures++; };
|
||||
try {
|
||||
boost::asio::ssl::context newContext(boost::asio::ssl::context::tls);
|
||||
const LoadedTLSConfig& loaded = tlsConfig.loadSync();
|
||||
TraceEvent("Net2TLSConfig")
|
||||
.detail("CAPath", tlsConfig.getCAPathSync())
|
||||
.detail("CertificatePath", tlsConfig.getCertificatePathSync())
|
||||
.detail("KeyPath", tlsConfig.getKeyPathSync())
|
||||
.detail("HasPassword", !loaded.getPassword().empty())
|
||||
.detail("VerifyPeers", boost::algorithm::join(loaded.getVerifyPeers(), "|"));
|
||||
// NOTE(review): loadSync() is called a second time here although `loaded` already holds
// its result — confirm whether the second load is intentional.
ConfigureSSLContext( tlsConfig.loadSync(), &newContext, onPolicyFailure );
|
||||
sslContextVar.set(ReferencedObject<boost::asio::ssl::context>::from(std::move(newContext)));
|
||||
} catch (Error& e) {
|
||||
TraceEvent("Net2TLSInitError").error(e);
|
||||
}
|
||||
// Started whether or not the initial load above succeeded.
backgroundCertRefresh = reloadCertificatesOnChange( tlsConfig, onPolicyFailure, &sslContextVar );
|
||||
}
|
||||
|
||||
// If a TLS connection is actually going to be used then start background threads if configured
|
||||
if(targetState > ETLSInitState::CONFIG) {
|
||||
int threadsToStart;
|
||||
switch(targetState) {
|
||||
case ETLSInitState::CONNECT:
|
||||
threadsToStart = FLOW_KNOBS->TLS_CLIENT_HANDSHAKE_THREADS;
|
||||
break;
|
||||
case ETLSInitState::LISTEN:
|
||||
threadsToStart = FLOW_KNOBS->TLS_SERVER_HANDSHAKE_THREADS;
|
||||
break;
|
||||
default:
|
||||
threadsToStart = 0;
|
||||
};
|
||||
// Only the shortfall relative to the threads already running is started.
threadsToStart -= sslHandshakerThreadsStarted;
|
||||
|
||||
if(threadsToStart > 0) {
|
||||
// First handshaker thread: on Linux set M_ARENA_MAX from the TLS_MALLOC_ARENA_MAX knob
// (a failure is only traced as a warning), then create the pool with the
// TLS_HANDSHAKE_THREAD_STACKSIZE knob as stack size.
if(sslHandshakerThreadsStarted == 0) {
|
||||
#if defined(__linux__)
|
||||
if(mallopt(M_ARENA_MAX, FLOW_KNOBS->TLS_MALLOC_ARENA_MAX) != 1) {
|
||||
TraceEvent(SevWarn, "TLSMallocSetMaxArenasFailure").detail("MaxArenas", FLOW_KNOBS->TLS_MALLOC_ARENA_MAX);
|
||||
};
|
||||
#endif
|
||||
sslHandshakerPool = createGenericThreadPool(FLOW_KNOBS->TLS_HANDSHAKE_THREAD_STACKSIZE);
|
||||
}
|
||||
|
||||
for(int i = 0; i < threadsToStart; ++i) {
|
||||
++sslHandshakerThreadsStarted;
|
||||
sslHandshakerPool->addThread(new SSLHandshakerThread());
|
||||
}
|
||||
}
|
||||
}
|
||||
backgroundCertRefresh = reloadCertificatesOnChange( tlsConfig, onPolicyFailure, &sslContextVar );
|
||||
#endif
|
||||
tlsInitialized = true;
|
||||
|
||||
tlsInitializedState = targetState;
|
||||
}
|
||||
|
||||
ACTOR Future<Void> Net2::logTimeOffset() {
|
||||
|
@ -1382,7 +1494,7 @@ THREAD_HANDLE Net2::startThread( THREAD_FUNC_RETURN (*func) (void*), void *arg )
|
|||
|
||||
Future< Reference<IConnection> > Net2::connect( NetworkAddress toAddr, std::string host ) {
|
||||
#ifndef TLS_DISABLED
|
||||
initTLS();
|
||||
initTLS(ETLSInitState::CONNECT);
|
||||
if ( toAddr.isTLS() ) {
|
||||
return SSLConnection::connect(&this->reactor.ios, this->sslContextVar.get(), toAddr);
|
||||
}
|
||||
|
@ -1462,7 +1574,7 @@ bool Net2::isAddressOnThisHost(NetworkAddress const& addr) const {
|
|||
Reference<IListener> Net2::listen( NetworkAddress localAddr ) {
|
||||
try {
|
||||
#ifndef TLS_DISABLED
|
||||
initTLS();
|
||||
initTLS(ETLSInitState::LISTEN);
|
||||
if ( localAddr.isTLS() ) {
|
||||
return Reference<IListener>(new SSLListener( reactor.ios, &this->sslContextVar, localAddr ));
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ void PacketWriter::serializeBytesAcrossBoundary(const void* data, int bytes) {
|
|||
if (!bytes) break;
|
||||
|
||||
data = (uint8_t*)data + b;
|
||||
nextBuffer();
|
||||
nextBuffer(bytes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -69,6 +69,8 @@ void PacketWriter::nextBuffer(size_t size) {
|
|||
}
|
||||
}
|
||||
|
||||
// Adds exactly bytes of unwritten length to the buffer, possibly across packet buffer boundaries,
|
||||
// and initializes buf to point to the packet buffer(s) that contain the unwritten space
|
||||
void PacketWriter::writeAhead( int bytes, struct SplitBuffer* buf ) {
|
||||
if (bytes <= buffer->bytes_unwritten()) {
|
||||
buf->begin = buffer->data() + buffer->bytes_written;
|
||||
|
@ -79,9 +81,10 @@ void PacketWriter::writeAhead( int bytes, struct SplitBuffer* buf ) {
|
|||
buf->begin = buffer->data() + buffer->bytes_written;
|
||||
buf->first_length = buffer->bytes_unwritten();
|
||||
buffer->bytes_written = buffer->size();
|
||||
nextBuffer();
|
||||
size_t remaining = bytes - buf->first_length;
|
||||
nextBuffer(remaining);
|
||||
buf->next = buffer->data();
|
||||
buffer->bytes_written = bytes - buf->first_length;
|
||||
buffer->bytes_written = remaining;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -172,7 +175,7 @@ PacketBuffer* ReliablePacketList::compact(PacketBuffer* into, PacketBuffer* end)
|
|||
if (c->buffer == end /*&& c->begin>=c->buffer->bytes_written*/) // quit when we hit the unsent range
|
||||
return into;
|
||||
if (into->bytes_written == into->size()) {
|
||||
into->next = PacketBuffer::create();
|
||||
into->next = PacketBuffer::create(into->size());
|
||||
into = into->nextPacketBuffer();
|
||||
}
|
||||
|
||||
|
|
|
@ -44,10 +44,10 @@ public:
|
|||
~UnsentPacketQueue() { discardAll(); }
|
||||
|
||||
// Get a PacketBuffer to write new packets into
|
||||
// Returns unsent_last, creating the first PacketBuffer when the queue is empty.
// sizeHint is only forwarded to PacketBuffer::create() in that empty-queue case; when
// unsent_last already exists it is returned unchanged and sizeHint is not used.
PacketBuffer* getWriteBuffer() {
|
||||
PacketBuffer* getWriteBuffer(size_t sizeHint = 0) {
|
||||
if (!unsent_last) {
|
||||
ASSERT(!unsent_first);
|
||||
unsent_first = unsent_last = PacketBuffer::create();
|
||||
unsent_first = unsent_last = PacketBuffer::create(sizeHint);
|
||||
};
|
||||
return unsent_last;
|
||||
}
|
||||
|
|
|
@ -68,6 +68,7 @@ struct SaveContext {
|
|||
|
||||
template <class ReaderImpl>
|
||||
class _ObjectReader {
|
||||
protected:
|
||||
ProtocolVersion mProtocolVersion;
|
||||
public:
|
||||
|
||||
|
@ -79,8 +80,19 @@ public:
|
|||
const uint8_t* data = static_cast<ReaderImpl*>(this)->data();
|
||||
LoadContext<ReaderImpl> context(static_cast<ReaderImpl*>(this));
|
||||
if(read_file_identifier(data) != file_identifier) {
|
||||
TraceEvent(SevError, "MismatchedFileIdentifier").detail("Expected", file_identifier).detail("Read", read_file_identifier(data));
|
||||
ASSERT(false);
|
||||
// Some file identifiers are changed in 7.0, so file identifier mismatches
|
||||
// are expected during a downgrade from 7.0 to 6.3
|
||||
bool expectMismatch = mProtocolVersion >= ProtocolVersion(0x0FDB00B070000000LL);
|
||||
{
|
||||
TraceEvent te(expectMismatch ? SevInfo : SevError, "MismatchedFileIdentifier");
|
||||
if (expectMismatch) {
|
||||
te.suppressFor(1.0);
|
||||
}
|
||||
te.detail("Expected", file_identifier).detail("Read", read_file_identifier(data));
|
||||
}
|
||||
if (!expectMismatch) {
|
||||
ASSERT(false);
|
||||
}
|
||||
}
|
||||
load_members(data, context, items...);
|
||||
}
|
||||
|
|
|
@ -2566,13 +2566,29 @@ void setCloseOnExec( int fd ) {
|
|||
} // namespace platform
|
||||
|
||||
#ifdef _WIN32
|
||||
// Windows implementation: starts func(arg) on a new thread with _beginthread and returns
// its result cast to void*. stackSize is passed straight through as _beginthread's
// stack-size argument.
THREAD_HANDLE startThread(void (*func) (void *), void *arg) {
|
||||
return (void *)_beginthread(func, 0, arg);
|
||||
THREAD_HANDLE startThread(void (*func) (void *), void *arg, int stackSize) {
|
||||
return (void *)_beginthread(func, stackSize, arg);
|
||||
}
|
||||
#elif (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__))
|
||||
// pthread implementation: starts func(arg) on a new thread and returns its pthread_t.
// A non-zero stackSize is applied to the thread attributes with pthread_attr_setstacksize();
// with stackSize == 0 the attributes are used as pthread_attr_init() left them.
THREAD_HANDLE startThread(void *(*func) (void *), void *arg) {
|
||||
THREAD_HANDLE startThread(void *(*func) (void *), void *arg, int stackSize) {
|
||||
pthread_t t;
|
||||
pthread_create(&t, NULL, func, arg);
|
||||
pthread_attr_t attr;
|
||||
|
||||
pthread_attr_init(&attr);
|
||||
if(stackSize != 0) {
|
||||
if(pthread_attr_setstacksize(&attr, stackSize) != 0) {
|
||||
// If setting the stack size fails the default stack size will be used, so failure to set
|
||||
// the stack size is treated as a warning.
|
||||
// Logging a trace event here is a bit risky because startThread() could be used early
|
||||
// enough that TraceEvent can't be used yet, though currently it is not used with a nonzero
|
||||
// stack size that early in execution.
|
||||
TraceEvent(SevWarnAlways, "StartThreadInvalidStackSize").detail("StackSize", stackSize);
|
||||
};
|
||||
}
|
||||
|
||||
// NOTE(review): the return value of pthread_create() is not checked, so if thread creation
// fails `t` is returned without ever having been assigned — confirm callers can tolerate that.
pthread_create(&t, &attr, func, arg);
|
||||
pthread_attr_destroy(&attr);
|
||||
|
||||
return t;
|
||||
}
|
||||
#else
|
||||
|
|
|
@ -135,9 +135,6 @@ do { \
|
|||
#include <functional>
|
||||
#endif
|
||||
|
||||
// fake<T>() is for use in decltype expressions only - there is no implementation
|
||||
template <class T> T fake();
|
||||
|
||||
// g++ requires that non-dependent names have to be looked up at
|
||||
// template definition, which makes circular dependencies a royal
|
||||
// pain. (For whatever it's worth, g++ appears to be adhering to spec
|
||||
|
@ -156,13 +153,13 @@ inline static T& makeDependent(T& value) { return value; }
|
|||
#define THREAD_FUNC static void __cdecl
|
||||
#define THREAD_FUNC_RETURN void
|
||||
#define THREAD_HANDLE void *
|
||||
THREAD_HANDLE startThread(void (func) (void *), void *arg);
|
||||
THREAD_HANDLE startThread(void (func) (void *), void *arg, int stackSize = 0);
|
||||
#define THREAD_RETURN return
|
||||
#elif defined(__unixish__)
|
||||
#define THREAD_FUNC static void *
|
||||
#define THREAD_FUNC_RETURN void *
|
||||
#define THREAD_HANDLE pthread_t
|
||||
THREAD_HANDLE startThread(void *(func) (void *), void *arg);
|
||||
THREAD_HANDLE startThread(void *(func) (void *), void *arg, int stackSize = 0);
|
||||
#define THREAD_RETURN return NULL
|
||||
#else
|
||||
#error How do I start a new thread on this platform?
|
||||
|
|
|
@ -141,3 +141,6 @@ constexpr ProtocolVersion currentProtocolVersion(0x0FDB00B070010001LL);
|
|||
// This assert is intended to help prevent incrementing the leftmost digits accidentally. It will probably need to
|
||||
// change when we reach version 10.
|
||||
static_assert(currentProtocolVersion.version() < 0x0FDB00B100000000LL, "Unexpected protocol version");
|
||||
|
||||
// Downgrades are only supported for one minor version
|
||||
constexpr ProtocolVersion minInvalidProtocolVersion(0x0FDB00B071000000LL);
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
#elif !defined(FLOW_THREADHELPER_ACTOR_H)
|
||||
#define FLOW_THREADHELPER_ACTOR_H
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "flow/flow.h"
|
||||
#include "flow/actorcompiler.h" // This must be the last #include.
|
||||
|
||||
|
@ -163,12 +165,12 @@ public:
|
|||
Error error;
|
||||
ThreadCallback *callback;
|
||||
|
||||
// Returns isReadyUnsafe() with a ThreadSpinLockHolder constructed on `mutex` for the
// duration of the call.
bool isReady() {
|
||||
bool isReady() {
|
||||
ThreadSpinLockHolder holder(mutex);
|
||||
return isReadyUnsafe();
|
||||
}
|
||||
|
||||
// Returns isErrorUnsafe() with a ThreadSpinLockHolder constructed on `mutex` for the
// duration of the call.
bool isError() {
|
||||
bool isError() {
|
||||
ThreadSpinLockHolder holder(mutex);
|
||||
return isErrorUnsafe();
|
||||
}
|
||||
|
@ -180,7 +182,7 @@ public:
|
|||
return error.code();
|
||||
}
|
||||
|
||||
// Returns canBeSetUnsafe() with a ThreadSpinLockHolder constructed on `mutex` for the
// duration of the call.
bool canBeSet() {
|
||||
bool canBeSet() {
|
||||
ThreadSpinLockHolder holder(mutex);
|
||||
return canBeSetUnsafe();
|
||||
}
|
||||
|
@ -203,8 +205,8 @@ public:
|
|||
}
|
||||
|
||||
ThreadSingleAssignmentVarBase() : status(Unset), callback(NULL), valueReferenceCount(0) {} //, referenceCount(1) {}
|
||||
~ThreadSingleAssignmentVarBase() {
|
||||
this->mutex.assertNotEntered();
|
||||
~ThreadSingleAssignmentVarBase() {
|
||||
this->mutex.assertNotEntered();
|
||||
|
||||
if(callback)
|
||||
callback->destroy();
|
||||
|
@ -229,7 +231,7 @@ public:
|
|||
ASSERT(false); // Promise fulfilled twice
|
||||
}
|
||||
error = err;
|
||||
status = ErrorSet;
|
||||
status = ErrorSet;
|
||||
if (!callback) {
|
||||
this->mutex.leave();
|
||||
return;
|
||||
|
@ -573,14 +575,16 @@ ACTOR template <class F> void doOnMainThreadVoid( Future<Void> signal, F f, Erro
|
|||
}
|
||||
}
|
||||
|
||||
template <class F> ThreadFuture< decltype(fake<F>()().getValue()) > onMainThread( F f ) {
|
||||
template <class F>
|
||||
ThreadFuture<decltype(std::declval<F>()().getValue())> onMainThread(F f) {
|
||||
Promise<Void> signal;
|
||||
auto returnValue = new ThreadSingleAssignmentVar< decltype(fake<F>()().getValue()) >();
|
||||
auto returnValue = new ThreadSingleAssignmentVar<decltype(std::declval<F>()().getValue())>();
|
||||
returnValue->addref(); // For the ThreadFuture we return
|
||||
Future<Void> cancelFuture = doOnMainThread<decltype(fake<F>()().getValue()), F>( signal.getFuture(), f, returnValue );
|
||||
Future<Void> cancelFuture =
|
||||
doOnMainThread<decltype(std::declval<F>()().getValue()), F>(signal.getFuture(), f, returnValue);
|
||||
returnValue->setCancel( std::move(cancelFuture) );
|
||||
g_network->onMainThread( std::move(signal), TaskPriority::DefaultOnMainThread );
|
||||
return ThreadFuture<decltype(fake<F>()().getValue())>( returnValue );
|
||||
return ThreadFuture<decltype(std::declval<F>()().getValue())>(returnValue);
|
||||
}
|
||||
|
||||
template <class V>
|
||||
|
|
|
@ -101,7 +101,7 @@ void treeBenchmark(T& tree, F generateKey) {
|
|||
keys.resize(std::unique(keys.begin(), keys.end()) - keys.begin());
|
||||
|
||||
auto iter = tree.lower_bound(*keys.begin());
|
||||
timedRun("scan", keys, [&tree, &iter](key const& k) {
|
||||
timedRun("scan", keys, [&iter](key const& k) {
|
||||
ASSERT(k == *iter);
|
||||
++iter;
|
||||
});
|
||||
|
@ -124,4 +124,4 @@ static inline int randomInt() {
|
|||
return deterministicRandom()->randomInt(0, INT32_MAX);
|
||||
}
|
||||
|
||||
#endif // FLOW_TREEBENCHMARK_H
|
||||
#endif // FLOW_TREEBENCHMARK_H
|
||||
|
|
43
flow/Util.h
43
flow/Util.h
|
@ -34,4 +34,47 @@ void swapAndPop(C* container, int index) {
|
|||
container->pop_back();
|
||||
}
|
||||
|
||||
// Scoped hold on a counter: adds n to *pCount upon construction and subtracts it again upon destruction
// (or earlier, via release()). A Hold whose pCount is null holds nothing and never touches a counter.
// Hold is move-only: moving transfers responsibility for the eventual subtraction to the destination
// and leaves the source holding nothing.
template<typename T>
struct Hold {
	// pCount: counter to adjust, or nullptr for an empty hold. n: amount added now and subtracted later.
	Hold(T *pCount = nullptr, T n = 1) : pCount(pCount), n(n) {
		if(pCount != nullptr) {
			*pCount += n;
		}
	}

	// Undoes the addition made at construction, unless the hold was released or moved from.
	~Hold() {
		if(pCount != nullptr) {
			*pCount -= n;
		}
	}

	// Takes over other's hold without touching the counter; other is left empty.
	Hold(Hold &&other) : pCount(other.pCount), n(other.n) {
		other.pCount = nullptr;
	}

	// Drops whatever this object currently holds, then takes over other's hold.
	Hold & operator=(Hold &&other) {
		// Self-move must be a no-op: without this guard the counter would be decremented and then
		// pCount nulled through the alias, silently dropping the hold.
		if(this == &other) {
			return *this;
		}
		if(pCount != nullptr) {
			*pCount -= n;
		}
		pCount = other.pCount;
		other.pCount = nullptr;
		n = other.n;
		return *this;
	}

	// Subtracts n from the counter immediately and empties the hold; safe to call repeatedly.
	void release() {
		if(pCount != nullptr) {
			*pCount -= n;
			pCount = nullptr;
		}
	}

	T *pCount; // counter being held, or nullptr when this object holds nothing
	T n; // amount to subtract from *pCount when the hold ends

	// Copying would subtract n twice for a single addition, so both copy operations are disabled.
	Hold(const Hold &other) = delete;
	void operator=(const Hold &other) = delete;
};
|
||||
|
||||
#endif // _FLOW_UTIL_H_
|
||||
|
|
|
@ -713,7 +713,7 @@ namespace actorcompiler
|
|||
}
|
||||
|
||||
var iter = getIteratorName(cx);
|
||||
state.Add(new StateVar { SourceLine = stmt.FirstSourceLine, name = iter, type = "decltype(std::begin(fake<" + container.type + ">()))", initializer = null });
|
||||
state.Add(new StateVar { SourceLine = stmt.FirstSourceLine, name = iter, type = "decltype(std::begin(std::declval<" + container.type + ">()))", initializer = null });
|
||||
var equivalent = new ForStatement {
|
||||
initExpression = iter + " = std::begin(" + stmt.rangeExpression + ")",
|
||||
condExpression = iter + " != std::end(" + stmt.rangeExpression + ")",
|
||||
|
|
12
flow/flow.h
12
flow/flow.h
|
@ -878,20 +878,20 @@ private:
|
|||
};
|
||||
|
||||
template <class Request>
|
||||
decltype(fake<Request>().reply) const& getReplyPromise(Request const& r) { return r.reply; }
|
||||
|
||||
|
||||
decltype(std::declval<Request>().reply) const& getReplyPromise(Request const& r) {
|
||||
return r.reply;
|
||||
}
|
||||
|
||||
// Neither of these implementations of REPLY_TYPE() works on both MSVC and g++, so...
|
||||
#ifdef __GNUG__
|
||||
#define REPLY_TYPE(RequestType) decltype( getReplyPromise( fake<RequestType>() ).getFuture().getValue() )
|
||||
//#define REPLY_TYPE(RequestType) decltype( getReplyFuture( fake<RequestType>() ).getValue() )
|
||||
#define REPLY_TYPE(RequestType) decltype(getReplyPromise(std::declval<RequestType>()).getFuture().getValue())
|
||||
//#define REPLY_TYPE(RequestType) decltype( getReplyFuture( std::declval<RequestType>() ).getValue() )
|
||||
#else
|
||||
template <class T>
|
||||
struct ReplyType {
|
||||
// Doing this calculation directly in the return value declaration for PromiseStream<T>::getReply()
|
||||
// breaks IntelliSense in VS2010; this is a workaround.
|
||||
typedef decltype(fake<T>().reply.getFuture().getValue()) Type;
|
||||
typedef decltype(std::declval<T>().reply.getFuture().getValue()) Type;
|
||||
};
|
||||
template <class T> class ReplyPromise;
|
||||
template <class T>
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#define GENERICACTORS_ACTOR_H
|
||||
|
||||
#include <list>
|
||||
#include <utility>
|
||||
|
||||
#include "flow/flow.h"
|
||||
#include "flow/Knobs.h"
|
||||
|
@ -299,9 +300,8 @@ Future<Void> storeOrThrow(T &out, Future<Optional<T>> what, Error e = key_not_fo
|
|||
}
|
||||
|
||||
//Waits for a future to be ready, and then applies an asynchronous function to it.
|
||||
ACTOR template<class T, class F, class U = decltype( fake<F>()(fake<T>()).getValue() )>
|
||||
Future<U> mapAsync(Future<T> what, F actorFunc)
|
||||
{
|
||||
ACTOR template <class T, class F, class U = decltype(std::declval<F>()(std::declval<T>()).getValue())>
|
||||
Future<U> mapAsync(Future<T> what, F actorFunc) {
|
||||
T val = wait(what);
|
||||
U ret = wait(actorFunc(val));
|
||||
return ret;
|
||||
|
@ -318,8 +318,8 @@ std::vector<Future<std::invoke_result_t<F, T>>> mapAsync(std::vector<Future<T>>
|
|||
}
|
||||
|
||||
//maps a stream with an asynchronous function
|
||||
ACTOR template<class T, class F, class U = decltype( fake<F>()(fake<T>()).getValue() )>
|
||||
Future<Void> mapAsync( FutureStream<T> input, F actorFunc, PromiseStream<U> output ) {
|
||||
ACTOR template <class T, class F, class U = decltype(std::declval<F>()(std::declval<T>()).getValue())>
|
||||
Future<Void> mapAsync(FutureStream<T> input, F actorFunc, PromiseStream<U> output) {
|
||||
state Deque<Future<U>> futures;
|
||||
|
||||
loop {
|
||||
|
@ -861,7 +861,7 @@ Future<T> ioTimeoutError( Future<T> what, double time ) {
|
|||
Future<Void> end = lowPriorityDelay( time );
|
||||
choose {
|
||||
when( T t = wait( what ) ) { return t; }
|
||||
when( wait( end ) ) {
|
||||
when(wait(end)) {
|
||||
Error err = io_timeout();
|
||||
if(g_network->isSimulated()) {
|
||||
err = err.asInjectedFault();
|
||||
|
@ -1364,8 +1364,7 @@ struct NotifiedInt {
|
|||
NotifiedInt( int64_t val = 0 ) : val(val) {}
|
||||
|
||||
Future<Void> whenAtLeast( int64_t limit ) {
|
||||
if (val >= limit)
|
||||
return Void();
|
||||
if (val >= limit) return Void();
|
||||
Promise<Void> p;
|
||||
waiting.push( std::make_pair(limit,p) );
|
||||
return p.getFuture();
|
||||
|
|
|
@ -213,4 +213,4 @@ TEST_CASE("/flow/network/ipaddress") {
|
|||
return Void();
|
||||
}
|
||||
|
||||
NetworkInfo::NetworkInfo() : handshakeLock( new BoundedFlowLock(FLOW_KNOBS->UNRESTRICTED_HANDSHAKE_LIMIT, FLOW_KNOBS->BOUNDED_HANDSHAKE_LIMIT) ) {}
|
||||
NetworkInfo::NetworkInfo() : handshakeLock( new FlowLock(FLOW_KNOBS->TLS_HANDSHAKE_LIMIT) ) {}
|
||||
|
|
|
@ -353,7 +353,7 @@ struct NetworkMetrics {
|
|||
}
|
||||
};
|
||||
|
||||
struct BoundedFlowLock;
|
||||
struct FlowLock;
|
||||
|
||||
struct NetworkInfo {
|
||||
NetworkMetrics metrics;
|
||||
|
@ -362,7 +362,7 @@ struct NetworkInfo {
|
|||
double lastAlternativesFailureSkipDelay = 0;
|
||||
|
||||
std::map<std::pair<IPAddress, uint16_t>, std::pair<int,double>> serverTLSConnectionThrottler;
|
||||
BoundedFlowLock* handshakeLock;
|
||||
FlowLock *handshakeLock;
|
||||
|
||||
NetworkInfo();
|
||||
};
|
||||
|
@ -510,8 +510,9 @@ public:
|
|||
virtual void initMetrics() {}
|
||||
// Metrics must be initialized after FlowTransport::createInstance has been called
|
||||
|
||||
virtual void initTLS() {}
|
||||
// TLS must be initialized before using the network
|
||||
enum ETLSInitState { NONE = 0, CONFIG = 1, CONNECT = 2, LISTEN = 3};
|
||||
virtual void initTLS(ETLSInitState targetState = CONFIG) {}
|
||||
|
||||
virtual const TLSConfig& getTLSConfig() const = 0;
|
||||
// Return the TLS Configuration
|
||||
|
|
186
flow/serialize.h
186
flow/serialize.h
|
@ -78,7 +78,7 @@ inline typename Archive::WRITER& operator << (Archive& ar, const Item& item ) {
|
|||
|
||||
template <class Archive, class Item>
|
||||
inline typename Archive::READER& operator >> (Archive& ar, Item& item ) {
|
||||
load(ar, item);
|
||||
ar.deserialize(item);
|
||||
return ar;
|
||||
}
|
||||
|
||||
|
@ -276,10 +276,8 @@ struct _IncludeVersion {
|
|||
TraceEvent(SevWarnAlways, "InvalidSerializationVersion").error(err).detailf("Version", "%llx", v.versionWithFlags());
|
||||
throw err;
|
||||
}
|
||||
if (v > currentProtocolVersion) {
|
||||
// For now, no forward compatibility whatsoever is supported. In the future, this check may be weakened for
|
||||
// particular data structures (e.g. to support mismatches between client and server versions when the client
|
||||
// must deserialize zookeeper and database structures)
|
||||
if (v >= minInvalidProtocolVersion) {
|
||||
// Downgrades are only supported for one minor version
|
||||
auto err = incompatible_protocol_version();
|
||||
TraceEvent(SevError, "FutureProtocolVersion").error(err).detailf("Version", "%llx", v.versionWithFlags());
|
||||
throw err;
|
||||
|
@ -510,51 +508,28 @@ private:
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
class ArenaReader {
|
||||
template<class Impl>
|
||||
class _Reader {
|
||||
public:
|
||||
static const int isDeserializing = 1;
|
||||
static constexpr bool isSerializing = false;
|
||||
typedef ArenaReader READER;
|
||||
using READER = Impl;
|
||||
|
||||
const void* readBytes( int bytes ) {
|
||||
const char* b = begin;
|
||||
const char* e = b + bytes;
|
||||
ASSERT( e <= end );
|
||||
begin = e;
|
||||
return b;
|
||||
}
|
||||
|
||||
const void* peekBytes(int bytes) const {
|
||||
ASSERT( begin + bytes <= end );
|
||||
const void *peekBytes(int bytes) const {
|
||||
ASSERT(begin + bytes <= end);
|
||||
return begin;
|
||||
}
|
||||
|
||||
void serializeBytes(void* data, int bytes) {
|
||||
memcpy(data, readBytes(bytes), bytes);
|
||||
}
|
||||
|
||||
const uint8_t* arenaRead( int bytes ) {
|
||||
return (const uint8_t*)readBytes(bytes);
|
||||
}
|
||||
|
||||
StringRef arenaReadAll() const {
|
||||
return StringRef(reinterpret_cast<const uint8_t*>(begin), end - begin);
|
||||
void serializeBytes(void *data, int bytes) {
|
||||
memcpy(data, static_cast<Impl*>(this)->readBytes(bytes), bytes);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void serializeBinaryItem( T& t ) {
|
||||
t = *(T*)readBytes(sizeof(T));
|
||||
t = *(T*)(static_cast<Impl*>(this)->readBytes(sizeof(T)));
|
||||
}
|
||||
|
||||
template <class VersionOptions>
|
||||
ArenaReader( Arena const& arena, const StringRef& input, VersionOptions vo ) : m_pool(arena), check(NULL) {
|
||||
begin = (const char*)input.begin();
|
||||
end = begin + input.size();
|
||||
vo.read(*this);
|
||||
}
|
||||
|
||||
Arena& arena() { return m_pool; }
|
||||
Arena &arena() { return m_pool; }
|
||||
|
||||
ProtocolVersion protocolVersion() const { return m_protocolVersion; }
|
||||
void setProtocolVersion(ProtocolVersion pv) { m_protocolVersion = pv; }
|
||||
|
@ -566,63 +541,85 @@ public:
|
|||
}
|
||||
|
||||
void rewind() {
|
||||
ASSERT(check != NULL);
|
||||
ASSERT(check != nullptr);
|
||||
begin = check;
|
||||
check = NULL;
|
||||
check = nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
const char *begin, *end, *check;
|
||||
protected:
|
||||
_Reader(const char* begin, const char* end) : begin(begin), end(end) {}
|
||||
_Reader(const char* begin, const char* end, const Arena& arena) : begin(begin), end(end), m_pool(arena) {}
|
||||
|
||||
const char *begin, *end;
|
||||
const char* check = nullptr;
|
||||
Arena m_pool;
|
||||
ProtocolVersion m_protocolVersion;
|
||||
};
|
||||
|
||||
class BinaryReader {
|
||||
public:
|
||||
static const int isDeserializing = 1;
|
||||
static constexpr bool isSerializing = false;
|
||||
typedef BinaryReader READER;
|
||||
class ArenaReader : public _Reader<ArenaReader> {
|
||||
Optional<ArenaObjectReader> arenaObjectReader;
|
||||
|
||||
const void* readBytes( int bytes );
|
||||
public:
|
||||
const void* readBytes( int bytes ) {
|
||||
const char* b = begin;
|
||||
const char* e = b + bytes;
|
||||
ASSERT( e <= end );
|
||||
begin = e;
|
||||
return b;
|
||||
}
|
||||
|
||||
const uint8_t* arenaRead( int bytes ) {
|
||||
return (const uint8_t*)readBytes(bytes);
|
||||
}
|
||||
|
||||
const void* peekBytes(int bytes) const {
|
||||
ASSERT( begin + bytes <= end );
|
||||
return begin;
|
||||
}
|
||||
|
||||
void serializeBytes(void* data, int bytes) {
|
||||
memcpy(data, readBytes(bytes), bytes);
|
||||
StringRef arenaReadAll() const {
|
||||
return StringRef(reinterpret_cast<const uint8_t*>(begin), end - begin);
|
||||
}
|
||||
|
||||
|
||||
template <class VersionOptions>
|
||||
ArenaReader(Arena const& arena, const StringRef& input, VersionOptions vo)
|
||||
: _Reader(reinterpret_cast<const char*>(input.begin()), reinterpret_cast<const char*>(input.end()), arena) {
|
||||
vo.read(*this);
|
||||
if (m_protocolVersion.hasObjectSerializerFlag()) {
|
||||
arenaObjectReader = ArenaObjectReader(arena, input, vo);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void serializeBinaryItem( T& t ) {
|
||||
t = *(T*)readBytes(sizeof(T));
|
||||
void deserialize(T& t) {
|
||||
if constexpr (HasFileIdentifier<T>::value) {
|
||||
if (arenaObjectReader.present()) {
|
||||
arenaObjectReader.get().deserialize(t);
|
||||
} else {
|
||||
load(*this, t);
|
||||
}
|
||||
} else {
|
||||
load(*this, t);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class BinaryReader : public _Reader<BinaryReader> {
|
||||
Optional<ObjectReader> objectReader;
|
||||
|
||||
public:
|
||||
const void* readBytes( int bytes );
|
||||
|
||||
const uint8_t* arenaRead( int bytes ) {
|
||||
// Reads and returns the next bytes.
|
||||
// The returned pointer has the lifetime of this.arena()
|
||||
// Could be implemented zero-copy if [begin,end) was in this.arena() already; for now is a copy
|
||||
if (!bytes) return NULL;
|
||||
if (!bytes) return nullptr;
|
||||
uint8_t* dat = new (arena()) uint8_t[ bytes ];
|
||||
serializeBytes( dat, bytes );
|
||||
return dat;
|
||||
}
|
||||
|
||||
template <class VersionOptions>
|
||||
BinaryReader( const void* data, int length, VersionOptions vo ) {
|
||||
begin = (const char*)data;
|
||||
end = begin + length;
|
||||
check = nullptr;
|
||||
vo.read(*this);
|
||||
}
|
||||
template <class VersionOptions>
|
||||
BinaryReader( const StringRef& s, VersionOptions vo ) { begin = (const char*)s.begin(); end = begin + s.size(); vo.read(*this); }
|
||||
template <class VersionOptions>
|
||||
BinaryReader( const std::string& v, VersionOptions vo ) { begin = v.c_str(); end = begin + v.size(); vo.read(*this); }
|
||||
|
||||
Arena& arena() { return m_pool; }
|
||||
|
||||
template <class T, class VersionOptions>
|
||||
static T fromStringRef( StringRef sr, VersionOptions vo ) {
|
||||
T t;
|
||||
|
@ -638,21 +635,42 @@ public:
|
|||
|
||||
bool empty() const { return begin == end; }
|
||||
|
||||
void checkpoint() {
|
||||
check = begin;
|
||||
template <class VersionOptions>
|
||||
BinaryReader(const void* data, int length, VersionOptions vo)
|
||||
: _Reader(reinterpret_cast<const char*>(data), reinterpret_cast<const char*>(data) + length) {
|
||||
readVersion(vo);
|
||||
}
|
||||
template <class VersionOptions>
|
||||
BinaryReader(const StringRef& s, VersionOptions vo)
|
||||
: _Reader(reinterpret_cast<const char*>(s.begin()), reinterpret_cast<const char*>(s.end())) {
|
||||
readVersion(vo);
|
||||
}
|
||||
template <class VersionOptions>
|
||||
BinaryReader(const std::string& s, VersionOptions vo) : _Reader(s.c_str(), s.c_str() + s.size()) {
|
||||
readVersion(vo);
|
||||
}
|
||||
|
||||
void rewind() {
|
||||
ASSERT(check != nullptr);
|
||||
begin = check;
|
||||
check = nullptr;
|
||||
template<class T>
|
||||
void deserialize(T &t) {
|
||||
if constexpr (HasFileIdentifier<T>::value) {
|
||||
if (objectReader.present()) {
|
||||
objectReader.get().deserialize(t);
|
||||
} else {
|
||||
load(*this, t);
|
||||
}
|
||||
} else {
|
||||
load(*this, t);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
const char *begin, *end, *check;
|
||||
Arena m_pool;
|
||||
ProtocolVersion m_protocolVersion;
|
||||
template <class VersionOptions>
|
||||
void readVersion(VersionOptions vo) {
|
||||
vo.read(*this);
|
||||
if (m_protocolVersion.hasObjectSerializerFlag()) {
|
||||
objectReader = ObjectReader(reinterpret_cast<const uint8_t*>(begin), AssumeVersion(m_protocolVersion));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class SendBuffer {
|
||||
|
@ -664,12 +682,16 @@ public:
|
|||
inline uint8_t* data() { return _data; }
|
||||
SendBuffer* next;
|
||||
int bytes_written, bytes_sent;
|
||||
int bytes_unsent() const {
|
||||
return bytes_written - bytes_sent;
|
||||
}
|
||||
};
|
||||
|
||||
struct PacketBuffer : SendBuffer {
|
||||
private:
|
||||
int reference_count;
|
||||
uint32_t size_;
|
||||
static constexpr size_t PACKET_BUFFER_MIN_SIZE = 16384;
|
||||
static constexpr size_t PACKET_BUFFER_OVERHEAD = 32;
|
||||
|
||||
public:
|
||||
|
@ -685,9 +707,9 @@ private:
|
|||
|
||||
public:
|
||||
static PacketBuffer* create(size_t size = 0) {
|
||||
size = std::max(size, 4096 - PACKET_BUFFER_OVERHEAD);
|
||||
if (size == 4096 - PACKET_BUFFER_OVERHEAD) {
|
||||
return new (FastAllocator<4096>::allocate()) PacketBuffer{ size };
|
||||
size = std::max(size, PACKET_BUFFER_MIN_SIZE - PACKET_BUFFER_OVERHEAD);
|
||||
if (size == PACKET_BUFFER_MIN_SIZE - PACKET_BUFFER_OVERHEAD) {
|
||||
return new (FastAllocator<PACKET_BUFFER_MIN_SIZE>::allocate()) PacketBuffer{ size };
|
||||
}
|
||||
uint8_t* mem = new uint8_t[size + PACKET_BUFFER_OVERHEAD];
|
||||
return new (mem) PacketBuffer{ size };
|
||||
|
@ -696,8 +718,8 @@ public:
|
|||
void addref() { ++reference_count; }
|
||||
void delref() {
|
||||
if (!--reference_count) {
|
||||
if (size_ == 4096 - PACKET_BUFFER_OVERHEAD) {
|
||||
FastAllocator<4096>::release(this);
|
||||
if (size_ == PACKET_BUFFER_MIN_SIZE - PACKET_BUFFER_OVERHEAD) {
|
||||
FastAllocator<PACKET_BUFFER_MIN_SIZE>::release(this);
|
||||
} else {
|
||||
delete[] this;
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
|
||||
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
|
||||
<Product Name='$(var.Title)'
|
||||
Id='{C2791390-0993-4F6B-9708-ED2A4558A013}'
|
||||
Id='{409BDCD0-ECF7-4CCA-A3F9-EEEAF0C79A42}'
|
||||
UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
|
||||
Version='$(var.Version)'
|
||||
Manufacturer='$(var.Manufacturer)'
|
||||
|
|
|
@ -154,6 +154,7 @@ if(WITH_PYTHON)
|
|||
add_fdb_test(TEST_FILES rare/ConflictRangeRYOWCheck.toml)
|
||||
add_fdb_test(TEST_FILES rare/CycleRollbackClogged.toml)
|
||||
add_fdb_test(TEST_FILES rare/CycleWithKills.toml)
|
||||
add_fdb_test(TEST_FILES rare/Downgrade.toml)
|
||||
add_fdb_test(TEST_FILES rare/FuzzTest.toml)
|
||||
add_fdb_test(TEST_FILES rare/InventoryTestHeavyWrites.toml)
|
||||
add_fdb_test(TEST_FILES rare/LargeApiCorrectness.toml)
|
||||
|
@ -192,6 +193,9 @@ if(WITH_PYTHON)
|
|||
add_fdb_test(
|
||||
TEST_FILES restarting/from_5.2.0/ClientTransactionProfilingCorrectness-1.txt
|
||||
restarting/from_5.2.0/ClientTransactionProfilingCorrectness-2.txt)
|
||||
add_fdb_test(
|
||||
TEST_FILES restarting/CycleTestRestart-1.txt
|
||||
restarting/CycleTestRestart-2.txt)
|
||||
add_fdb_test(TEST_FILES slow/ApiCorrectness.toml)
|
||||
add_fdb_test(TEST_FILES slow/ApiCorrectnessAtomicRestore.toml)
|
||||
add_fdb_test(TEST_FILES slow/ApiCorrectnessSwitchover.toml)
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
[[test]]
|
||||
testTitle = 'Downgrade'
|
||||
|
||||
[[test.workload]]
|
||||
testName = 'Downgrade'
|
||||
oldKey = 'oldKey'
|
||||
newKey = 'newKey'
|
|
@ -0,0 +1,30 @@
|
|||
testTitle=Clogged
|
||||
clearAfterTest=false
|
||||
testName=Cycle
|
||||
transactionsPerSecond=500.0
|
||||
nodeCount=2500
|
||||
testDuration=10.0
|
||||
expectedRate=0
|
||||
|
||||
testName=RandomClogging
|
||||
testDuration=10.0
|
||||
|
||||
testName=Rollback
|
||||
meanDelay=10.0
|
||||
testDuration=10.0
|
||||
|
||||
testName=Attrition
|
||||
machinesToKill=10
|
||||
machinesToLeave=3
|
||||
reboot=true
|
||||
testDuration=10.0
|
||||
|
||||
testName=Attrition
|
||||
machinesToKill=10
|
||||
machinesToLeave=3
|
||||
reboot=true
|
||||
testDuration=10.0
|
||||
|
||||
testName=SaveAndKill
|
||||
restartInfoLocation=simfdb/restartInfo.ini
|
||||
testDuration=10.0
|
|
@ -0,0 +1,26 @@
|
|||
testTitle=Clogged
|
||||
runSetup=false
|
||||
testName=Cycle
|
||||
transactionsPerSecond=2500.0
|
||||
nodeCount=2500
|
||||
testDuration=10.0
|
||||
expectedRate=0
|
||||
|
||||
testName=RandomClogging
|
||||
testDuration=10.0
|
||||
|
||||
testName=Rollback
|
||||
meanDelay=10.0
|
||||
testDuration=10.0
|
||||
|
||||
testName=Attrition
|
||||
machinesToKill=10
|
||||
machinesToLeave=3
|
||||
reboot=true
|
||||
testDuration=10.0
|
||||
|
||||
testName=Attrition
|
||||
machinesToKill=10
|
||||
machinesToLeave=3
|
||||
reboot=true
|
||||
testDuration=10.0
|
|
@ -0,0 +1,7 @@
|
|||
<?xml version="1.0"?>
|
||||
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<Version>6.2.25</Version>
|
||||
<PackageName>6.2</PackageName>
|
||||
</PropertyGroup>
|
||||
</Project>
|
Loading…
Reference in New Issue