Merge remote-tracking branch 'origin/main' into authz-security-tests

Commit 66e3050062

.flake8 (2 changes)
@@ -1,5 +1,5 @@
[flake8]
-ignore = E203, E266, E501, W503, F403, F401, E711
+ignore = E203, E266, E501, W503, F403, F401, E711, C901, W605
max-line-length = 79
max-complexity = 18
select = B,C,E,F,W,T4,B9
@@ -80,7 +80,7 @@ class Result:
if len(t1) != len(t2):
return False

-return all([Result.elements_equal(x,y) for x,y in zip(t1, t2)])
+return all([Result.elements_equal(x, y) for x, y in zip(t1, t2)])

def matches_key(self, rhs, specification):
if not isinstance(rhs, Result):
@@ -464,7 +464,7 @@ def parse_args(argv):
# SOMEDAY: this applies only to the scripted test. Should we invoke test files specifically (as in circus),
# or invoke them here and allow tests to add arguments?
parser.add_argument('--no-threads', action='store_true', help='Disables the START_THREAD instruction in the scripted test.')

parser.add_argument('--no-directory-snapshot-ops', action='store_true', help='Disables snapshot operations for directory instructions.')

parser.add_argument('--no-tenants', action='store_true', help='Disables tenant operations.')
@@ -577,7 +577,7 @@ class ApiTest(Test):
key1, key2 = key2, key1

# TODO: randomize chunkSize but should not exceed 100M(shard limit)
-chunkSize = 10000000 # 10M
+chunkSize = 10000000  # 10M
instructions.push_args(key1, key2, chunkSize)
instructions.append(op)
self.add_strings(1)
@@ -114,7 +114,7 @@ class DirectoryTest(Test):
instructions.push_args(layer)
instructions.push_args(*test_util.with_length(path))
instructions.append('DIRECTORY_OPEN')
-self.dir_list.append(self.root.add_child(path, DirectoryStateTreeNode(True, True, has_known_prefix=False, is_partition=(layer==b'partition'))))
+self.dir_list.append(self.root.add_child(path, DirectoryStateTreeNode(True, True, has_known_prefix=False, is_partition=(layer == b'partition'))))
# print('%d. Selected %s, dir=%s, dir_id=%s, has_known_prefix=%s, dir_list_len=%d' \
# % (len(instructions), 'DIRECTORY_OPEN', repr(self.dir_index), self.dir_list[-1].dir_id, False, len(self.dir_list)-1))
@@ -163,8 +163,8 @@ class DirectoryTest(Test):
elif root_op == 'DIRECTORY_CREATE_LAYER':
indices = []

-prefixes = [generate_prefix(require_unique=args.concurrency==1, is_partition=True) for i in range(2)]
+prefixes = [generate_prefix(require_unique=args.concurrency == 1, is_partition=True) for i in range(2)]
for i in range(2):
instructions.push_args(prefixes[i])
instructions.push_args(*test_util.with_length(generate_path()))
@@ -184,9 +184,9 @@ class DirectoryTest(Test):
test_util.blocking_commit(instructions)

path = generate_path()
-# Partitions that use the high-contention allocator can result in non-determinism if they fail to commit,
+# Partitions that use the high-contention allocator can result in non-determinism if they fail to commit,
# so we disallow them in comparison tests
-op_args = test_util.with_length(path) + (self.generate_layer(allow_partition=args.concurrency>1),)
+op_args = test_util.with_length(path) + (self.generate_layer(allow_partition=args.concurrency > 1),)
directory_util.push_instruction_and_record_prefix(instructions, op, op_args, path, len(self.dir_list), self.random, self.prefix_log)

if not op.endswith('_DATABASE') and args.concurrency == 1:
@@ -196,14 +196,14 @@ class DirectoryTest(Test):
if child_entry is None:
child_entry = DirectoryStateTreeNode(True, True)

-child_entry.state.has_known_prefix = False
+child_entry.state.has_known_prefix = False
self.dir_list.append(dir_entry.add_child(path, child_entry))

elif root_op == 'DIRECTORY_CREATE':
layer = self.generate_layer()
is_partition = layer == b'partition'

-prefix = generate_prefix(require_unique=is_partition and args.concurrency==1, is_partition=is_partition, min_length=0)
+prefix = generate_prefix(require_unique=is_partition and args.concurrency == 1, is_partition=is_partition, min_length=0)

# Because allocated prefixes are non-deterministic, we cannot have overlapping
# transactions that allocate/remove these prefixes in a comparison test
@@ -409,7 +409,7 @@ def generate_prefix(require_unique=False, is_partition=False, min_length=1):
if require_unique:
min_length = max(min_length, 16)

-length = random.randint(min_length, min_length+5)
+length = random.randint(min_length, min_length + 5)
if length == 0:
return b''
@@ -419,6 +419,6 @@ def generate_prefix(require_unique=False, is_partition=False, min_length=1):
else:
return bytes([random.randrange(ord('\x02'), ord('\x14')) for i in range(0, length)])
else:
-prefix = fixed_prefix
+prefix = fixed_prefix
generated = prefix[0:random.randrange(min_length, len(prefix))]
return generated
@@ -1,5 +1,6 @@
import sys

+
class TreeNodeState:
def __init__(self, node, dir_id, is_directory, is_subspace, has_known_prefix, root, is_partition):
self.dir_id = dir_id
@@ -9,10 +10,11 @@ class TreeNodeState:
self.root = root
self.is_partition = is_partition

-self.parents = { node }
+self.parents = {node}
self.children = {}
self.deleted = False

+
# Represents an element of the directory hierarchy. As a result of various operations (e.g. moves) that
# may or may not have succeeded, a node can represent multiple possible states.
class DirectoryStateTreeNode:
@@ -25,7 +27,7 @@ class DirectoryStateTreeNode:
default_directory = None

# Used for debugging
-dir_id = 0
+dir_id = 0

@classmethod
def reset(cls):
@@ -62,7 +64,7 @@ class DirectoryStateTreeNode:
if default is not None:
default_child = default.state.children.get(subpath[0])

-self_child = self.state.children.get(subpath[0])
+self_child = self.state.children.get(subpath[0])

if self_child is None:
if default_child is None:
@@ -143,13 +145,15 @@ class DirectoryStateTreeNode:
child = self.get_descendent(path)
if child:
child._delete_impl()

+
def validate_dir(dir, root):
if dir.state.is_directory:
assert dir.state.root == root
else:
assert dir.state.root == dir

+
def run_test():
all_entries = []
@@ -249,11 +253,11 @@ def run_test():
# Test moving an entry
assert not entry.state.has_known_prefix
assert not entry.state.is_subspace
-assert list(entry.state.children.keys()) == ['1']
+assert list(entry.state.children.keys()) == ['1']

for e in all_entries:
validate_dir(e, root)


if __name__ == '__main__':
sys.exit(run_test())
@@ -18,7 +18,6 @@
# limitations under the License.
#

import random
import struct

import fdb
@@ -35,6 +34,7 @@ DEFAULT_DIRECTORY_INDEX = 4
DEFAULT_DIRECTORY_PREFIX = b'default'
DIRECTORY_ERROR_STRING = b'DIRECTORY_ERROR'

+
def setup_directories(instructions, default_path, random):
# Clients start with the default directory layer in the directory list
DirectoryStateTreeNode.reset()
@@ -107,7 +107,7 @@ class RandomGenerator(object):
user_version = random.randint(0, 0xffff)
tup.append(fdb.tuple.Versionstamp(tr_version, user_version))
else:
-assert false
+assert False

return tuple(tup)
@@ -31,6 +31,7 @@ from bindingtester.tests import test_util

fdb.api_version(FDB_API_VERSION)

+
class TupleTest(Test):
def __init__(self, subspace):
super(TupleTest, self).__init__(subspace)
@@ -44,14 +45,14 @@ class TupleTest(Test):
def generate(self, args, thread_number):
instructions = InstructionSet()

-min_value = -2**self.max_int_bits+1
-max_value = 2**self.max_int_bits-1
+min_value = -2**self.max_int_bits + 1
+max_value = 2**self.max_int_bits - 1

instructions.append('NEW_TRANSACTION')

# Test integer encoding
mutations = 0
-for i in range(0, self.max_int_bits+1):
+for i in range(0, self.max_int_bits + 1):
for sign in [-1, 1]:
sign_str = '' if sign == 1 else '-'
for offset in range(-10, 11):
@@ -285,7 +285,7 @@ if(NOT WIN32)
--api-tester-bin $<TARGET_FILE:fdb_c_api_tester>
--external-client-library ${CMAKE_CURRENT_BINARY_DIR}/libfdb_c_external.so
--test-file ${test_file}
---knob delete-native-lib-after-loading=false
+--retain-client-lib-copies
)
set_tests_properties("${test_name}" PROPERTIES TIMEOUT 300)
endforeach()
@@ -442,7 +442,7 @@ elseif(NOT WIN32 AND NOT APPLE AND NOT USE_SANITIZER) # Linux Only, non-santizer
DEPENDS ${IMPLIBSO_SRC} fdb_c
COMMENT "Generating source code for C shim library")

-add_library(fdb_c_shim STATIC ${SHIM_LIB_GEN_SRC} foundationdb/fdb_c_shim.h fdb_c_shim.cpp)
+add_library(fdb_c_shim SHARED ${SHIM_LIB_GEN_SRC} foundationdb/fdb_c_shim.h fdb_c_shim.cpp)
target_link_options(fdb_c_shim PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/fdb_c.map,-z,nodelete,-z,noexecstack")
target_link_libraries(fdb_c_shim PUBLIC dl)
target_include_directories(fdb_c_shim PUBLIC
@@ -21,7 +21,7 @@
#include "fdbclient/FDBTypes.h"
#include "flow/ProtocolVersion.h"
#include <cstdint>
-#define FDB_API_VERSION 720
+#define FDB_API_VERSION 730
#define FDB_INCLUDE_LEGACY_TYPES

#include "fdbclient/MultiVersionTransaction.h"
@@ -905,6 +905,10 @@ extern "C" DLLEXPORT fdb_error_t fdb_transaction_get_committed_version(FDBTransa
CATCH_AND_RETURN(*out_version = TXN(tr)->getCommittedVersion(););
}

+extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_total_cost(FDBTransaction* tr) {
+return (FDBFuture*)TXN(tr)->getTotalCost().extractPtr();
+}
+
extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_approximate_size(FDBTransaction* tr) {
return (FDBFuture*)TXN(tr)->getApproximateSize().extractPtr();
}
@@ -27,10 +27,10 @@
#endif

#if !defined(FDB_API_VERSION)
-#error You must #define FDB_API_VERSION prior to including fdb_c.h (current version is 720)
+#error You must #define FDB_API_VERSION prior to including fdb_c.h (current version is 730)
#elif FDB_API_VERSION < 13
#error API version no longer supported (upgrade to 13)
-#elif FDB_API_VERSION > 720
+#elif FDB_API_VERSION > 730
#error Requested API version requires a newer version of this header
#endif
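For orientation only (not part of this commit): a minimal sketch of a client translation unit after the bump, assuming the standard install path for fdb_c.h. The macro must be defined before the include, and the same value is then passed to fdb_select_api_version() at startup.

#define FDB_API_VERSION 730
#include <foundationdb/fdb_c.h>

int main(void) {
    /* Select at runtime the same API version this file was compiled against. */
    fdb_error_t err = fdb_select_api_version(FDB_API_VERSION);
    return err ? 1 : 0;
}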
@@ -514,12 +514,14 @@ DLLEXPORT WARN_UNUSED_RESULT fdb_error_t fdb_transaction_get_committed_version(F
int64_t* out_version);

/*
-* This function intentionally returns an FDBFuture instead of an integer
-* directly, so that calling this API can see the effect of previous
+* These functions intentionally return an FDBFuture instead of an integer
+* directly, so that calling the API can see the effect of previous
* mutations on the transaction. Specifically, mutations are applied
* asynchronously by the main thread. In order to see them, this call has to
* be serviced by the main thread too.
*/
+DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_total_cost(FDBTransaction* tr);
+
DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_approximate_size(FDBTransaction* tr);

DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_versionstamp(FDBTransaction* tr);
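The comment block above is the rationale for the future-returning shape of these getters. As a hedged illustration (not from this diff; it assumes an already-created FDBTransaction* tr and uses the long-standing future helpers), a caller would consume the new fdb_transaction_get_total_cost() roughly like this:

/* Sketch: error handling trimmed for brevity. */
FDBFuture* f = fdb_transaction_get_total_cost(tr);
fdb_error_t err = fdb_future_block_until_ready(f); /* wait until the future has been serviced */
if (!err)
    err = fdb_future_get_error(f);
int64_t total_cost = 0;
if (!err)
    err = fdb_future_get_int64(f, &total_cost);
fdb_future_destroy(f);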
@@ -31,8 +31,8 @@ func_re = re.compile(
"^\s*FDB_API_(?:CHANGED|REMOVED)\s*\(\s*([^,]*),\s*([^)]*)\).*")

with open(source, 'r') as srcfile:
-for l in srcfile:
-m = func_re.match(l)
+for line in srcfile:
+m = func_re.match(line)
if m:
func, ver = m.groups()
if func not in functions:
@@ -59,7 +59,7 @@ def write_windows_asm(asmfile, functions):


def write_unix_asm(asmfile, functions, prefix):
-if cpu != "aarch64" and cpu!= "ppc64le":
+if cpu != "aarch64" and cpu != "ppc64le":
asmfile.write(".intel_syntax noprefix\n")

i = 0
@@ -132,7 +132,7 @@ def write_unix_asm(asmfile, functions, prefix):
asmfile.write("\tstd 31, -8(1)\n")
asmfile.write("\tstd 0,16(1)\n")
asmfile.write("\tstdu 1,-192(1)\n")
-#asmfile.write("\tstd 2,24(1)\n")
+# asmfile.write("\tstd 2,24(1)\n")
asmfile.write("\taddis 11,2,.LC%d@toc@ha\n" % (i))
asmfile.write("\tld 11,.LC%d@toc@l(11)\n" % (i))
asmfile.write("\tld 12,0(11)\n")
@ -20,11 +20,14 @@
|
|||
#include "TesterApiWorkload.h"
|
||||
#include "TesterBlobGranuleUtil.h"
|
||||
#include "TesterUtil.h"
|
||||
#include <unordered_set>
|
||||
#include <memory>
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace FdbApiTester {
|
||||
|
||||
#define BG_API_DEBUG_VERBOSE false
|
||||
|
||||
class ApiBlobGranuleCorrectnessWorkload : public ApiWorkload {
|
||||
public:
|
||||
ApiBlobGranuleCorrectnessWorkload(const WorkloadConfig& config) : ApiWorkload(config) {
|
||||
|
@ -35,7 +38,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
// FIXME: use other new blob granule apis!
|
||||
// FIXME: add tenant support for DB operations
|
||||
enum OpType {
|
||||
OP_INSERT,
|
||||
OP_CLEAR,
|
||||
|
@ -51,7 +54,27 @@ private:
|
|||
|
||||
// Allow reads at the start to get blob_granule_transaction_too_old if BG data isn't initialized yet
|
||||
// FIXME: should still guarantee a read succeeds eventually somehow
|
||||
bool seenReadSuccess = false;
|
||||
// FIXME: this needs to be per tenant if tenant ids are set
|
||||
std::unordered_set<std::optional<int>> tenantsWithReadSuccess;
|
||||
|
||||
inline void setReadSuccess(std::optional<int> tenantId) { tenantsWithReadSuccess.insert(tenantId); }
|
||||
|
||||
inline bool seenReadSuccess(std::optional<int> tenantId) { return tenantsWithReadSuccess.count(tenantId); }
|
||||
|
||||
std::string tenantDebugString(std::optional<int> tenantId) {
|
||||
return tenantId.has_value() ? fmt::format(" (tenant {0})", tenantId.value()) : "";
|
||||
}
|
||||
|
||||
void debugOp(std::string opName, fdb::Key begin, fdb::Key end, std::optional<int> tenantId, std::string message) {
|
||||
if (BG_API_DEBUG_VERBOSE) {
|
||||
info(fmt::format("{0}: [{1} - {2}){3}: {4}",
|
||||
opName,
|
||||
fdb::toCharsRef(begin),
|
||||
fdb::toCharsRef(end),
|
||||
tenantDebugString(tenantId),
|
||||
message));
|
||||
}
|
||||
}
|
||||
|
||||
void randomReadOp(TTaskFct cont, std::optional<int> tenantId) {
|
||||
fdb::Key begin = randomKeyName();
|
||||
|
@ -63,8 +86,10 @@ private:
|
|||
auto results = std::make_shared<std::vector<fdb::KeyValue>>();
|
||||
auto tooOld = std::make_shared<bool>(false);
|
||||
|
||||
debugOp("Read", begin, end, tenantId, "starting");
|
||||
|
||||
execTransaction(
|
||||
[this, begin, end, results, tooOld](auto ctx) {
|
||||
[this, begin, end, tenantId, results, tooOld](auto ctx) {
|
||||
ctx->tx().setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
|
||||
TesterGranuleContext testerContext(ctx->getBGBasePath());
|
||||
fdb::native::FDBReadBlobGranuleContext granuleContext = createGranuleContext(&testerContext);
|
||||
|
@ -74,8 +99,13 @@ private:
|
|||
auto out = fdb::Result::KeyValueRefArray{};
|
||||
fdb::Error err = res.getKeyValueArrayNothrow(out);
|
||||
if (err.code() == error_code_blob_granule_transaction_too_old) {
|
||||
info("BlobGranuleCorrectness::randomReadOp bg too old\n");
|
||||
ASSERT(!seenReadSuccess);
|
||||
bool previousSuccess = seenReadSuccess(tenantId);
|
||||
if (previousSuccess) {
|
||||
error("Read bg too old after read success!\n");
|
||||
} else {
|
||||
info("Read bg too old\n");
|
||||
}
|
||||
ASSERT(!previousSuccess);
|
||||
*tooOld = true;
|
||||
ctx->done();
|
||||
} else if (err.code() != error_code_success) {
|
||||
|
@ -85,10 +115,13 @@ private:
|
|||
auto& [resVector, out_more] = resCopy;
|
||||
ASSERT(!out_more);
|
||||
results.get()->assign(resVector.begin(), resVector.end());
|
||||
if (!seenReadSuccess) {
|
||||
info("BlobGranuleCorrectness::randomReadOp first success\n");
|
||||
bool previousSuccess = seenReadSuccess(tenantId);
|
||||
if (!previousSuccess) {
|
||||
info(fmt::format("Read{0}: first success\n", tenantDebugString(tenantId)));
|
||||
setReadSuccess(tenantId);
|
||||
} else {
|
||||
debugOp("Read", begin, end, tenantId, "complete");
|
||||
}
|
||||
seenReadSuccess = true;
|
||||
ctx->done();
|
||||
}
|
||||
},
|
||||
|
@ -97,7 +130,7 @@ private:
|
|||
std::vector<fdb::KeyValue> expected =
|
||||
stores[tenantId].getRange(begin, end, stores[tenantId].size(), false);
|
||||
if (results->size() != expected.size()) {
|
||||
error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}",
|
||||
error(fmt::format("randomReadOp result size mismatch. expected: {0} actual: {1}",
|
||||
expected.size(),
|
||||
results->size()));
|
||||
}
|
||||
|
@ -105,7 +138,7 @@ private:
|
|||
|
||||
for (int i = 0; i < results->size(); i++) {
|
||||
if ((*results)[i].key != expected[i].key) {
|
||||
error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}",
|
||||
error(fmt::format("randomReadOp key mismatch at {0}/{1}. expected: {2} actual: {3}",
|
||||
i,
|
||||
results->size(),
|
||||
fdb::toCharsRef(expected[i].key),
|
||||
|
@ -138,6 +171,8 @@ private:
|
|||
}
|
||||
auto results = std::make_shared<std::vector<fdb::KeyRange>>();
|
||||
|
||||
debugOp("GetGranules", begin, end, tenantId, "starting");
|
||||
|
||||
execTransaction(
|
||||
[begin, end, results](auto ctx) {
|
||||
fdb::Future f = ctx->tx().getBlobGranuleRanges(begin, end, 1000).eraseType();
|
||||
|
@ -149,15 +184,17 @@ private:
|
|||
},
|
||||
true);
|
||||
},
|
||||
[this, begin, end, results, cont]() {
|
||||
this->validateRanges(results, begin, end, seenReadSuccess);
|
||||
[this, begin, end, tenantId, results, cont]() {
|
||||
debugOp(
|
||||
"GetGranules", begin, end, tenantId, fmt::format("complete with {0} granules", results->size()));
|
||||
this->validateRanges(results, begin, end, seenReadSuccess(tenantId));
|
||||
schedule(cont);
|
||||
},
|
||||
getTenant(tenantId));
|
||||
}
|
||||
|
||||
void randomSummarizeOp(TTaskFct cont, std::optional<int> tenantId) {
|
||||
if (!seenReadSuccess) {
|
||||
if (!seenReadSuccess(tenantId)) {
|
||||
// tester can't handle this throwing bg_txn_too_old, so just don't call it unless we have already seen a
|
||||
// read success
|
||||
schedule(cont);
|
||||
|
@ -169,6 +206,9 @@ private:
|
|||
std::swap(begin, end);
|
||||
}
|
||||
auto results = std::make_shared<std::vector<fdb::GranuleSummary>>();
|
||||
|
||||
debugOp("Summarize", begin, end, tenantId, "starting");
|
||||
|
||||
execTransaction(
|
||||
[begin, end, results](auto ctx) {
|
||||
fdb::Future f = ctx->tx().summarizeBlobGranules(begin, end, -2 /*latest version*/, 1000).eraseType();
|
||||
|
@ -180,10 +220,11 @@ private:
|
|||
},
|
||||
true);
|
||||
},
|
||||
[this, begin, end, results, cont]() {
|
||||
ASSERT(results->size() > 0);
|
||||
ASSERT(results->front().keyRange.beginKey <= begin);
|
||||
ASSERT(results->back().keyRange.endKey >= end);
|
||||
[this, begin, end, tenantId, results, cont]() {
|
||||
debugOp("Summarize", begin, end, tenantId, fmt::format("complete with {0} granules", results->size()));
|
||||
|
||||
// use validateRanges to share validation
|
||||
auto ranges = std::make_shared<std::vector<fdb::KeyRange>>();
|
||||
|
||||
for (int i = 0; i < results->size(); i++) {
|
||||
// TODO: could do validation of subsequent calls and ensure snapshot version never decreases
|
||||
|
@ -191,12 +232,11 @@ private:
|
|||
ASSERT((*results)[i].snapshotVersion <= (*results)[i].deltaVersion);
|
||||
ASSERT((*results)[i].snapshotSize > 0);
|
||||
ASSERT((*results)[i].deltaSize >= 0);
|
||||
|
||||
ranges->push_back((*results)[i].keyRange);
|
||||
}
|
||||
|
||||
for (int i = 1; i < results->size(); i++) {
|
||||
// ranges contain entire requested key range
|
||||
ASSERT((*results)[i].keyRange.beginKey == (*results)[i - 1].keyRange.endKey);
|
||||
}
|
||||
this->validateRanges(ranges, begin, end, true);
|
||||
|
||||
schedule(cont);
|
||||
},
|
||||
|
@ -208,18 +248,29 @@ private:
|
|||
fdb::Key end,
|
||||
bool shouldBeRanges) {
|
||||
if (shouldBeRanges) {
|
||||
if (results->size() == 0) {
|
||||
error(fmt::format(
|
||||
"ValidateRanges: [{0} - {1}): No ranges returned!", fdb::toCharsRef(begin), fdb::toCharsRef(end)));
|
||||
}
|
||||
ASSERT(results->size() > 0);
|
||||
if (results->front().beginKey > begin || results->back().endKey < end) {
|
||||
error(fmt::format("ValidateRanges: [{0} - {1}): Incomplete range(s) returned [{2} - {3})!",
|
||||
fdb::toCharsRef(begin),
|
||||
fdb::toCharsRef(end),
|
||||
fdb::toCharsRef(results->front().beginKey),
|
||||
fdb::toCharsRef(results->back().endKey)));
|
||||
}
|
||||
ASSERT(results->front().beginKey <= begin);
|
||||
ASSERT(results->back().endKey >= end);
|
||||
}
|
||||
for (int i = 0; i < results->size(); i++) {
|
||||
// no empty or inverted ranges
|
||||
if ((*results)[i].beginKey >= (*results)[i].endKey) {
|
||||
error(fmt::format("Empty/inverted range [{0} - {1}) for getBlobGranuleRanges({2} - {3})",
|
||||
fdb::toCharsRef((*results)[i].beginKey),
|
||||
fdb::toCharsRef((*results)[i].endKey),
|
||||
error(fmt::format("ValidateRanges: [{0} - {1}): Empty/inverted range [{2} - {3})",
|
||||
fdb::toCharsRef(begin),
|
||||
fdb::toCharsRef(end)));
|
||||
fdb::toCharsRef(end),
|
||||
fdb::toCharsRef((*results)[i].beginKey),
|
||||
fdb::toCharsRef((*results)[i].endKey)));
|
||||
}
|
||||
ASSERT((*results)[i].beginKey < (*results)[i].endKey);
|
||||
}
|
||||
|
@ -227,16 +278,17 @@ private:
|
|||
for (int i = 1; i < results->size(); i++) {
|
||||
// ranges contain entire requested key range
|
||||
if ((*results)[i].beginKey != (*results)[i].endKey) {
|
||||
error(fmt::format("Non-contiguous range [{0} - {1}) for getBlobGranuleRanges({2} - {3})",
|
||||
fdb::toCharsRef((*results)[i].beginKey),
|
||||
fdb::toCharsRef((*results)[i].endKey),
|
||||
error(fmt::format("ValidateRanges: [{0} - {1}): Non-covereed range [{2} - {3})",
|
||||
fdb::toCharsRef(begin),
|
||||
fdb::toCharsRef(end)));
|
||||
fdb::toCharsRef(end),
|
||||
fdb::toCharsRef((*results)[i - 1].endKey),
|
||||
fdb::toCharsRef((*results)[i].endKey)));
|
||||
}
|
||||
ASSERT((*results)[i].beginKey == (*results)[i - 1].endKey);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: tenant support
|
||||
void randomGetBlobRangesOp(TTaskFct cont) {
|
||||
fdb::Key begin = randomKeyName();
|
||||
fdb::Key end = randomKeyName();
|
||||
|
@ -244,6 +296,10 @@ private:
|
|||
if (begin > end) {
|
||||
std::swap(begin, end);
|
||||
}
|
||||
std::optional<int> tenantId = {};
|
||||
|
||||
debugOp("GetBlobRanges", begin, end, tenantId, "starting");
|
||||
|
||||
execOperation(
|
||||
[begin, end, results](auto ctx) {
|
||||
fdb::Future f = ctx->db().listBlobbifiedRanges(begin, end, 1000).eraseType();
|
||||
|
@ -252,22 +308,27 @@ private:
|
|||
ctx->done();
|
||||
});
|
||||
},
|
||||
[this, begin, end, results, cont]() {
|
||||
this->validateRanges(results, begin, end, seenReadSuccess);
|
||||
[this, begin, end, tenantId, results, cont]() {
|
||||
debugOp(
|
||||
"GetBlobRanges", begin, end, tenantId, fmt::format("complete with {0} ranges", results->size()));
|
||||
this->validateRanges(results, begin, end, seenReadSuccess(tenantId));
|
||||
schedule(cont);
|
||||
},
|
||||
/* failOnError = */ false);
|
||||
}
|
||||
|
||||
// TODO: tenant support
|
||||
void randomVerifyOp(TTaskFct cont) {
|
||||
fdb::Key begin = randomKeyName();
|
||||
fdb::Key end = randomKeyName();
|
||||
std::optional<int> tenantId;
|
||||
if (begin > end) {
|
||||
std::swap(begin, end);
|
||||
}
|
||||
|
||||
auto verifyVersion = std::make_shared<int64_t>(false);
|
||||
// info("Verify op starting");
|
||||
|
||||
debugOp("Verify", begin, end, tenantId, "starting");
|
||||
|
||||
execOperation(
|
||||
[begin, end, verifyVersion](auto ctx) {
|
||||
|
@ -277,16 +338,15 @@ private:
|
|||
ctx->done();
|
||||
});
|
||||
},
|
||||
[this, begin, end, verifyVersion, cont]() {
|
||||
[this, begin, end, tenantId, verifyVersion, cont]() {
|
||||
debugOp("Verify", begin, end, tenantId, fmt::format("Complete @ {0}", *verifyVersion));
|
||||
bool previousSuccess = seenReadSuccess(tenantId);
|
||||
if (*verifyVersion == -1) {
|
||||
ASSERT(!seenReadSuccess);
|
||||
} else {
|
||||
if (!seenReadSuccess) {
|
||||
info("BlobGranuleCorrectness::randomVerifyOp first success");
|
||||
}
|
||||
seenReadSuccess = true;
|
||||
ASSERT(!previousSuccess);
|
||||
} else if (!previousSuccess) {
|
||||
info(fmt::format("Verify{0}: first success\n", tenantDebugString(tenantId)));
|
||||
setReadSuccess(tenantId);
|
||||
}
|
||||
// info(fmt::format("verify op done @ {}", *verifyVersion));
|
||||
schedule(cont);
|
||||
},
|
||||
/* failOnError = */ false);
|
||||
|
|
|
@@ -57,6 +57,7 @@ public:
std::string tlsCertFile;
std::string tlsKeyFile;
std::string tlsCaFile;
+bool retainClientLibCopies = false;
};

} // namespace FdbApiTester
@@ -27,7 +27,7 @@
#include <unordered_map>
#include <vector>

-#define FDB_API_VERSION 720
+#define FDB_API_VERSION 730

namespace FdbApiTester {
@@ -36,7 +36,7 @@ namespace FdbApiTester {

namespace {

-#define API_VERSION_CLIENT_TMP_DIR 720
+#define API_VERSION_CLIENT_TMP_DIR 730

enum TesterOptionId {
OPT_CONNFILE,
@ -61,6 +61,7 @@ enum TesterOptionId {
|
|||
OPT_TLS_CERT_FILE,
|
||||
OPT_TLS_KEY_FILE,
|
||||
OPT_TLS_CA_FILE,
|
||||
OPT_RETAIN_CLIENT_LIB_COPIES,
|
||||
};
|
||||
|
||||
CSimpleOpt::SOption TesterOptionDefs[] = //
|
||||
|
@ -89,6 +90,7 @@ CSimpleOpt::SOption TesterOptionDefs[] = //
|
|||
{ OPT_TLS_CERT_FILE, "--tls-cert-file", SO_REQ_SEP },
|
||||
{ OPT_TLS_KEY_FILE, "--tls-key-file", SO_REQ_SEP },
|
||||
{ OPT_TLS_CA_FILE, "--tls-ca-file", SO_REQ_SEP },
|
||||
{ OPT_RETAIN_CLIENT_LIB_COPIES, "--retain-client-lib-copies", SO_NONE },
|
||||
SO_END_OF_OPTIONS };
|
||||
|
||||
void printProgramUsage(const char* execName) {
|
||||
|
@ -140,6 +142,8 @@ void printProgramUsage(const char* execName) {
|
|||
" Path to file containing client's TLS private key\n"
|
||||
" --tls-ca-file FILE\n"
|
||||
" Path to file containing TLS CA certificate\n"
|
||||
" --retain-client-lib-copies\n"
|
||||
" Retain temporary external client library copies\n"
|
||||
" -h, --help Display this help and exit.\n",
|
||||
FDB_API_VERSION);
|
||||
}
|
||||
|
@ -251,6 +255,9 @@ bool processArg(TesterOptions& options, const CSimpleOpt& args) {
|
|||
case OPT_TLS_CA_FILE:
|
||||
options.tlsCaFile.assign(args.OptionArg());
|
||||
break;
|
||||
case OPT_RETAIN_CLIENT_LIB_COPIES:
|
||||
options.retainClientLibCopies = true;
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -348,6 +355,10 @@ void applyNetworkOptions(TesterOptions& options) {
|
|||
if (!options.tlsCaFile.empty()) {
|
||||
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CA_PATH, options.tlsCaFile);
|
||||
}
|
||||
|
||||
if (options.retainClientLibCopies) {
|
||||
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_RETAIN_CLIENT_LIBRARY_COPIES);
|
||||
}
|
||||
}
|
||||
|
||||
void randomizeOptions(TesterOptions& options) {
|
||||
|
@ -459,8 +470,10 @@ int main(int argc, char** argv) {
|
|||
retCode = 1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Stopping FDB network thread\n");
|
||||
fdb_check(fdb::network::stop(), "Failed to stop FDB thread");
|
||||
network_thread.join();
|
||||
fprintf(stderr, "FDB network thread successfully stopped\n");
|
||||
} catch (const std::exception& err) {
|
||||
fmt::print(stderr, "ERROR: {}\n", err.what());
|
||||
retCode = 1;
|
||||
|
|
|
@ -38,6 +38,7 @@ from tmp_cluster import TempCluster
|
|||
from local_cluster import TLSConfig
|
||||
# fmt: on
|
||||
|
||||
|
||||
TESTER_STATS_INTERVAL_SEC = 5
|
||||
|
||||
|
||||
|
@ -98,6 +99,9 @@ def run_tester(args, cluster, test_file):
|
|||
external_client_library = Path(args.external_client_library).resolve()
|
||||
cmd += ["--external-client-library", external_client_library]
|
||||
|
||||
if args.retain_client_lib_copies:
|
||||
cmd += ["--retain-client-lib-copies"]
|
||||
|
||||
if cluster.blob_granules_enabled:
|
||||
cmd += [
|
||||
"--blob-granule-local-file-path",
|
||||
|
@ -209,6 +213,12 @@ def parse_args(argv):
|
|||
parser.add_argument("--build-dir", "-b", type=str, required=True, help="FDB build directory")
|
||||
parser.add_argument("--api-tester-bin", type=str, help="Path to the fdb_c_api_tester executable.", required=True)
|
||||
parser.add_argument("--external-client-library", type=str, help="Path to the external client library.")
|
||||
parser.add_argument(
|
||||
"--retain-client-lib-copies",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Retain temporary external client library copies.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cluster-file",
|
||||
type=str,
|
||||
|
|
|
@@ -18,7 +18,7 @@
* limitations under the License.
*/

-#define FDB_API_VERSION 720
+#define FDB_API_VERSION 730
#include <foundationdb/fdb_c.h>

#include "unit/fdb_api.hpp"
@@ -23,7 +23,7 @@
#pragma once

#ifndef FDB_API_VERSION
-#define FDB_API_VERSION 720
+#define FDB_API_VERSION 730
#endif

#include <cassert>
@ -716,6 +716,12 @@ public:
|
|||
throwError("Failed to create transaction: ", err);
|
||||
return Transaction(tx_native);
|
||||
}
|
||||
|
||||
TypedFuture<future_var::Bool> blobbifyRange(KeyRef begin, KeyRef end) {
|
||||
if (!tenant)
|
||||
throw std::runtime_error("blobbifyRange from null tenant");
|
||||
return native::fdb_tenant_blobbify_range(tenant.get(), begin.data(), intSize(begin), end.data(), intSize(end));
|
||||
}
|
||||
};
|
||||
|
||||
class Database {
|
||||
|
|
|
@@ -7,8 +7,7 @@ import subprocess
import sys
import os

-sys.path[:0] = [os.path.join(os.path.dirname(
-__file__), '..', '..', '..', 'tests', 'TestRunner')]
+sys.path[:0] = [os.path.join(os.path.dirname(__file__), '..', '..', '..', 'tests', 'TestRunner')]

# fmt: off
from binary_download import FdbBinaryDownloader, CURRENT_VERSION
@ -131,21 +131,10 @@ repeat_immediate_steps:
|
|||
iter.opName(),
|
||||
iter.step,
|
||||
err.what());
|
||||
updateErrorStats(err, iter.op);
|
||||
tx.onError(err).then([this, state = shared_from_this()](Future f) {
|
||||
const auto rc = handleForOnError(tx, f, fmt::format("{}:{}", iter.opName(), iter.step));
|
||||
if (rc == FutureRC::RETRY) {
|
||||
stats.incrErrorCount(iter.op);
|
||||
} else if (rc == FutureRC::CONFLICT) {
|
||||
stats.incrConflictCount();
|
||||
} else if (rc == FutureRC::ABORT) {
|
||||
tx.reset();
|
||||
signalEnd();
|
||||
return;
|
||||
}
|
||||
// restart this iteration from beginning
|
||||
iter = getOpBegin(args);
|
||||
needs_commit = false;
|
||||
postNextTick();
|
||||
onIterationEnd(rc);
|
||||
});
|
||||
} else {
|
||||
// async step succeeded
|
||||
|
@ -159,20 +148,9 @@ repeat_immediate_steps:
|
|||
}
|
||||
} else {
|
||||
// blob granules op error
|
||||
auto rc = handleForOnError(tx, f, "BG_ON_ERROR");
|
||||
if (rc == FutureRC::RETRY) {
|
||||
stats.incrErrorCount(iter.op);
|
||||
} else if (rc == FutureRC::CONFLICT) {
|
||||
stats.incrConflictCount();
|
||||
} else if (rc == FutureRC::ABORT) {
|
||||
tx.reset();
|
||||
stopcount.fetch_add(1);
|
||||
return;
|
||||
}
|
||||
iter = getOpBegin(args);
|
||||
needs_commit = false;
|
||||
// restart this iteration from beginning
|
||||
postNextTick();
|
||||
updateErrorStats(f.error(), iter.op);
|
||||
FutureRC rc = handleForOnError(tx, f, "BG_ON_ERROR");
|
||||
onIterationEnd(rc);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -217,23 +195,10 @@ void ResumableStateForRunWorkload::onTransactionSuccess() {
|
|||
"ERROR",
|
||||
"Post-iteration commit returned error: {}",
|
||||
err.what());
|
||||
updateErrorStats(err, OP_COMMIT);
|
||||
tx.onError(err).then([this, state = shared_from_this()](Future f) {
|
||||
const auto rc = handleForOnError(tx, f, "ON_ERROR");
|
||||
if (rc == FutureRC::CONFLICT)
|
||||
stats.incrConflictCount();
|
||||
else
|
||||
stats.incrErrorCount(OP_COMMIT);
|
||||
if (rc == FutureRC::ABORT) {
|
||||
signalEnd();
|
||||
return;
|
||||
}
|
||||
if (ended()) {
|
||||
signalEnd();
|
||||
} else {
|
||||
iter = getOpBegin(args);
|
||||
needs_commit = false;
|
||||
postNextTick();
|
||||
}
|
||||
onIterationEnd(rc);
|
||||
});
|
||||
} else {
|
||||
// commit successful
|
||||
|
@ -249,13 +214,7 @@ void ResumableStateForRunWorkload::onTransactionSuccess() {
|
|||
stats.incrOpCount(OP_TRANSACTION);
|
||||
tx.reset();
|
||||
watch_tx.startFromStop();
|
||||
if (ended()) {
|
||||
signalEnd();
|
||||
} else {
|
||||
// start next iteration
|
||||
iter = getOpBegin(args);
|
||||
postNextTick();
|
||||
}
|
||||
onIterationEnd(FutureRC::OK);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
|
@ -268,12 +227,29 @@ void ResumableStateForRunWorkload::onTransactionSuccess() {
|
|||
stats.incrOpCount(OP_TRANSACTION);
|
||||
watch_tx.startFromStop();
|
||||
tx.reset();
|
||||
if (ended()) {
|
||||
signalEnd();
|
||||
onIterationEnd(FutureRC::OK);
|
||||
}
|
||||
}
|
||||
void ResumableStateForRunWorkload::onIterationEnd(FutureRC rc) {
|
||||
// restart current iteration from beginning unless ended
|
||||
if (rc == FutureRC::OK || rc == FutureRC::ABORT) {
|
||||
total_xacts++;
|
||||
}
|
||||
if (ended()) {
|
||||
signalEnd();
|
||||
} else {
|
||||
iter = getOpBegin(args);
|
||||
needs_commit = false;
|
||||
postNextTick();
|
||||
}
|
||||
}
|
||||
|
||||
void ResumableStateForRunWorkload::updateErrorStats(fdb::Error err, int op) {
|
||||
if (err) {
|
||||
if (err.is(1020 /*not_commited*/)) {
|
||||
stats.incrConflictCount();
|
||||
} else {
|
||||
iter = getOpBegin(args);
|
||||
// start next iteration
|
||||
postNextTick();
|
||||
stats.incrErrorCount(op);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <boost/asio.hpp>
|
||||
#include "logger.hpp"
|
||||
#include "mako.hpp"
|
||||
#include "future.hpp"
|
||||
#include "shm.hpp"
|
||||
#include "stats.hpp"
|
||||
#include "time.hpp"
|
||||
|
@ -79,6 +80,7 @@ struct ResumableStateForRunWorkload : std::enable_shared_from_this<ResumableStat
|
|||
boost::asio::io_context& io_context;
|
||||
Arguments const& args;
|
||||
ThreadStatistics& stats;
|
||||
int64_t total_xacts;
|
||||
std::atomic<int>& stopcount;
|
||||
std::atomic<int> const& signal;
|
||||
int max_iters;
|
||||
|
@ -102,20 +104,20 @@ struct ResumableStateForRunWorkload : std::enable_shared_from_this<ResumableStat
|
|||
std::atomic<int> const& signal,
|
||||
int max_iters,
|
||||
OpIterator iter)
|
||||
: logr(logr), db(db), tx(tx), io_context(io_context), args(args), stats(stats), stopcount(stopcount),
|
||||
signal(signal), max_iters(max_iters), iter(iter), needs_commit(false) {
|
||||
: logr(logr), db(db), tx(tx), io_context(io_context), args(args), stats(stats), total_xacts(0),
|
||||
stopcount(stopcount), signal(signal), max_iters(max_iters), iter(iter), needs_commit(false) {
|
||||
key1.resize(args.key_length);
|
||||
key2.resize(args.key_length);
|
||||
val.resize(args.value_length);
|
||||
}
|
||||
void signalEnd() noexcept { stopcount.fetch_add(1); }
|
||||
bool ended() noexcept {
|
||||
return (max_iters != -1 && max_iters >= stats.getOpCount(OP_TRANSACTION)) || signal.load() == SIGNAL_RED;
|
||||
}
|
||||
bool ended() noexcept { return (max_iters != -1 && total_xacts >= max_iters) || signal.load() == SIGNAL_RED; }
|
||||
void postNextTick();
|
||||
void runOneTick();
|
||||
void updateStepStats();
|
||||
void onTransactionSuccess();
|
||||
void onIterationEnd(FutureRC rc);
|
||||
void updateErrorStats(fdb::Error err, int op);
|
||||
};
|
||||
|
||||
using RunWorkloadStateHandle = std::shared_ptr<ResumableStateForRunWorkload>;
|
||||
|
|
|
@ -31,21 +31,28 @@ extern thread_local mako::Logger logr;
|
|||
|
||||
namespace mako {
|
||||
|
||||
enum class FutureRC { OK, RETRY, CONFLICT, ABORT };
|
||||
enum class FutureRC { OK, RETRY, ABORT };
|
||||
|
||||
template <class FutureType>
|
||||
force_inline bool waitFuture(FutureType& f, std::string_view step) {
|
||||
assert(f);
|
||||
auto err = f.blockUntilReady();
|
||||
if (err) {
|
||||
assert(!err.retryable());
|
||||
logr.error("'{}' found at blockUntilReady during step '{}'", err.what(), step);
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
template <class FutureType>
|
||||
force_inline FutureRC handleForOnError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
|
||||
if (auto err = f.error()) {
|
||||
if (err.is(1020 /*not_committed*/)) {
|
||||
return FutureRC::CONFLICT;
|
||||
} else if (err.retryable()) {
|
||||
logr.warn("Retryable error '{}' found at on_error(), step: {}", err.what(), step);
|
||||
return FutureRC::RETRY;
|
||||
} else {
|
||||
logr.error("Unretryable error '{}' found at on_error(), step: {}", err.what(), step);
|
||||
tx.reset();
|
||||
return FutureRC::ABORT;
|
||||
}
|
||||
assert(!(err.retryable()));
|
||||
logr.error("Unretryable error '{}' found at on_error(), step: {}", err.what(), step);
|
||||
tx.reset();
|
||||
return FutureRC::ABORT;
|
||||
} else {
|
||||
return FutureRC::RETRY;
|
||||
}
|
||||
|
@ -54,8 +61,7 @@ force_inline FutureRC handleForOnError(fdb::Transaction& tx, FutureType& f, std:
|
|||
template <class FutureType>
|
||||
force_inline FutureRC waitAndHandleForOnError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
|
||||
assert(f);
|
||||
if (auto err = f.blockUntilReady()) {
|
||||
logr.error("'{}' found while waiting for on_error() future, step: {}", err.what(), step);
|
||||
if (!waitFuture(f, step)) {
|
||||
return FutureRC::ABORT;
|
||||
}
|
||||
return handleForOnError(tx, f, step);
|
||||
|
@ -65,15 +71,13 @@ force_inline FutureRC waitAndHandleForOnError(fdb::Transaction& tx, FutureType&
|
|||
template <class FutureType>
|
||||
force_inline FutureRC waitAndHandleError(fdb::Transaction& tx, FutureType& f, std::string_view step) {
|
||||
assert(f);
|
||||
auto err = fdb::Error{};
|
||||
if ((err = f.blockUntilReady())) {
|
||||
const auto retry = err.retryable();
|
||||
logr.error("{} error '{}' found during step: {}", (retry ? "Retryable" : "Unretryable"), err.what(), step);
|
||||
return retry ? FutureRC::RETRY : FutureRC::ABORT;
|
||||
if (!waitFuture(f, step)) {
|
||||
return FutureRC::ABORT;
|
||||
}
|
||||
err = f.error();
|
||||
if (!err)
|
||||
auto err = f.error();
|
||||
if (!err) {
|
||||
return FutureRC::OK;
|
||||
}
|
||||
if (err.retryable()) {
|
||||
logr.warn("step {} returned '{}'", step, err.what());
|
||||
} else {
|
||||
|
|
|
@ -59,6 +59,8 @@
|
|||
#include "shm.hpp"
|
||||
#include "stats.hpp"
|
||||
#include "time.hpp"
|
||||
#include "rapidjson/document.h"
|
||||
#include "rapidjson/error/en.h"
|
||||
|
||||
namespace mako {
|
||||
|
||||
|
@ -88,14 +90,29 @@ Transaction createNewTransaction(Database db, Arguments const& args, int id = -1
|
|||
}
|
||||
// Create Tenant Transaction
|
||||
int tenant_id = (id == -1) ? urand(0, args.active_tenants - 1) : id;
|
||||
Transaction tr;
|
||||
std::string tenantStr;
|
||||
// If provided tenants array, use it
|
||||
if (tenants) {
|
||||
return tenants[tenant_id].createTransaction();
|
||||
tr = tenants[tenant_id].createTransaction();
|
||||
} else {
|
||||
tenantStr = "tenant" + std::to_string(tenant_id);
|
||||
BytesRef tenant_name = toBytesRef(tenantStr);
|
||||
Tenant t = db.openTenant(tenant_name);
|
||||
tr = t.createTransaction();
|
||||
}
|
||||
std::string tenantStr = "tenant" + std::to_string(tenant_id);
|
||||
BytesRef tenant_name = toBytesRef(tenantStr);
|
||||
Tenant t = db.openTenant(tenant_name);
|
||||
return t.createTransaction();
|
||||
if (!args.authorization_tokens.empty()) {
|
||||
// lookup token based on tenant name and, if found, set authz token to transaction
|
||||
if (tenantStr.empty())
|
||||
tenantStr = "tenant" + std::to_string(tenant_id);
|
||||
auto tokenMapItr = args.authorization_tokens.find(tenantStr);
|
||||
if (tokenMapItr != args.authorization_tokens.end()) {
|
||||
tr.setOption(FDB_TR_OPTION_AUTHORIZATION_TOKEN, tokenMapItr->second);
|
||||
} else {
|
||||
logr.warn("Authorization token map is not empty, but could not find token for tenant '{}'", tenantStr);
|
||||
}
|
||||
}
|
||||
return tr;
|
||||
}
|
||||
|
||||
uint64_t byteswapHelper(uint64_t input) {
|
||||
|
@ -143,7 +160,7 @@ int cleanup(Database db, Arguments const& args) {
|
|||
const auto rc = waitAndHandleError(tx, future_commit, "COMMIT_CLEANUP");
|
||||
if (rc == FutureRC::OK) {
|
||||
break;
|
||||
} else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) {
|
||||
} else if (rc == FutureRC::RETRY) {
|
||||
// tx already reset
|
||||
continue;
|
||||
} else {
|
||||
|
@ -266,24 +283,60 @@ int populate(Database db,
|
|||
int batch_size = args.tenant_batch_size;
|
||||
int batches = (args.total_tenants + batch_size - 1) / batch_size;
|
||||
for (int batch = 0; batch < batches; ++batch) {
|
||||
while (1) {
|
||||
for (int i = batch * batch_size; i < args.total_tenants && i < (batch + 1) * batch_size; ++i) {
|
||||
std::string tenant_str = "tenant" + std::to_string(i);
|
||||
Tenant::createTenant(systemTx, toBytesRef(tenant_str));
|
||||
}
|
||||
auto future_commit = systemTx.commit();
|
||||
const auto rc = waitAndHandleError(systemTx, future_commit, "CREATE_TENANT");
|
||||
if (rc == FutureRC::OK) {
|
||||
// Keep going with reset transaction if commit was successful
|
||||
systemTx.reset();
|
||||
break;
|
||||
} else if (rc == FutureRC::RETRY) {
|
||||
// We want to retry this batch. Transaction is already reset
|
||||
} else {
|
||||
// Abort
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
Tenant tenants[batch_size];
|
||||
fdb::TypedFuture<fdb::future_var::Bool> blobbifyResults[batch_size];
|
||||
|
||||
// blobbify tenant ranges explicitly
|
||||
// FIXME: skip if database not configured for blob granules?
|
||||
for (int i = batch * batch_size; i < args.total_tenants && i < (batch + 1) * batch_size; ++i) {
|
||||
std::string tenant_name = "tenant" + std::to_string(i);
|
||||
Tenant::createTenant(systemTx, toBytesRef(tenant_name));
|
||||
std::string tenant_str = "tenant" + std::to_string(i);
|
||||
BytesRef tenant_name = toBytesRef(tenant_str);
|
||||
tenants[i] = db.openTenant(tenant_name);
|
||||
std::string rangeEnd = "\xff";
|
||||
blobbifyResults[i - (batch * batch_size)] =
|
||||
tenants[i].blobbifyRange(BytesRef(), toBytesRef(rangeEnd));
|
||||
}
|
||||
auto future_commit = systemTx.commit();
|
||||
const auto rc = waitAndHandleError(systemTx, future_commit, "CREATE_TENANT");
|
||||
if (rc == FutureRC::OK) {
|
||||
// Keep going with reset transaction if commit was successful
|
||||
systemTx.reset();
|
||||
} else if (rc == FutureRC::RETRY) {
|
||||
// We want to retry this batch, so decrement the number
|
||||
// and go back through the loop to get the same value
|
||||
// Transaction is already reset
|
||||
--batch;
|
||||
} else {
|
||||
// Abort
|
||||
return -1;
|
||||
|
||||
for (int i = batch * batch_size; i < args.total_tenants && i < (batch + 1) * batch_size; ++i) {
|
||||
while (true) {
|
||||
// not technically an operation that's part of systemTx, but it works
|
||||
const auto rc =
|
||||
waitAndHandleError(systemTx, blobbifyResults[i - (batch * batch_size)], "BLOBBIFY_TENANT");
|
||||
if (rc == FutureRC::OK) {
|
||||
if (!blobbifyResults[i - (batch * batch_size)].get()) {
|
||||
fmt::print("Blobbifying tenant {0} failed!\n", i);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
} else if (rc == FutureRC::RETRY) {
|
||||
continue;
|
||||
} else {
|
||||
// Abort
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
systemTx.reset();
|
||||
}
|
||||
} else {
|
||||
std::string last_tenant_name = "tenant" + std::to_string(args.total_tenants - 1);
|
||||
|
@ -405,6 +458,16 @@ int populate(Database db,
|
|||
return 0;
|
||||
}
|
||||
|
||||
void updateErrorStatsRunMode(ThreadStatistics& stats, fdb::Error err, int op) {
|
||||
if (err) {
|
||||
if (err.is(1020 /*not_commited*/)) {
|
||||
stats.incrConflictCount();
|
||||
} else {
|
||||
stats.incrErrorCount(op);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* run one iteration of configured transaction */
|
||||
int runOneTransaction(Transaction& tx,
|
||||
Arguments const& args,
|
||||
|
@ -435,17 +498,13 @@ transaction_begin:
|
|||
} else {
|
||||
future_rc = waitAndHandleForOnError(tx, f, opTable[op].name());
|
||||
}
|
||||
updateErrorStatsRunMode(stats, f.error(), op);
|
||||
}
|
||||
if (auto postStepFn = opTable[op].postStepFunction(step))
|
||||
postStepFn(f, tx, args, key1, key2, val);
|
||||
watch_step.stop();
|
||||
if (future_rc != FutureRC::OK) {
|
||||
if (future_rc == FutureRC::CONFLICT) {
|
||||
stats.incrConflictCount();
|
||||
} else if (future_rc == FutureRC::RETRY) {
|
||||
stats.incrErrorCount(op);
|
||||
} else {
|
||||
// abort
|
||||
if (future_rc == FutureRC::ABORT) {
|
||||
return -1;
|
||||
}
|
||||
// retry from first op
|
||||
|
@ -484,6 +543,7 @@ transaction_begin:
|
|||
auto watch_commit = Stopwatch(StartAtCtor{});
|
||||
auto f = tx.commit();
|
||||
const auto rc = waitAndHandleError(tx, f, "COMMIT_AT_TX_END");
|
||||
updateErrorStatsRunMode(stats, f.error(), OP_COMMIT);
|
||||
watch_commit.stop();
|
||||
auto tx_resetter = ExitGuard([&tx]() { tx.reset(); });
|
||||
if (rc == FutureRC::OK) {
|
||||
|
@ -493,10 +553,6 @@ transaction_begin:
|
|||
}
|
||||
stats.incrOpCount(OP_COMMIT);
|
||||
} else {
|
||||
if (rc == FutureRC::CONFLICT)
|
||||
stats.incrConflictCount();
|
||||
else
|
||||
stats.incrErrorCount(OP_COMMIT);
|
||||
if (rc == FutureRC::ABORT) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -563,62 +619,70 @@ int runWorkload(Database db,
|
|||
|
||||
/* main transaction loop */
|
||||
while (1) {
|
||||
Transaction tx = createNewTransaction(db, args, -1, args.active_tenants > 0 ? tenants : nullptr);
|
||||
while ((thread_tps > 0) && (xacts >= current_tps)) {
|
||||
if ((thread_tps > 0 /* iff throttling on */) && (xacts >= current_tps)) {
|
||||
/* throttle on */
|
||||
const auto time_now = steady_clock::now();
|
||||
if (toDoubleSeconds(time_now - time_prev) >= 1.0) {
|
||||
/* more than 1 second passed, no need to throttle */
|
||||
xacts = 0;
|
||||
time_prev = time_now;
|
||||
|
||||
/* update throttle rate */
|
||||
current_tps = static_cast<int>(thread_tps * throttle_factor.load());
|
||||
} else {
|
||||
auto time_now = steady_clock::now();
|
||||
while (toDoubleSeconds(time_now - time_prev) < 1.0) {
|
||||
usleep(1000);
|
||||
time_now = steady_clock::now();
|
||||
}
|
||||
|
||||
/* more than 1 second passed*/
|
||||
xacts = 0;
|
||||
time_prev = time_now;
|
||||
|
||||
/* update throttle rate */
|
||||
current_tps = static_cast<int>(thread_tps * throttle_factor.load());
|
||||
}
|
||||
/* enable transaction trace */
|
||||
if (dotrace) {
|
||||
const auto time_now = steady_clock::now();
|
||||
if (toIntegerSeconds(time_now - time_last_trace) >= 1) {
|
||||
time_last_trace = time_now;
|
||||
traceid.clear();
|
||||
fmt::format_to(std::back_inserter(traceid), "makotrace{:0>19d}", total_xacts);
|
||||
logr.debug("txn tracing {}", traceid);
|
||||
auto err = Error{};
|
||||
err = tx.setOptionNothrow(FDB_TR_OPTION_DEBUG_TRANSACTION_IDENTIFIER, toBytesRef(traceid));
|
||||
|
||||
if (current_tps > 0 || thread_tps == 0 /* throttling off */) {
|
||||
Transaction tx = createNewTransaction(db, args, -1, args.active_tenants > 0 ? tenants : nullptr);
|
||||
|
||||
/* enable transaction trace */
|
||||
if (dotrace) {
|
||||
const auto time_now = steady_clock::now();
|
||||
if (toIntegerSeconds(time_now - time_last_trace) >= 1) {
|
||||
time_last_trace = time_now;
|
||||
traceid.clear();
|
||||
fmt::format_to(std::back_inserter(traceid), "makotrace{:0>19d}", total_xacts);
|
||||
logr.debug("txn tracing {}", traceid);
|
||||
auto err = Error{};
|
||||
err = tx.setOptionNothrow(FDB_TR_OPTION_DEBUG_TRANSACTION_IDENTIFIER, toBytesRef(traceid));
|
||||
if (err) {
|
||||
logr.error("TR_OPTION_DEBUG_TRANSACTION_IDENTIFIER: {}", err.what());
|
||||
}
|
||||
err = tx.setOptionNothrow(FDB_TR_OPTION_LOG_TRANSACTION, BytesRef());
|
||||
if (err) {
|
||||
logr.error("TR_OPTION_LOG_TRANSACTION: {}", err.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* enable transaction tagging */
|
||||
if (dotagging > 0) {
|
||||
tagstr.clear();
|
||||
fmt::format_to(std::back_inserter(tagstr),
|
||||
"{}{}{:0>3d}",
|
||||
KEY_PREFIX,
|
||||
args.txntagging_prefix,
|
||||
urand(0, args.txntagging - 1));
|
||||
auto err = tx.setOptionNothrow(FDB_TR_OPTION_AUTO_THROTTLE_TAG, toBytesRef(tagstr));
|
||||
if (err) {
|
||||
logr.error("TR_OPTION_DEBUG_TRANSACTION_IDENTIFIER: {}", err.what());
|
||||
}
|
||||
err = tx.setOptionNothrow(FDB_TR_OPTION_LOG_TRANSACTION, BytesRef());
|
||||
if (err) {
|
||||
logr.error("TR_OPTION_LOG_TRANSACTION: {}", err.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* enable transaction tagging */
|
||||
if (dotagging > 0) {
|
||||
tagstr.clear();
|
||||
fmt::format_to(std::back_inserter(tagstr),
|
||||
"{}{}{:0>3d}",
|
||||
KEY_PREFIX,
|
||||
args.txntagging_prefix,
|
||||
urand(0, args.txntagging - 1));
|
||||
auto err = tx.setOptionNothrow(FDB_TR_OPTION_AUTO_THROTTLE_TAG, toBytesRef(tagstr));
|
||||
if (err) {
|
||||
logr.error("TR_OPTION_DEBUG_TRANSACTION_IDENTIFIER: {}", err.what());
|
||||
rc = runOneTransaction(tx, args, stats, key1, key2, val);
|
||||
if (rc) {
|
||||
logr.warn("runOneTransaction failed ({})", rc);
|
||||
}
|
||||
}
|
||||
|
||||
rc = runOneTransaction(tx, args, stats, key1, key2, val);
|
||||
if (rc) {
|
||||
logr.warn("runOneTransaction failed ({})", rc);
|
||||
xacts++;
|
||||
total_xacts++;
|
||||
}
|
||||
|
||||
if (thread_iters != -1) {
|
||||
if (thread_iters >= total_xacts) {
|
||||
if (total_xacts >= thread_iters) {
|
||||
/* xact limit reached */
|
||||
break;
|
||||
}
|
||||
|
@ -626,8 +690,6 @@ int runWorkload(Database db,
|
|||
/* signal turned red, target duration reached */
|
||||
break;
|
||||
}
|
||||
xacts++;
|
||||
total_xacts++;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
@ -710,6 +772,9 @@ void runAsyncWorkload(Arguments const& args,
|
|||
args.iteration == 0
|
||||
? -1
|
||||
: computeThreadIters(args.iteration, worker_id, i, args.num_processes, args.async_xacts);
|
||||
// argument validation should ensure max_iters > 0
|
||||
assert(args.iteration == 0 || max_iters > 0);
|
||||
|
||||
auto state =
|
||||
std::make_shared<ResumableStateForRunWorkload>(Logger(WorkerProcess{}, args.verbose, worker_id, i),
|
||||
db,
|
||||
|
@ -757,11 +822,15 @@ void workerThread(ThreadArgs& thread_args) {
|
|||
const auto thread_tps =
|
||||
args.tpsmax == 0 ? 0
|
||||
: computeThreadTps(args.tpsmax, worker_id, thread_id, args.num_processes, args.num_threads);
|
||||
// argument validation should ensure thread_tps > 0
|
||||
assert(args.tpsmax == 0 || thread_tps > 0);
|
||||
|
||||
const auto thread_iters =
|
||||
args.iteration == 0
|
||||
? -1
|
||||
: computeThreadIters(args.iteration, worker_id, thread_id, args.num_processes, args.num_threads);
|
||||
// argument validation should ensure thread_iters > 0
|
||||
assert(args.iteration == 0 || thread_iters > 0);
|
||||
|
||||
/* i'm ready */
|
||||
readycount.fetch_add(1);
|
||||
|
@ -815,6 +884,18 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces
|
|||
logr.error("network::setOption(FDB_NET_OPTION_DISTRIBUTED_CLIENT_TRACER): {}", err.what());
|
||||
}
|
||||
|
||||
if (args.tls_certificate_file.has_value()) {
|
||||
network::setOption(FDB_NET_OPTION_TLS_CERT_PATH, args.tls_certificate_file.value());
|
||||
}
|
||||
|
||||
if (args.tls_key_file.has_value()) {
|
||||
network::setOption(FDB_NET_OPTION_TLS_KEY_PATH, args.tls_key_file.value());
|
||||
}
|
||||
|
||||
if (args.tls_ca_file.has_value()) {
|
||||
network::setOption(FDB_NET_OPTION_TLS_CA_PATH, args.tls_ca_file.value());
|
||||
}
|
||||
|
||||
/* enable flatbuffers if specified */
|
||||
if (args.flatbuffers) {
|
||||
#ifdef FDB_NET_OPTION_USE_FLATBUFFERS
|
||||
|
@ -982,57 +1063,56 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces
|
|||
}
|
||||
|
||||
/* initialize the parameters with default values */
|
||||
int initArguments(Arguments& args) {
|
||||
memset(&args, 0, sizeof(Arguments)); /* zero-out everything */
|
||||
args.num_fdb_clusters = 0;
|
||||
args.num_databases = 1;
|
||||
args.api_version = maxApiVersion();
|
||||
args.json = 0;
|
||||
args.num_processes = 1;
|
||||
args.num_threads = 1;
|
||||
args.async_xacts = 0;
|
||||
args.mode = MODE_INVALID;
|
||||
args.rows = 100000;
|
||||
args.load_factor = 1.0;
|
||||
args.row_digits = digits(args.rows);
|
||||
args.seconds = 30;
|
||||
args.iteration = 0;
|
||||
args.tpsmax = 0;
|
||||
args.tpsmin = -1;
|
||||
args.tpsinterval = 10;
|
||||
args.tpschange = TPS_SIN;
|
||||
args.sampling = 1000;
|
||||
args.key_length = 32;
|
||||
args.value_length = 16;
|
||||
args.active_tenants = 0;
|
||||
args.total_tenants = 0;
|
||||
args.tenant_batch_size = 10000;
|
||||
args.zipf = 0;
|
||||
args.commit_get = 0;
|
||||
args.verbose = 1;
|
||||
args.flatbuffers = 0; /* internal */
|
||||
args.knobs[0] = '\0';
|
||||
args.log_group[0] = '\0';
|
||||
args.prefixpadding = 0;
|
||||
args.trace = 0;
|
||||
args.tracepath[0] = '\0';
|
||||
args.traceformat = 0; /* default to client's default (XML) */
|
||||
args.streaming_mode = FDB_STREAMING_MODE_WANT_ALL;
|
||||
args.txntrace = 0;
|
||||
args.txntagging = 0;
|
||||
memset(args.txntagging_prefix, 0, TAGPREFIXLENGTH_MAX);
|
||||
Arguments::Arguments() {
|
||||
num_fdb_clusters = 0;
|
||||
num_databases = 1;
|
||||
api_version = maxApiVersion();
|
||||
json = 0;
|
||||
num_processes = 1;
|
||||
num_threads = 1;
|
||||
async_xacts = 0;
|
||||
mode = MODE_INVALID;
|
||||
rows = 100000;
|
||||
load_factor = 1.0;
|
||||
row_digits = digits(rows);
|
||||
seconds = 0;
|
||||
iteration = 0;
|
||||
tpsmax = 0;
|
||||
tpsmin = -1;
|
||||
tpsinterval = 10;
|
||||
tpschange = TPS_SIN;
|
||||
sampling = 1000;
|
||||
key_length = 32;
|
||||
value_length = 16;
|
||||
active_tenants = 0;
|
||||
total_tenants = 0;
|
||||
tenant_batch_size = 10000;
|
||||
zipf = 0;
|
||||
commit_get = 0;
|
||||
verbose = 1;
|
||||
flatbuffers = 0; /* internal */
|
||||
knobs[0] = '\0';
|
||||
log_group[0] = '\0';
|
||||
prefixpadding = 0;
|
||||
trace = 0;
|
||||
tracepath[0] = '\0';
|
||||
traceformat = 0; /* default to client's default (XML) */
|
||||
streaming_mode = FDB_STREAMING_MODE_WANT_ALL;
|
||||
txntrace = 0;
|
||||
txntagging = 0;
|
||||
memset(txntagging_prefix, 0, TAGPREFIXLENGTH_MAX);
|
||||
for (auto i = 0; i < MAX_OP; i++) {
|
||||
args.txnspec.ops[i][OP_COUNT] = 0;
|
||||
txnspec.ops[i][OP_COUNT] = 0;
|
||||
}
|
||||
args.client_threads_per_version = 0;
|
||||
args.disable_client_bypass = false;
|
||||
args.disable_ryw = 0;
|
||||
args.json_output_path[0] = '\0';
|
||||
args.stats_export_path[0] = '\0';
|
||||
args.bg_materialize_files = false;
|
||||
args.bg_file_path[0] = '\0';
|
||||
args.distributed_tracer_client = 0;
|
||||
return 0;
|
||||
client_threads_per_version = 0;
|
||||
disable_client_bypass = false;
|
||||
disable_ryw = 0;
|
||||
json_output_path[0] = '\0';
|
||||
stats_export_path[0] = '\0';
|
||||
bg_materialize_files = false;
|
||||
bg_file_path[0] = '\0';
|
||||
distributed_tracer_client = 0;
|
||||
num_report_files = 0;
|
||||
}
|
||||
|
||||
/* parse transaction specification */
|
||||
|
@ -1234,7 +1314,7 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
/* name, has_arg, flag, val */
|
||||
{ "api_version", required_argument, NULL, 'a' },
|
||||
{ "cluster", required_argument, NULL, 'c' },
|
||||
{ "num_databases", optional_argument, NULL, 'd' },
|
||||
{ "num_databases", required_argument, NULL, 'd' },
|
||||
{ "procs", required_argument, NULL, 'p' },
|
||||
{ "threads", required_argument, NULL, 't' },
|
||||
{ "async_xacts", required_argument, NULL, ARG_ASYNC },
|
||||
|
@ -1279,8 +1359,23 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
{ "bg_file_path", required_argument, NULL, ARG_BG_FILE_PATH },
|
||||
{ "stats_export_path", optional_argument, NULL, ARG_EXPORT_PATH },
|
||||
{ "distributed_tracer_client", required_argument, NULL, ARG_DISTRIBUTED_TRACER_CLIENT },
|
||||
{ "tls_certificate_file", required_argument, NULL, ARG_TLS_CERTIFICATE_FILE },
|
||||
{ "tls_key_file", required_argument, NULL, ARG_TLS_KEY_FILE },
|
||||
{ "tls_ca_file", required_argument, NULL, ARG_TLS_CA_FILE },
|
||||
{ "authorization_token_file", required_argument, NULL, ARG_AUTHORIZATION_TOKEN_FILE },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
/* For optional arguments, optarg is only set when the argument is passed as "--option=[ARGUMENT]" but not as
|
||||
"--option [ARGUMENT]". This function sets optarg in the latter case. See
|
||||
https://cfengine.com/blog/2021/optional-arguments-with-getopt-long/ for a more detailed explanation */
|
||||
#define SET_OPT_ARG_IF_PRESENT() \
|
||||
{ \
|
||||
if (optarg == NULL && optind < argc && argv[optind][0] != '-') { \
|
||||
optarg = argv[optind++]; \
|
||||
} \
|
||||
}
|
||||
|
||||
idx = 0;
|
||||
c = getopt_long(argc, argv, short_options, long_options, &idx);
|
||||
if (c < 0) {
|
||||
|
@ -1482,9 +1577,8 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
args.disable_ryw = 1;
|
||||
break;
|
||||
case ARG_JSON_REPORT:
|
||||
if (optarg == NULL && (argv[optind] == NULL || (argv[optind] != NULL && argv[optind][0] == '-'))) {
|
||||
// if --report_json is the last option and no file is specified
|
||||
// or --report_json is followed by another option
|
||||
SET_OPT_ARG_IF_PRESENT();
|
||||
if (!optarg) {
|
||||
char default_file[] = "mako.json";
|
||||
strncpy(args.json_output_path, default_file, sizeof(default_file));
|
||||
} else {
|
||||
|
@ -1495,13 +1589,12 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
args.bg_materialize_files = true;
|
||||
strncpy(args.bg_file_path, optarg, std::min(sizeof(args.bg_file_path), strlen(optarg) + 1));
|
||||
case ARG_EXPORT_PATH:
|
||||
if (optarg == NULL && (argv[optind] == NULL || (argv[optind] != NULL && argv[optind][0] == '-'))) {
|
||||
SET_OPT_ARG_IF_PRESENT();
|
||||
if (!optarg) {
|
||||
char default_file[] = "sketch_data.json";
|
||||
strncpy(args.stats_export_path, default_file, sizeof(default_file));
|
||||
} else {
|
||||
strncpy(args.stats_export_path,
|
||||
argv[optind],
|
||||
std::min(sizeof(args.stats_export_path), strlen(argv[optind]) + 1));
|
||||
strncpy(args.stats_export_path, optarg, std::min(sizeof(args.stats_export_path), strlen(optarg) + 1));
|
||||
}
|
||||
break;
|
||||
case ARG_DISTRIBUTED_TRACER_CLIENT:
|
||||
|
@ -1515,6 +1608,45 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
args.distributed_tracer_client = -1;
|
||||
}
|
||||
break;
|
||||
case ARG_TLS_CERTIFICATE_FILE:
|
||||
args.tls_certificate_file = std::string(optarg);
|
||||
break;
|
||||
case ARG_TLS_KEY_FILE:
|
||||
args.tls_key_file = std::string(optarg);
|
||||
break;
|
||||
case ARG_TLS_CA_FILE:
|
||||
args.tls_ca_file = std::string(optarg);
|
||||
break;
|
||||
case ARG_AUTHORIZATION_TOKEN_FILE: {
|
||||
std::string tokenFilename(optarg);
|
||||
std::ifstream ifs(tokenFilename);
|
||||
std::ostringstream oss;
|
||||
oss << ifs.rdbuf();
|
||||
rapidjson::Document d;
|
||||
d.Parse(oss.str().c_str());
|
||||
if (d.HasParseError()) {
|
||||
logr.error("Failed to parse authorization token JSON file '{}': {} at offset {}",
|
||||
tokenFilename,
|
||||
GetParseError_En(d.GetParseError()),
|
||||
d.GetErrorOffset());
|
||||
return -1;
|
||||
} else if (!d.IsObject()) {
|
||||
logr.error("Authorization token JSON file '{}' must contain a JSON object", tokenFilename);
|
||||
return -1;
|
||||
}
|
||||
for (auto itr = d.MemberBegin(); itr != d.MemberEnd(); ++itr) {
|
||||
if (!itr->value.IsString()) {
|
||||
logr.error("Token '{}' is not a string", itr->name.GetString());
|
||||
return -1;
|
||||
}
|
||||
args.authorization_tokens.insert_or_assign(
|
||||
std::string(itr->name.GetString(), itr->name.GetStringLength()),
|
||||
std::string(itr->value.GetString(), itr->value.GetStringLength()));
|
||||
}
|
||||
logr.info("Added {} tenant authorization tokens to map from file '{}'",
|
||||
args.authorization_tokens.size(),
|
||||
tokenFilename);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1525,93 +1657,118 @@ int parseArguments(int argc, char* argv[], Arguments& args) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int validateArguments(Arguments const& args) {
|
||||
if (args.mode == MODE_INVALID) {
|
||||
int Arguments::validate() {
|
||||
if (mode == MODE_INVALID) {
|
||||
logr.error("--mode has to be set");
|
||||
return -1;
|
||||
}
|
||||
if (args.verbose < VERBOSE_NONE || args.verbose > VERBOSE_DEBUG) {
|
||||
if (verbose < VERBOSE_NONE || verbose > VERBOSE_DEBUG) {
|
||||
logr.error("--verbose must be between 0 and 3");
|
||||
return -1;
|
||||
}
|
||||
if (args.rows <= 0) {
|
||||
if (rows <= 0) {
|
||||
logr.error("--rows must be a positive integer");
|
||||
return -1;
|
||||
}
|
||||
if (args.load_factor <= 0 || args.load_factor > 1) {
|
||||
if (load_factor <= 0 || load_factor > 1) {
|
||||
logr.error("--load_factor must be in range (0, 1]");
|
||||
return -1;
|
||||
}
|
||||
if (args.key_length < 0) {
|
||||
if (key_length < 0) {
|
||||
logr.error("--keylen must be a positive integer");
|
||||
return -1;
|
||||
}
|
||||
if (args.value_length < 0) {
|
||||
if (value_length < 0) {
|
||||
logr.error("--vallen must be a positive integer");
|
||||
return -1;
|
||||
}
|
||||
if (args.num_fdb_clusters > NUM_CLUSTERS_MAX) {
|
||||
if (num_fdb_clusters > NUM_CLUSTERS_MAX) {
|
||||
logr.error("Mako is not supported to do work to more than {} clusters", NUM_CLUSTERS_MAX);
|
||||
return -1;
|
||||
}
|
||||
if (args.num_databases > NUM_DATABASES_MAX) {
|
||||
if (num_databases > NUM_DATABASES_MAX) {
|
||||
logr.error("Mako is not supported to do work to more than {} databases", NUM_DATABASES_MAX);
|
||||
return -1;
|
||||
}
|
||||
if (args.num_databases < args.num_fdb_clusters) {
|
||||
logr.error("--num_databases ({}) must be >= number of clusters({})", args.num_databases, args.num_fdb_clusters);
|
||||
if (num_databases < num_fdb_clusters) {
|
||||
logr.error("--num_databases ({}) must be >= number of clusters({})", num_databases, num_fdb_clusters);
|
||||
return -1;
|
||||
}
|
||||
if (args.num_threads < args.num_databases) {
|
||||
logr.error("--threads ({}) must be >= number of databases ({})", args.num_threads, args.num_databases);
|
||||
if (num_threads < num_databases) {
|
||||
logr.error("--threads ({}) must be >= number of databases ({})", num_threads, num_databases);
|
||||
return -1;
|
||||
}
|
||||
if (args.key_length < 4 /* "mako" */ + args.row_digits) {
|
||||
if (key_length < 4 /* "mako" */ + row_digits) {
|
||||
logr.error("--keylen must be larger than {} to store \"mako\" prefix "
|
||||
"and maximum row number",
|
||||
4 + args.row_digits);
|
||||
4 + row_digits);
|
||||
return -1;
|
||||
}
|
||||
if (args.active_tenants > args.total_tenants) {
|
||||
if (active_tenants > total_tenants) {
|
||||
logr.error("--active_tenants must be less than or equal to --total_tenants");
|
||||
return -1;
|
||||
}
|
||||
if (args.tenant_batch_size < 1) {
|
||||
if (tenant_batch_size < 1) {
|
||||
logr.error("--tenant_batch_size must be at least 1");
|
||||
return -1;
|
||||
}
|
||||
if (args.mode == MODE_RUN) {
|
||||
if ((args.seconds > 0) && (args.iteration > 0)) {
|
||||
if (mode == MODE_RUN) {
|
||||
if ((seconds > 0) && (iteration > 0)) {
|
||||
logr.error("Cannot specify seconds and iteration together");
|
||||
return -1;
|
||||
}
|
||||
if ((args.seconds == 0) && (args.iteration == 0)) {
|
||||
if ((seconds == 0) && (iteration == 0)) {
|
||||
logr.error("Must specify either seconds or iteration");
|
||||
return -1;
|
||||
}
|
||||
if (args.txntagging < 0) {
|
||||
if (txntagging < 0) {
|
||||
logr.error("--txntagging must be a non-negative integer");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// ensure that all of the files provided to mako are valid and exist
|
||||
if (args.mode == MODE_REPORT) {
|
||||
if (!args.num_report_files) {
|
||||
logr.error("No files to merge");
|
||||
}
|
||||
for (int i = 0; i < args.num_report_files; i++) {
|
||||
struct stat buffer;
|
||||
if (stat(args.report_files[i], &buffer) != 0) {
|
||||
logr.error("Couldn't open file {}", args.report_files[i]);
|
||||
if (iteration > 0) {
|
||||
if (async_xacts > 0 && async_xacts * num_processes > iteration) {
|
||||
logr.error("--async_xacts * --num_processes must be <= --iteration");
|
||||
return -1;
|
||||
} else if (async_xacts == 0 && num_threads * num_processes > iteration) {
|
||||
logr.error("--num_threads * --num_processes must be <= --iteration");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (args.distributed_tracer_client < 0) {
|
||||
logr.error("--disibuted_tracer_client must specify either (disabled, network_lossy, log_file)");
|
||||
|
||||
if (mode == MODE_RUN || mode == MODE_BUILD) {
|
||||
if (tpsmax > 0) {
|
||||
if (async_xacts > 0) {
|
||||
logr.error("--tpsmax|--tps must be 0 or unspecified because throttling is not supported in async mode");
|
||||
return -1;
|
||||
} else if (async_xacts == 0 && num_threads * num_processes > tpsmax) {
|
||||
logr.error("--num_threads * --num_processes must be <= --tpsmax|--tps");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ensure that all of the files provided to mako are valid and exist
|
||||
if (mode == MODE_REPORT) {
|
||||
if (!num_report_files) {
|
||||
logr.error("No files to merge");
|
||||
}
|
||||
for (int i = 0; i < num_report_files; i++) {
|
||||
struct stat buffer;
|
||||
if (stat(report_files[i], &buffer) != 0) {
|
||||
logr.error("Couldn't open file {}", report_files[i]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (distributed_tracer_client < 0) {
|
||||
logr.error("--distributed_tracer_client must specify either (disabled, network_lossy, log_file)");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!authorization_tokens.empty() && !tls_ca_file.has_value()) {
|
||||
logr.warn("Authorization tokens are being used without explicit TLS CA file configured");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2262,11 +2419,6 @@ int main(int argc, char* argv[]) {
|
|||
|
||||
auto rc = int{};
|
||||
auto args = Arguments{};
|
||||
rc = initArguments(args);
|
||||
if (rc < 0) {
|
||||
logr.error("initArguments failed");
|
||||
return -1;
|
||||
}
|
||||
rc = parseArguments(argc, argv, args);
|
||||
if (rc < 0) {
|
||||
/* usage printed */
|
||||
|
@ -2282,7 +2434,12 @@ int main(int argc, char* argv[]) {
|
|||
args.total_tenants = args.active_tenants;
|
||||
}
|
||||
|
||||
rc = validateArguments(args);
|
||||
// set --seconds in case no ending condition has been set
if (args.seconds == 0 && args.iteration == 0) {
args.seconds = 30; // default value according to documentation
}
|
||||
|
||||
rc = args.validate();
|
||||
if (rc < 0)
|
||||
return -1;
|
||||
logr.setVerbosity(args.verbose);
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#define MAKO_HPP
|
||||
|
||||
#ifndef FDB_API_VERSION
|
||||
#define FDB_API_VERSION 720
|
||||
#define FDB_API_VERSION 730
|
||||
#endif
|
||||
|
||||
#include <array>
|
||||
|
@ -30,6 +30,7 @@
|
|||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string_view>
|
||||
#include <fdb_api.hpp>
|
||||
|
@ -79,7 +80,11 @@ enum ArgKind {
|
|||
ARG_JSON_REPORT,
|
||||
ARG_BG_FILE_PATH, // if blob granule files are stored locally, mako will read and materialize them if this is set
|
||||
ARG_EXPORT_PATH,
|
||||
ARG_DISTRIBUTED_TRACER_CLIENT
|
||||
ARG_DISTRIBUTED_TRACER_CLIENT,
|
||||
ARG_TLS_CERTIFICATE_FILE,
|
||||
ARG_TLS_KEY_FILE,
|
||||
ARG_TLS_CA_FILE,
|
||||
ARG_AUTHORIZATION_TOKEN_FILE,
|
||||
};
|
||||
|
||||
constexpr const int OP_COUNT = 0;
|
||||
|
@ -131,6 +136,9 @@ constexpr const int MAX_REPORT_FILES = 200;
|
|||
|
||||
/* benchmark parameters */
|
||||
struct Arguments {
|
||||
Arguments();
|
||||
int validate();
|
||||
|
||||
int api_version;
|
||||
int json;
|
||||
int num_processes;
|
||||
|
@ -180,6 +188,10 @@ struct Arguments {
|
|||
char report_files[MAX_REPORT_FILES][PATH_MAX];
|
||||
int num_report_files;
|
||||
int distributed_tracer_client;
|
||||
std::optional<std::string> tls_certificate_file;
|
||||
std::optional<std::string> tls_key_file;
|
||||
std::optional<std::string> tls_ca_file;
|
||||
std::map<std::string, std::string> authorization_tokens; // maps tenant name to token string
|
||||
};
|
||||
|
||||
} // namespace mako
|
||||
|
|
|
@ -38,7 +38,7 @@ Arguments
  | - ``build``: Populate data
  | - ``run``: Run the benchmark

- | ``-c | --cluster <cluster file>``
- | ``-c | --cluster <cluster_file>``
  | FDB cluster files (Required, comma-separated)

- | ``-d | --num_databases <num_databases>``

@ -125,9 +125,21 @@ Arguments
  | Disable snapshot read-your-writes

- | ``--json_report`` defaults to ``mako.json``
  | ``--json_report=PATH``
  | ``--json_report <path>``
  | Output stats to the specified json file

- | ``--tls_certificate_file <path>``
  | Use TLS certificate located in ``<path>``

- | ``--tls_key_file <path>``
  | Use TLS key file located in ``<path>``

- | ``--tls_ca_file <path>``
  | Use TLS CA file located in ``<path>``

- | ``--authorization_token_file <path>``
  | Use authorization token JSON file located in ``<path>``
  | Expected content is a JSON object where each key is a tenant name and the mapped value is a token string
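  | For illustration only, the file content might look like ``{"some_tenant": "some_token_string"}``, with one entry per tenant (the tenant name and token value here are placeholders)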

Transaction Specification
=========================
|
|
|
@ -29,7 +29,7 @@
|
|||
#include <inttypes.h>
|
||||
|
||||
#ifndef FDB_API_VERSION
|
||||
#define FDB_API_VERSION 720
|
||||
#define FDB_API_VERSION 730
|
||||
#endif
|
||||
|
||||
#include <foundationdb/fdb_c.h>
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
// Unit tests that test the timeouts for a disconnected cluster
|
||||
|
||||
#define FDB_API_VERSION 720
|
||||
#define FDB_API_VERSION 730
|
||||
#include <foundationdb/fdb_c.h>
|
||||
|
||||
#include <chrono>
|
||||
|
|
|
@ -231,6 +231,10 @@ Int64Future Transaction::get_approximate_size() {
|
|||
return Int64Future(fdb_transaction_get_approximate_size(tr_));
|
||||
}
|
||||
|
||||
Int64Future Transaction::get_total_cost() {
|
||||
return Int64Future(fdb_transaction_get_total_cost(tr_));
|
||||
}
|
||||
|
||||
KeyFuture Transaction::get_versionstamp() {
|
||||
return KeyFuture(fdb_transaction_get_versionstamp(tr_));
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#define FDB_API_VERSION 720
|
||||
#define FDB_API_VERSION 730
|
||||
#include <foundationdb/fdb_c.h>
|
||||
|
||||
#include <string>
|
||||
|
@ -276,6 +276,9 @@ public:
|
|||
// Returns a future which will be set to the approximate transaction size so far.
|
||||
Int64Future get_approximate_size();
|
||||
|
||||
// Returns a future which will be set to the transaction's total cost so far.
|
||||
Int64Future get_total_cost();
|
||||
|
||||
// Returns a future which will be set to the versionstamp which was used by
|
||||
// any versionstamp operations in the transaction.
|
||||
KeyFuture get_versionstamp();
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
// Unit tests for API setup, network initialization functions from the FDB C API.
|
||||
|
||||
#define FDB_API_VERSION 720
|
||||
#define FDB_API_VERSION 730
|
||||
#include <foundationdb/fdb_c.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
// Unit tests for the FoundationDB C API.
|
||||
|
||||
#include "fdb_c_options.g.h"
|
||||
#define FDB_API_VERSION 720
|
||||
#define FDB_API_VERSION 730
|
||||
#include <foundationdb/fdb_c.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
@ -1945,6 +1945,30 @@ TEST_CASE("fdb_transaction_get_committed_version") {
|
|||
}
|
||||
}
|
||||
|
||||
TEST_CASE("fdb_transaction_get_total_cost") {
|
||||
fdb::Transaction tr(db);
|
||||
while (1) {
|
||||
fdb::ValueFuture f1 = tr.get("foo", /*snapshot*/ false);
|
||||
fdb_error_t err = wait_future(f1);
|
||||
if (err) {
|
||||
fdb::EmptyFuture fOnError = tr.on_error(err);
|
||||
fdb_check(wait_future(fOnError));
|
||||
continue;
|
||||
}
|
||||
fdb::Int64Future f2 = tr.get_total_cost();
|
||||
err = wait_future(f2);
|
||||
if (err) {
|
||||
fdb::EmptyFuture fOnError = tr.on_error(err);
|
||||
fdb_check(wait_future(fOnError));
|
||||
continue;
|
||||
}
|
||||
int64_t cost;
|
||||
fdb_check(f2.get(&cost));
|
||||
CHECK(cost > 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("fdb_transaction_get_approximate_size") {
|
||||
fdb::Transaction tr(db);
|
||||
while (1) {
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define FDB_API_VERSION 720
|
||||
#define FDB_API_VERSION 730
|
||||
#include "foundationdb/fdb_c.h"
|
||||
#undef DLLEXPORT
|
||||
#include "workloads.h"
|
||||
|
|
|
@ -251,6 +251,11 @@ func (o NetworkOptions) SetFutureVersionClientLibrary(param string) error {
|
|||
return o.setOpt(66, []byte(param))
|
||||
}
|
||||
|
||||
// Retain temporary external client library copies that are created for enabling multi-threading.
|
||||
func (o NetworkOptions) SetRetainClientLibraryCopies() error {
|
||||
return o.setOpt(67, nil)
|
||||
}
|
||||
|
||||
// Disables logging of client statistics, such as sampled transaction activity.
|
||||
func (o NetworkOptions) SetDisableClientStatisticsLogging() error {
|
||||
return o.setOpt(70, nil)
|
||||
|
|
|
@ -137,7 +137,7 @@ endif()
|
|||
if(NOT BUILD_GO_BINDING OR NOT BUILD_C_BINDING)
|
||||
set(WITH_GO_BINDING OFF)
|
||||
else()
|
||||
find_program(GO_EXECUTABLE go)
|
||||
find_program(GO_EXECUTABLE go HINTS /usr/local/go/bin/)
|
||||
# building the go binaries is currently not supported on Windows
|
||||
if(GO_EXECUTABLE AND NOT WIN32 AND WITH_C_BINDING)
|
||||
set(WITH_GO_BINDING ON)
|
||||
|
|
|
@ -76,38 +76,11 @@ function(generate_coverage_xml)
|
|||
add_dependencies(coverage_${target_name} coveragetool)
|
||||
endfunction()
|
||||
|
||||
# This function asserts that `versions.h` does not exist in the source
|
||||
# directory. It does this in the prebuild phase of the target.
|
||||
# This is an ugly hack that should make sure that cmake isn't used with
|
||||
# a source directory in which FDB was previously built with `make`.
|
||||
function(assert_no_version_h target)
|
||||
|
||||
message(STATUS "Check versions.h on ${target}")
|
||||
set(target_name "${target}_versions_h_check")
|
||||
|
||||
if (DEFINED ENV{VERBOSE})
|
||||
add_custom_target("${target_name}"
|
||||
COMMAND "${CMAKE_COMMAND}" -DFILE="${CMAKE_SOURCE_DIR}/versions.h"
|
||||
-P "${CMAKE_SOURCE_DIR}/cmake/AssertFileDoesntExist.cmake"
|
||||
COMMAND echo
|
||||
"${CMAKE_COMMAND}" -P "${CMAKE_SOURCE_DIR}/cmake/AssertFileDoesntExist.cmake"
|
||||
-DFILE="${CMAKE_SOURCE_DIR}/versions.h"
|
||||
COMMENT "Check old build system wasn't used in source dir")
|
||||
else()
|
||||
add_custom_target("${target_name}"
|
||||
COMMAND "${CMAKE_COMMAND}" -DFILE="${CMAKE_SOURCE_DIR}/versions.h"
|
||||
-P "${CMAKE_SOURCE_DIR}/cmake/AssertFileDoesntExist.cmake"
|
||||
COMMENT "Check old build system wasn't used in source dir")
|
||||
endif()
|
||||
|
||||
add_dependencies(${target} ${target_name})
|
||||
endfunction()
|
||||
|
||||
add_custom_target(strip_targets)
|
||||
add_dependencies(packages strip_targets)
|
||||
|
||||
function(strip_debug_symbols target)
|
||||
if (WIN32)
|
||||
if(WIN32)
|
||||
return()
|
||||
endif()
|
||||
get_target_property(target_type ${target} TYPE)
|
||||
|
@ -146,7 +119,7 @@ function(strip_debug_symbols target)
|
|||
COMMAND objcopy --verbose --only-keep-debug $<TARGET_FILE:${target}> "${out_file}.debug"
|
||||
COMMAND objcopy --verbose --add-gnu-debuglink="${out_file}.debug" "${out_file}"
|
||||
COMMENT "Copy debug symbols to ${out_name}.debug")
|
||||
add_custom_target(strip_${target} DEPENDS "${out_file}.debug")
|
||||
add_custom_target(strip_${target} DEPENDS "${out_file}.debug")
|
||||
else()
|
||||
add_custom_target(strip_${target})
|
||||
add_dependencies(strip_${target} strip_only_${target})
|
||||
|
@ -171,7 +144,7 @@ function(copy_headers)
|
|||
foreach(f IN LISTS CP_SRCS)
|
||||
is_prefix(bd "${CMAKE_CURRENT_BINARY_DIR}" "${f}")
|
||||
is_prefix(sd "${CMAKE_CURRENT_SOURCE_DIR}" "${f}")
|
||||
if (bd OR sd)
|
||||
if(bd OR sd)
|
||||
continue()
|
||||
endif()
|
||||
is_header(hdr "${f}")
|
||||
|
@ -180,7 +153,7 @@ function(copy_headers)
|
|||
endif()
|
||||
get_filename_component(fname ${f} NAME)
|
||||
get_filename_component(dname ${f} DIRECTORY)
|
||||
if (dname)
|
||||
if(dname)
|
||||
make_directory(${incl_dir}/${dname})
|
||||
endif()
|
||||
set(fpath "${incl_dir}/${dname}/${fname}")
|
||||
|
@ -309,9 +282,6 @@ function(add_flow_target)
|
|||
|
||||
add_custom_target(${AFT_NAME}_actors DEPENDS ${generated_files})
|
||||
add_dependencies(${AFT_NAME} ${AFT_NAME}_actors)
|
||||
if(NOT WIN32)
|
||||
assert_no_version_h(${AFT_NAME}_actors)
|
||||
endif()
|
||||
generate_coverage_xml(${AFT_NAME})
|
||||
if(strip_target)
|
||||
strip_debug_symbols(${AFT_NAME})
|
||||
|
|
|
@ -8,40 +8,43 @@ endif()
|
|||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(awssdk_project
|
||||
GIT_REPOSITORY https://github.com/aws/aws-sdk-cpp.git
|
||||
GIT_TAG e4b4b310d8631bc7e9a797b6ac03a73c6f210bf6 # v1.9.331
|
||||
SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/awssdk-src"
|
||||
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/awssdk-build"
|
||||
GIT_CONFIG advice.detachedHead=false
|
||||
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF # SDK builds shared libs by default, we want static libs
|
||||
-DENABLE_TESTING=OFF
|
||||
-DBUILD_ONLY=core # git repo contains SDK for every AWS product, we only want the core auth libraries
|
||||
-DSIMPLE_INSTALL=ON
|
||||
-DCMAKE_INSTALL_PREFIX=install # need to specify an install prefix so it doesn't install in /usr/lib - FIXME: use absolute path
|
||||
-DBYO_CRYPTO=ON # we have our own crypto libraries that conflict if we let aws sdk build and link its own
|
||||
-DBUILD_CURL=ON
|
||||
-DBUILD_ZLIB=ON
|
||||
|
||||
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_CXX_FLAGS=${AWSSDK_COMPILER_FLAGS}
|
||||
TEST_COMMAND ""
|
||||
GIT_REPOSITORY https://github.com/aws/aws-sdk-cpp.git
|
||||
GIT_TAG e4b4b310d8631bc7e9a797b6ac03a73c6f210bf6 # v1.9.331
|
||||
SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/awssdk-src"
|
||||
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/awssdk-build"
|
||||
GIT_CONFIG advice.detachedHead=false
|
||||
# it seems advice.detachedHead breaks something which causes aws sdk to always be rebuilt.
|
||||
# This option forces to cmake to build the aws sdk only once and never attempt to update it
|
||||
UPDATE_DISCONNECTED ON
|
||||
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF # SDK builds shared libs by default, we want static libs
|
||||
-DENABLE_TESTING=OFF
|
||||
-DBUILD_ONLY=core # git repo contains SDK for every AWS product, we only want the core auth libraries
|
||||
-DSIMPLE_INSTALL=ON
|
||||
-DCMAKE_INSTALL_PREFIX=install # need to specify an install prefix so it doesn't install in /usr/lib - FIXME: use absolute path
|
||||
-DBYO_CRYPTO=ON # we have our own crypto libraries that conflict if we let aws sdk build and link its own
|
||||
-DBUILD_CURL=ON
|
||||
-DBUILD_ZLIB=ON
|
||||
|
||||
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_CXX_FLAGS=${AWSSDK_COMPILER_FLAGS}
|
||||
TEST_COMMAND ""
|
||||
# the sdk build produces a ton of artifacts, with their own dependency tree, so there is a very specific dependency order they must be linked in
|
||||
BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-cpp-sdk-core.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-crt-cpp.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-s3.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-auth.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-event-stream.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-http.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-mqtt.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-sdkutils.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-io.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-checksums.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-compression.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-cal.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-common.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/external-install/curl/lib/libcurl.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/external-install/zlib/lib/libz.a"
|
||||
)
|
||||
BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-cpp-sdk-core.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-crt-cpp.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-s3.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-auth.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-event-stream.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-http.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-mqtt.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-sdkutils.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-io.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-checksums.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-compression.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-cal.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/lib64/libaws-c-common.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/external-install/curl/lib/libcurl.a"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/awssdk-build/install/external-install/zlib/lib/libz.a"
|
||||
)
|
||||
|
||||
add_library(awssdk_core STATIC IMPORTED)
|
||||
add_dependencies(awssdk_core awssdk_project)
|
||||
|
|
|
@ -303,7 +303,6 @@ class TestRun:
|
|||
self.stats: str | None = stats
|
||||
self.expected_unseed: int | None = expected_unseed
|
||||
self.use_valgrind: bool = config.use_valgrind
|
||||
self.long_running: bool = config.long_running
|
||||
self.old_binary_path: Path = config.old_binaries_path
|
||||
self.buggify_enabled: bool = buggify_enabled
|
||||
self.fault_injection_enabled: bool = True
|
||||
|
@ -315,7 +314,7 @@ class TestRun:
|
|||
# state for the run
|
||||
self.retryable_error: bool = False
|
||||
self.summary: Summary = Summary(binary, uid=self.uid, stats=self.stats, expected_unseed=self.expected_unseed,
|
||||
will_restart=will_restart)
|
||||
will_restart=will_restart, long_running=config.long_running)
|
||||
self.run_time: int = 0
|
||||
self.success = self.run()
|
||||
|
||||
|
@ -367,6 +366,11 @@ class TestRun:
|
|||
command += ['-b', 'on']
|
||||
if config.crash_on_error:
|
||||
command.append('--crash')
|
||||
if config.long_running:
|
||||
# disable simulation speedup
|
||||
command += ['--knob-sim-speedup-after-seconds=36000']
|
||||
# disable traceTooManyLines Error MAX_TRACE_LINES
|
||||
command += ['--knob-max-trace-lines=1000000000']
|
||||
|
||||
self.temp_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
@ -376,7 +380,8 @@ class TestRun:
|
|||
process = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, cwd=self.temp_path,
|
||||
text=True, env=env)
|
||||
did_kill = False
|
||||
timeout = 20 * config.kill_seconds if self.use_valgrind or self.long_running else config.kill_seconds
|
||||
# No timeout for long running tests
|
||||
timeout = 20 * config.kill_seconds if self.use_valgrind else (None if config.long_running else config.kill_seconds)
|
||||
err_out: str
|
||||
try:
|
||||
_, err_out = process.communicate(timeout=timeout)
|
||||
|
|
|
@ -291,11 +291,12 @@ class Summary:
|
|||
def __init__(self, binary: Path, runtime: float = 0, max_rss: int | None = None,
|
||||
was_killed: bool = False, uid: uuid.UUID | None = None, expected_unseed: int | None = None,
|
||||
exit_code: int = 0, valgrind_out_file: Path | None = None, stats: str | None = None,
|
||||
error_out: str = None, will_restart: bool = False):
|
||||
error_out: str = None, will_restart: bool = False, long_running: bool = False):
|
||||
self.binary = binary
|
||||
self.runtime: float = runtime
|
||||
self.max_rss: int | None = max_rss
|
||||
self.was_killed: bool = was_killed
|
||||
self.long_running = long_running
|
||||
self.expected_unseed: int | None = expected_unseed
|
||||
self.exit_code: int = exit_code
|
||||
self.out: SummaryTree = SummaryTree('Test')
|
||||
|
@ -396,6 +397,10 @@ class Summary:
|
|||
if self.was_killed:
|
||||
child = SummaryTree('ExternalTimeout')
|
||||
child.attributes['Severity'] = '40'
|
||||
if self.long_running:
|
||||
# debugging info for long-running tests
|
||||
child.attributes['LongRunning'] = '1'
|
||||
child.attributes['Runtime'] = str(self.runtime)
|
||||
self.out.append(child)
|
||||
self.error = True
|
||||
if self.max_rss is not None:
|
||||
|
|
|
@ -55,6 +55,6 @@ if __name__ == '__main__':
|
|||
summary.summarize_files(files)
|
||||
summary.out.dump(sys.stdout)
|
||||
else:
|
||||
summary = Summary(Path('bin/fdbserver'), was_killed=True)
|
||||
summary = Summary(Path('bin/fdbserver'), was_killed=True, long_running=config.long_running)
|
||||
summary.summarize_files(files)
|
||||
summary.out.dump(sys.stdout)
|
||||
|
|
|
@ -11,16 +11,16 @@ The global tag throttler bases throttling decisions on "quotas" provided by clie
The global tag throttler cannot throttle tags to a throughput below the reserved quota, and it cannot allow throughput to exceed the total quota.

### Cost
Internally, the units for these quotas are "page costs", computed as follows. The "page cost" of a read operation is computed as:
Internally, the units for these quotas are bytes. The cost of an operation is rounded up to the nearest page size. The cost of a read operation is computed as:

```
readCost = ceiling(bytesRead / CLIENT_KNOBS->READ_COST_BYTE_FACTOR);
readCost = ceiling(bytesRead / CLIENT_KNOBS->READ_COST_BYTE_FACTOR) * CLIENT_KNOBS->READ_COST_BYTE_FACTOR;
```

The "page cost" of a write operation is computed as:
The cost of a write operation is computed as:

```
writeCost = SERVER_KNOBS->GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO * ceiling(bytesWritten / CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR);
writeCost = CLIENT_KNOBS->GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO * ceiling(bytesWritten / CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR) * CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR;
```

Here `bytesWritten` includes cleared bytes. The size of range clears is estimated at commit time.
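For concreteness, here is a small illustrative C++ sketch of the rounding described above (not the actual throttler code); the 16384-byte page factor and the 5.0 fungibility ratio are only the defaults shown in `ClientKnobs.cpp` elsewhere in this change. With those defaults, a 4,000-byte read is charged as one full 16,384-byte page, and a 20,000-byte write is charged as 5 * 32,768 bytes.

```
#include <cstdint>

// Illustrative defaults; the real values come from CLIENT_KNOBS.
constexpr int64_t READ_COST_BYTE_FACTOR = 16384;
constexpr int64_t WRITE_COST_BYTE_FACTOR = 16384;
constexpr double RW_FUNGIBILITY_RATIO = 5.0;

// Round a positive byte count up to the nearest multiple of `factor` (assumes bytes >= 1).
int64_t roundUpToPage(int64_t bytes, int64_t factor) {
	return ((bytes - 1) / factor + 1) * factor;
}

int64_t readCost(int64_t bytesRead) {
	return roundUpToPage(bytesRead, READ_COST_BYTE_FACTOR);
}

int64_t writeCost(int64_t bytesWritten) {
	// Writes are weighted by the read/write fungibility ratio.
	return static_cast<int64_t>(RW_FUNGIBILITY_RATIO * roundUpToPage(bytesWritten, WRITE_COST_BYTE_FACTOR));
}

// readCost(4000) == 16384; writeCost(20000) == 5 * 32768 == 163840
```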
@ -41,12 +41,6 @@ To set the quota through `fdbcli`, run:
fdbcli> quota set <tag> [reserved_throughput|total_throughput] <bytes_per_second>
```

Note that the quotas are specified in terms of bytes/second, and internally converted to page costs:

```
page_cost_quota = ceiling(byte_quota / CLIENT_KNOBS->READ_COST_BYTE_FACTOR)
```

To clear both reserved and total throughput quotas for a tag, run:

```
@ -34,20 +34,25 @@ Commit proxies would combine idempotency IDs for transactions within a batch. Th

## Value format
```
${protocol_version}(${n (1 byte)}${idempotency_id (n bytes)}${low_order_byte_of_batch_index})*
${protocol_version}${timestamp}(${n (1 byte)}${idempotency_id (n bytes)}${low_order_byte_of_batch_index})*
```

The batch index for each idempotency id can be reconstructed from the high order byte and low order bytes stored in the key and value, respectively. This is necessary for an "unknown_committed" transaction to recover its full version stamp. Batch index is a `short int`, i.e. 2 bytes.

The timestamp is the unix epoch stored as a little-endian signed 64-bit integer.
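As a rough sketch of what this layout implies for a writer, the following illustrative C++ helper (not the actual commit proxy code; the function and parameter names are made up here) packs a value in the format above, assuming the caller already has the serialized protocol version bytes:

```
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Append a signed 64-bit timestamp in little-endian byte order.
void appendLittleEndianInt64(std::string& out, int64_t v) {
	for (int i = 0; i < 8; ++i) {
		out.push_back(static_cast<char>((static_cast<uint64_t>(v) >> (8 * i)) & 0xff));
	}
}

// Illustrative only: builds "${protocol_version}${timestamp}(${n}${idempotency_id}${low_order_byte_of_batch_index})*".
std::string buildIdempotencyValue(const std::string& protocolVersionBytes,
                                  int64_t unixTimestamp,
                                  const std::vector<std::pair<std::string, uint16_t>>& idsWithBatchIndex) {
	std::string value = protocolVersionBytes;
	appendLittleEndianInt64(value, unixTimestamp);
	for (const auto& [id, batchIndex] : idsWithBatchIndex) {
		value.push_back(static_cast<char>(id.size()));           // n (1 byte), so each id must be < 256 bytes
		value += id;                                              // idempotency_id (n bytes)
		value.push_back(static_cast<char>(batchIndex & 0xff));    // low-order byte of the 2-byte batch index
	}
	return value;
}
```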
# Cleaning up old idempotency ids

After learning the result of an attempt to commit a transaction with an
idempotency id, the client may inform the cluster that it's no longer interested
in that id and the cluster can reclaim the space used to store the idempotency
id. The happy-path reply to a CommitTransactionRequest will say which proxy this
request should be sent to, and all idempotency ids for a database key will be
sent to the same proxy so that it can clear the key once it receives all of
them. The first proxy will also periodically clean up the oldest idempotency ids, based on a policy determined by two knobs. One knob will control the minimum lifetime of an idempotency id (i.e. don't delete anything younger than 1 day), and the other will control the target byte size of the idempotency keys (e.g. keep 100 MB of idempotency keys around).
id. The commit proxy that committed a batch is responsible for cleaning all
idempotency kv pairs from that batch, so clients must tell that specific proxy
that they're done with the id. The first proxy will also periodically clean up
the oldest idempotency ids, based on a policy determined by two knobs. One knob
will control the minimum lifetime of an idempotency id (i.e. don't delete
anything younger than 1 day), and the other will control the target byte size of
the idempotency keys (e.g. keep 100 MB of idempotency keys around).

# Commit protocol
|
|
@ -49,7 +49,7 @@ master_doc = 'index'

# General information about the project.
project = u'FoundationDB'
copyright = u'2013-2021 Apple, Inc and the FoundationDB project authors'
copyright = u'2013-2022 Apple, Inc and the FoundationDB project authors'

# Load the version information from 'versions.target'
import xml.etree.ElementTree as ET
|
|
@ -142,6 +142,8 @@ Here is a complete list of valid parameters:

*multipart_min_part_size* (or *minps*) - Min part size for multipart uploads.

*enable_read_cache* (or *erc*) - Whether to enable read block cache.

*read_block_size* (or *rbs*) - Block size in bytes to be used for reads.

*read_ahead_blocks* (or *rab*) - Number of blocks to read ahead of requested offset.
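For illustration only (credentials, host, and bucket below are placeholders), these read-cache parameters are supplied in a blob store backup URL the same way as the other parameters listed here, e.g. ``blobstore://mykey:mysecret@backup.example.com:443/backup_name?bucket=my_bucket&enable_read_cache=1&read_block_size=1048576&read_ahead_blocks=2``.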
|
@ -2,6 +2,12 @@
Release Notes
#############

6.3.25
======
* Fixed a transaction log data corruption bug. `(PR #8558) <https://github.com/apple/foundationdb/pull/8558>`_
* Fixed a special keyspace ``SpecialKeyRangeAsyncImpl::getRange`` bug. `(PR #6453) <https://github.com/apple/foundationdb/pull/6453>`_
* Fixed a special keyspace ``ConflictingKeysImpl::getRange`` bug. `(PR #7724) <https://github.com/apple/foundationdb/pull/7724>`_

6.3.24
======
* Fixed a bug where get key location can overload proxies. `(PR #6453) <https://github.com/apple/foundationdb/pull/6453>`_

@ -2,6 +2,25 @@
Release Notes
#############

7.1.25
======
* Same as 7.1.24 release with AVX enabled.

7.1.24
======
* Released with AVX disabled.
* Fixed a transaction log data corruption bug. `(PR #8525) <https://github.com/apple/foundationdb/pull/8525>`_, `(PR #8562) <https://github.com/apple/foundationdb/pull/8562>`_, and `(PR #8647) <https://github.com/apple/foundationdb/pull/8647>`_
* Fixed a rare data race in transaction logs when PEEK_BATCHING_EMPTY_MSG is enabled. `(PR #8660) <https://github.com/apple/foundationdb/pull/8660>`_
* Fixed a heap-use-after-free bug in cluster controller. `(PR #8683) <https://github.com/apple/foundationdb/pull/8683>`_
* Changed consistency check to report all corruptions. `(PR #8571) <https://github.com/apple/foundationdb/pull/8571>`_
* Fixed a rare storage server crashing bug after recovery. `(PR #8468) <https://github.com/apple/foundationdb/pull/8468>`_
* Added client knob UNLINKONLOAD_FDBCLIB to control deletion of external client libraries. `(PR #8434) <https://github.com/apple/foundationdb/pull/8434>`_
* Updated the default peer latency degradation percentile to 0.5. `(PR #8370) <https://github.com/apple/foundationdb/pull/8370>`_
* Made exclusion less pessimistic when warning about low space usage. `(PR #8347) <https://github.com/apple/foundationdb/pull/8347>`_
* Added storage server readrange and update latency metrics. `(PR #8353) <https://github.com/apple/foundationdb/pull/8353>`_
* Increased the default PEER_DEGRADATION_CONNECTION_FAILURE_COUNT value to 5s. `(PR #8336) <https://github.com/apple/foundationdb/pull/8336>`_
* Increased RocksDB block cache size. `(PR #8274) <https://github.com/apple/foundationdb/pull/8274>`_

7.1.23
======
* Same as 7.1.22 release with AVX enabled.
|
|
|
@ -132,6 +132,7 @@ enum {
|
|||
OPT_DELETE_DATA,
|
||||
OPT_MIN_CLEANUP_SECONDS,
|
||||
OPT_USE_PARTITIONED_LOG,
|
||||
OPT_ENCRYPT_FILES,
|
||||
|
||||
// Backup and Restore constants
|
||||
OPT_PROXY,
|
||||
|
@ -275,6 +276,7 @@ CSimpleOpt::SOption g_rgBackupStartOptions[] = {
|
|||
{ OPT_BLOB_CREDENTIALS, "--blob-credentials", SO_REQ_SEP },
|
||||
{ OPT_INCREMENTALONLY, "--incremental", SO_NONE },
|
||||
{ OPT_ENCRYPTION_KEY_FILE, "--encryption-key-file", SO_REQ_SEP },
|
||||
{ OPT_ENCRYPT_FILES, "--encrypt-files", SO_REQ_SEP },
|
||||
TLS_OPTION_FLAGS,
|
||||
SO_END_OF_OPTIONS
|
||||
};
|
||||
|
@ -1112,6 +1114,11 @@ static void printBackupUsage(bool devhelp) {
|
|||
"and ignore the range files.\n");
|
||||
printf(" --encryption-key-file"
|
||||
" The AES-128-GCM key in the provided file is used for encrypting backup files.\n");
|
||||
printf(" --encrypt-files 0/1"
|
||||
" If passed, this argument will allow the user to override the database encryption state to "
|
||||
"either enable (1) or disable (0) encryption at rest with snapshot backups. This option refers to block "
|
||||
"level encryption of snapshot backups while --encryption-key-file (above) refers to file level encryption. "
|
||||
"Generally, these two options should not be used together.\n");
|
||||
printf(TLS_HELP);
|
||||
printf(" -w, --wait Wait for the backup to complete (allowed with `start' and `discontinue').\n");
|
||||
printf(" -z, --no-stop-when-done\n"
|
||||
|
@ -2365,6 +2372,7 @@ ACTOR Future<Void> runRestore(Database db,
|
|||
KeyRef(addPrefix),
|
||||
KeyRef(removePrefix),
|
||||
LockDB::True,
|
||||
UnlockDB::True,
|
||||
onlyApplyMutationLogs,
|
||||
inconsistentSnapshotOnly,
|
||||
beginVersion,
|
||||
|
@ -3384,8 +3392,8 @@ int main(int argc, char* argv[]) {
|
|||
bool dryRun = false;
|
||||
bool restoreSystemKeys = false;
|
||||
bool restoreUserKeys = false;
|
||||
// TODO (Nim): Set this value when we add optional encrypt_files CLI argument to backup agent start
|
||||
bool encryptionEnabled = true;
|
||||
bool encryptSnapshotFilesPresent = false;
|
||||
std::string traceDir = "";
|
||||
std::string traceFormat = "";
|
||||
std::string traceLogGroup;
|
||||
|
@ -3559,6 +3567,25 @@ int main(int argc, char* argv[]) {
|
|||
case OPT_BASEURL:
|
||||
baseUrl = args->OptionArg();
|
||||
break;
|
||||
case OPT_ENCRYPT_FILES: {
|
||||
const char* a = args->OptionArg();
|
||||
int encryptFiles;
|
||||
if (!sscanf(a, "%d", &encryptFiles)) {
|
||||
fprintf(stderr, "ERROR: Could not parse encrypt-files `%s'\n", a);
|
||||
return FDB_EXIT_ERROR;
|
||||
}
|
||||
if (encryptFiles != 0 && encryptFiles != 1) {
|
||||
fprintf(stderr, "ERROR: encrypt-files must be either 0 or 1\n");
|
||||
return FDB_EXIT_ERROR;
|
||||
}
|
||||
encryptSnapshotFilesPresent = true;
|
||||
if (encryptFiles == 0) {
|
||||
encryptionEnabled = false;
|
||||
} else {
|
||||
encryptionEnabled = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OPT_RESTORE_CLUSTERFILE_DEST:
|
||||
restoreClusterFileDest = args->OptionArg();
|
||||
break;
|
||||
|
@ -3792,6 +3819,10 @@ int main(int argc, char* argv[]) {
|
|||
}
|
||||
}
|
||||
|
||||
if (encryptionKeyFile.present() && encryptSnapshotFilesPresent) {
|
||||
fprintf(stderr, "WARNING: Use of --encrypt-files and --encryption-key-file together is discouraged\n");
|
||||
}
|
||||
|
||||
// Process the extra arguments
|
||||
for (int argLoop = 0; argLoop < args->FileCount(); argLoop++) {
|
||||
switch (programExe) {
|
||||
|
|
|
@ -43,9 +43,9 @@ Optional<LimitType> parseLimitType(StringRef token) {
|
|||
}
|
||||
}
|
||||
|
||||
Optional<double> parseLimitValue(StringRef token) {
|
||||
Optional<int64_t> parseLimitValue(StringRef token) {
|
||||
try {
|
||||
return std::stod(token.toString());
|
||||
return std::stol(token.toString());
|
||||
} catch (...) {
|
||||
return {};
|
||||
}
|
||||
|
@ -63,9 +63,9 @@ ACTOR Future<Void> getQuota(Reference<IDatabase> db, TransactionTag tag, LimitTy
|
|||
} else {
|
||||
auto const quota = ThrottleApi::TagQuotaValue::fromValue(v.get());
|
||||
if (limitType == LimitType::TOTAL) {
|
||||
fmt::print("{}\n", quota.totalQuota * CLIENT_KNOBS->READ_COST_BYTE_FACTOR);
|
||||
fmt::print("{}\n", quota.totalQuota);
|
||||
} else if (limitType == LimitType::RESERVED) {
|
||||
fmt::print("{}\n", quota.reservedQuota * CLIENT_KNOBS->READ_COST_BYTE_FACTOR);
|
||||
fmt::print("{}\n", quota.reservedQuota);
|
||||
}
|
||||
}
|
||||
return Void();
|
||||
|
@ -75,7 +75,7 @@ ACTOR Future<Void> getQuota(Reference<IDatabase> db, TransactionTag tag, LimitTy
|
|||
}
|
||||
}
|
||||
|
||||
ACTOR Future<Void> setQuota(Reference<IDatabase> db, TransactionTag tag, LimitType limitType, double value) {
|
||||
ACTOR Future<Void> setQuota(Reference<IDatabase> db, TransactionTag tag, LimitType limitType, int64_t value) {
|
||||
state Reference<ITransaction> tr = db->createTransaction();
|
||||
loop {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
@ -89,9 +89,13 @@ ACTOR Future<Void> setQuota(Reference<IDatabase> db, TransactionTag tag, LimitTy
|
|||
// Internally, costs are stored in terms of pages, but in the API,
|
||||
// costs are specified in terms of bytes
|
||||
if (limitType == LimitType::TOTAL) {
|
||||
quota.totalQuota = (value - 1) / CLIENT_KNOBS->READ_COST_BYTE_FACTOR + 1;
|
||||
// Round up to nearest page size
|
||||
quota.totalQuota =
|
||||
((value - 1) / CLIENT_KNOBS->READ_COST_BYTE_FACTOR + 1) * CLIENT_KNOBS->READ_COST_BYTE_FACTOR;
|
||||
} else if (limitType == LimitType::RESERVED) {
|
||||
quota.reservedQuota = (value - 1) / CLIENT_KNOBS->READ_COST_BYTE_FACTOR + 1;
|
||||
// Round up to nearest page size
|
||||
quota.reservedQuota =
|
||||
((value - 1) / CLIENT_KNOBS->READ_COST_BYTE_FACTOR + 1) * CLIENT_KNOBS->READ_COST_BYTE_FACTOR;
|
||||
}
|
||||
if (!quota.isValid()) {
|
||||
throw invalid_throttle_quota_value();
|
||||
|
|
|
@ -175,11 +175,13 @@ Future<Reference<IAsyncFile>> BackupContainerS3BlobStore::readFile(const std::st
|
|||
if (usesEncryption()) {
|
||||
f = makeReference<AsyncFileEncrypted>(f, AsyncFileEncrypted::Mode::READ_ONLY);
|
||||
}
|
||||
f = makeReference<AsyncFileReadAheadCache>(f,
|
||||
m_bstore->knobs.read_block_size,
|
||||
m_bstore->knobs.read_ahead_blocks,
|
||||
m_bstore->knobs.concurrent_reads_per_file,
|
||||
m_bstore->knobs.read_cache_blocks_per_file);
|
||||
if (m_bstore->knobs.enable_read_cache) {
|
||||
f = makeReference<AsyncFileReadAheadCache>(f,
|
||||
m_bstore->knobs.read_block_size,
|
||||
m_bstore->knobs.read_ahead_blocks,
|
||||
m_bstore->knobs.concurrent_reads_per_file,
|
||||
m_bstore->knobs.read_cache_blocks_per_file);
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
|
|
|
@ -76,6 +76,10 @@ BlobCipherMetrics::BlobCipherMetrics()
|
|||
UID(),
|
||||
FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL,
|
||||
FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE),
|
||||
getBlobMetadataLatency("GetBlobMetadataLatency",
|
||||
UID(),
|
||||
FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL,
|
||||
FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE),
|
||||
counterSets({ CounterSet(cc, "TLog"),
|
||||
CounterSet(cc, "KVMemory"),
|
||||
CounterSet(cc, "KVRedwood"),
|
||||
|
|
|
@ -232,10 +232,10 @@ void validateEncryptionHeaderDetails(const BlobGranuleFileEncryptionKeys& eKeys,
|
|||
.detail("ExpectedHeaderSalt", header.cipherHeaderDetails.salt);
|
||||
throw encrypt_header_metadata_mismatch();
|
||||
}
|
||||
// Validate encryption header 'cipherHeader' details sanity
|
||||
if (!(header.cipherHeaderDetails.baseCipherId == eKeys.headerCipherKey->getBaseCipherId() &&
|
||||
header.cipherHeaderDetails.encryptDomainId == eKeys.headerCipherKey->getDomainId() &&
|
||||
header.cipherHeaderDetails.salt == eKeys.headerCipherKey->getSalt())) {
|
||||
// Validate encryption header 'cipherText' details sanity
|
||||
if (!(header.cipherTextDetails.baseCipherId == eKeys.textCipherKey->getBaseCipherId() &&
|
||||
header.cipherTextDetails.encryptDomainId == eKeys.textCipherKey->getDomainId() &&
|
||||
header.cipherTextDetails.salt == eKeys.textCipherKey->getSalt())) {
|
||||
TraceEvent(SevError, "EncryptionHeader_CipherTextMismatch")
|
||||
.detail("TextDomainId", eKeys.textCipherKey->getDomainId())
|
||||
.detail("ExpectedTextDomainId", header.cipherTextDetails.encryptDomainId)
|
||||
|
@ -650,12 +650,12 @@ struct IndexedBlobGranuleFile {
|
|||
IndexBlobGranuleFileChunkRef chunkRef =
|
||||
IndexBlobGranuleFileChunkRef::fromBytes(cipherKeysCtx, childData, childArena);
|
||||
|
||||
ChildType child;
|
||||
ObjectReader dataReader(chunkRef.chunkBytes.get().begin(), IncludeVersion());
|
||||
dataReader.deserialize(FileIdentifierFor<ChildType>::value, child, childArena);
|
||||
|
||||
// TODO implement some sort of decrypted+decompressed+deserialized cache, if this object gets reused?
|
||||
return Standalone<ChildType>(child, childArena);
|
||||
|
||||
BinaryReader br(chunkRef.chunkBytes.get(), IncludeVersion());
|
||||
Standalone<ChildType> child;
|
||||
br >> child;
|
||||
return child;
|
||||
}
|
||||
|
||||
template <class Ar>
|
||||
|
@ -751,7 +751,7 @@ Value serializeChunkedSnapshot(const Standalone<StringRef>& fileNameRef,
|
|||
|
||||
if (currentChunkBytesEstimate >= targetChunkBytes || i == snapshot.size() - 1) {
|
||||
Value serialized =
|
||||
ObjectWriter::toValue(currentChunk, IncludeVersion(ProtocolVersion::withBlobGranuleFile()));
|
||||
BinaryWriter::toValue(currentChunk, IncludeVersion(ProtocolVersion::withBlobGranuleFile()));
|
||||
Value chunkBytes =
|
||||
IndexBlobGranuleFileChunkRef::toBytes(cipherKeysCtx, compressFilter, serialized, file.arena());
|
||||
chunks.push_back(chunkBytes);
|
||||
|
@ -1020,7 +1020,7 @@ Value serializeChunkedDeltaFile(const Standalone<StringRef>& fileNameRef,
|
|||
|
||||
if (currentChunkBytesEstimate >= chunkSize || i == boundaries.size() - 1) {
|
||||
Value serialized =
|
||||
ObjectWriter::toValue(currentChunk, IncludeVersion(ProtocolVersion::withBlobGranuleFile()));
|
||||
BinaryWriter::toValue(currentChunk, IncludeVersion(ProtocolVersion::withBlobGranuleFile()));
|
||||
Value chunkBytes =
|
||||
IndexBlobGranuleFileChunkRef::toBytes(cipherKeysCtx, compressFilter, serialized, file.arena());
|
||||
chunks.push_back(chunkBytes);
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* BlobMetadataUtils.cpp
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "fdbclient/BlobMetadataUtils.h"
|
||||
|
||||
#include "fmt/format.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/flow.h"
|
||||
#include "fdbclient/Knobs.h"
|
||||
#include "fdbclient/S3BlobStore.h"
|
||||
|
||||
std::string buildPartitionPath(const std::string& url, const std::string& partition) {
|
||||
ASSERT(!partition.empty());
|
||||
ASSERT(partition.front() != '/');
|
||||
ASSERT(partition.back() == '/');
|
||||
StringRef u(url);
|
||||
if (u.startsWith("file://"_sr)) {
|
||||
ASSERT(u.endsWith("/"_sr));
|
||||
return url + partition;
|
||||
} else if (u.startsWith("blobstore://"_sr)) {
|
||||
std::string resource;
|
||||
std::string lastOpenError;
|
||||
S3BlobStoreEndpoint::ParametersT backupParams;
|
||||
|
||||
std::string urlCopy = url;
|
||||
|
||||
Reference<S3BlobStoreEndpoint> bstore =
|
||||
S3BlobStoreEndpoint::fromString(url, {}, &resource, &lastOpenError, &backupParams);
|
||||
|
||||
ASSERT(!resource.empty());
|
||||
ASSERT(resource.back() != '/');
|
||||
size_t resourceStart = url.find(resource);
|
||||
ASSERT(resourceStart != std::string::npos);
|
||||
|
||||
return urlCopy.insert(resourceStart + resource.size(), "/" + partition);
|
||||
} else {
|
||||
// FIXME: support azure
|
||||
throw backup_invalid_url();
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: make this (more) deterministic outside of simulation for FDBPerfKmsConnector
|
||||
Standalone<BlobMetadataDetailsRef> createRandomTestBlobMetadata(const std::string& baseUrl,
|
||||
BlobMetadataDomainId domainId,
|
||||
BlobMetadataDomainName domainName) {
|
||||
Standalone<BlobMetadataDetailsRef> metadata;
|
||||
metadata.domainId = domainId;
|
||||
metadata.arena().dependsOn(domainName.arena());
|
||||
metadata.domainName = domainName;
|
||||
// 0 == no partition, 1 == suffix partitioned, 2 == storage location partitioned
|
||||
int type = deterministicRandom()->randomInt(0, 3);
|
||||
int partitionCount = (type == 0) ? 0 : deterministicRandom()->randomInt(2, 12);
|
||||
TraceEvent ev(SevDebug, "SimBlobMetadata");
|
||||
ev.detail("DomainId", domainId).detail("TypeNum", type).detail("PartitionCount", partitionCount);
|
||||
if (type == 0) {
|
||||
// single storage location
|
||||
std::string partition = std::to_string(domainId) + "/";
|
||||
metadata.base = StringRef(metadata.arena(), buildPartitionPath(baseUrl, partition));
|
||||
ev.detail("Base", metadata.base);
|
||||
}
|
||||
if (type == 1) {
|
||||
// simulate hash prefixing in s3
|
||||
metadata.base = StringRef(metadata.arena(), baseUrl);
|
||||
ev.detail("Base", metadata.base);
|
||||
for (int i = 0; i < partitionCount; i++) {
|
||||
metadata.partitions.push_back_deep(metadata.arena(),
|
||||
deterministicRandom()->randomUniqueID().shortString() + "-" +
|
||||
std::to_string(domainId) + "/");
|
||||
ev.detail("P" + std::to_string(i), metadata.partitions.back());
|
||||
}
|
||||
}
|
||||
if (type == 2) {
|
||||
// simulate separate storage location per partition
|
||||
for (int i = 0; i < partitionCount; i++) {
|
||||
std::string partition = std::to_string(domainId) + "_" + std::to_string(i) + "/";
|
||||
metadata.partitions.push_back_deep(metadata.arena(), buildPartitionPath(baseUrl, partition));
|
||||
ev.detail("P" + std::to_string(i), metadata.partitions.back());
|
||||
}
|
||||
}
|
||||
|
||||
// set random refresh + expire time
|
||||
if (deterministicRandom()->coinflip()) {
|
||||
metadata.refreshAt = now() + deterministicRandom()->random01() * CLIENT_KNOBS->BLOB_METADATA_REFRESH_INTERVAL;
|
||||
metadata.expireAt =
|
||||
metadata.refreshAt + deterministicRandom()->random01() * CLIENT_KNOBS->BLOB_METADATA_REFRESH_INTERVAL;
|
||||
} else {
|
||||
metadata.refreshAt = std::numeric_limits<double>::max();
|
||||
metadata.expireAt = metadata.refreshAt;
|
||||
}
|
||||
|
||||
return metadata;
|
||||
}
|
|
@ -61,7 +61,7 @@ void ClientKnobs::initialize(Randomize randomize) {
|
|||
init( WRONG_SHARD_SERVER_DELAY, .01 ); if( randomize && BUGGIFY ) WRONG_SHARD_SERVER_DELAY = deterministicRandom()->random01(); // FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test)
|
||||
init( FUTURE_VERSION_RETRY_DELAY, .01 ); if( randomize && BUGGIFY ) FUTURE_VERSION_RETRY_DELAY = deterministicRandom()->random01();// FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY;
|
||||
init( GRV_ERROR_RETRY_DELAY, 5.0 ); if( randomize && BUGGIFY ) GRV_ERROR_RETRY_DELAY = 0.01 + 5 * deterministicRandom()->random01();
|
||||
init( UNKNOWN_TENANT_RETRY_DELAY, 0.0 ); if( randomize && BUGGIFY ) UNKNOWN_TENANT_RETRY_DELAY = deterministicRandom()->random01();
|
||||
init( UNKNOWN_TENANT_RETRY_DELAY, .01 ); if( randomize && BUGGIFY ) UNKNOWN_TENANT_RETRY_DELAY = 0.01 + deterministicRandom()->random01();
|
||||
init( REPLY_BYTE_LIMIT, 80000 );
|
||||
init( DEFAULT_BACKOFF, .01 ); if( randomize && BUGGIFY ) DEFAULT_BACKOFF = deterministicRandom()->random01();
|
||||
init( DEFAULT_MAX_BACKOFF, 1.0 );
|
||||
|
@ -198,7 +198,6 @@ void ClientKnobs::initialize(Randomize randomize) {
|
|||
init( DEFAULT_AUTO_LOGS, 3 );
|
||||
init( DEFAULT_COMMIT_GRV_PROXIES_RATIO, 3 );
|
||||
init( DEFAULT_MAX_GRV_PROXIES, 4 );
|
||||
init( DELETE_NATIVE_LIB_AFTER_LOADING, true ); // if false, don't delete libfdb_c in tmp directory on client connect.
|
||||
|
||||
init( GLOBAL_CONFIG_REFRESH_BACKOFF, 0.5 );
|
||||
init( GLOBAL_CONFIG_REFRESH_MAX_BACKOFF, 60.0 );
|
||||
|
@ -220,6 +219,7 @@ void ClientKnobs::initialize(Randomize randomize) {
|
|||
|
||||
init( BLOBSTORE_CONCURRENT_WRITES_PER_FILE, 5 );
|
||||
init( BLOBSTORE_CONCURRENT_READS_PER_FILE, 3 );
|
||||
init( BLOBSTORE_ENABLE_READ_CACHE, true );
|
||||
init( BLOBSTORE_READ_BLOCK_SIZE, 1024 * 1024 );
|
||||
init( BLOBSTORE_READ_AHEAD_BLOCKS, 0 );
|
||||
init( BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE, 2 );
|
||||
|
@ -272,7 +272,7 @@ void ClientKnobs::initialize(Randomize randomize) {
|
|||
init( TAG_THROTTLE_EXPIRATION_INTERVAL, 60.0 ); if( randomize && BUGGIFY ) TAG_THROTTLE_EXPIRATION_INTERVAL = 1.0;
|
||||
init( WRITE_COST_BYTE_FACTOR, 16384 ); if( randomize && BUGGIFY ) WRITE_COST_BYTE_FACTOR = 4096;
|
||||
init( READ_COST_BYTE_FACTOR, 16384 ); if( randomize && BUGGIFY ) READ_COST_BYTE_FACTOR = 4096;
|
||||
init( PROXY_MAX_TAG_THROTTLE_DURATION, 5.0 ); if( randomize && BUGGIFY ) PROXY_MAX_TAG_THROTTLE_DURATION = 0.5;
|
||||
init( GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO, 5.0 );
|
||||
|
||||
// busyness reporting
|
||||
init( BUSYNESS_SPIKE_START_THRESHOLD, 0.100 );
|
||||
|
@ -281,6 +281,7 @@ void ClientKnobs::initialize(Randomize randomize) {
|
|||
// Blob granules
|
||||
init( BG_MAX_GRANULE_PARALLELISM, 10 );
|
||||
init( BG_TOO_MANY_GRANULES, 10000 );
|
||||
init( BLOB_METADATA_REFRESH_INTERVAL, 3600 ); if ( randomize && BUGGIFY ) { BLOB_METADATA_REFRESH_INTERVAL = deterministicRandom()->randomInt(5, 120); }
|
||||
|
||||
init( CHANGE_QUORUM_BAD_STATE_RETRY_TIMES, 3 );
|
||||
init( CHANGE_QUORUM_BAD_STATE_RETRY_DELAY, 2.0 );
|
||||
|
|
|
@ -658,7 +658,7 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) {
parse((&type), value);
blobGranulesEnabled = (type != 0);
} else if (ck == "encryption_at_rest_mode"_sr) {
encryptionAtRestMode = EncryptionAtRestMode::fromValue(value);
encryptionAtRestMode = EncryptionAtRestMode::fromValueRef(Optional<ValueRef>(value));
} else {
return false;
}

@ -167,6 +167,7 @@ public:
KeyBackedProperty<Key> removePrefix() { return configSpace.pack(__FUNCTION__sr); }
KeyBackedProperty<bool> onlyApplyMutationLogs() { return configSpace.pack(__FUNCTION__sr); }
KeyBackedProperty<bool> inconsistentSnapshotOnly() { return configSpace.pack(__FUNCTION__sr); }
KeyBackedProperty<bool> unlockDBAfterRestore() { return configSpace.pack(__FUNCTION__sr); }
// XXX: Remove restoreRange() once it is safe to remove. It has been changed to restoreRanges
KeyBackedProperty<KeyRange> restoreRange() { return configSpace.pack(__FUNCTION__sr); }
KeyBackedProperty<std::vector<KeyRange>> restoreRanges() { return configSpace.pack(__FUNCTION__sr); }

@ -591,12 +592,11 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
}

ACTOR static Future<StringRef> decryptImpl(Database cx,
StringRef headerS,
BlobCipherEncryptHeader header,
const uint8_t* dataP,
int64_t dataLen,
Arena* arena) {
Reference<AsyncVar<ClientDBInfo> const> dbInfo = cx->clientInfo;
state BlobCipherEncryptHeader header = BlobCipherEncryptHeader::fromStringRef(headerS);
TextAndHeaderCipherKeys cipherKeys = wait(getEncryptCipherKeys(dbInfo, header, BlobCipherMetrics::BACKUP));
ASSERT(cipherKeys.cipherHeaderKey.isValid() && cipherKeys.cipherTextKey.isValid());
validateEncryptionHeader(cipherKeys.cipherHeaderKey, cipherKeys.cipherTextKey, header);

@ -606,7 +606,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
}

static Future<StringRef> decrypt(Database cx,
StringRef headerS,
BlobCipherEncryptHeader headerS,
const uint8_t* dataP,
int64_t dataLen,
Arena* arena) {

@ -651,7 +651,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
}

ACTOR static Future<Void> updateEncryptionKeysCtx(EncryptedRangeFileWriter* self, KeyRef key) {
state std::pair<int64_t, TenantName> curTenantInfo = wait(getEncryptionDomainDetails(key, self));
state std::pair<int64_t, TenantName> curTenantInfo = wait(getEncryptionDomainDetails(key, self->tenantCache));
state Reference<AsyncVar<ClientDBInfo> const> dbInfo = self->cx->clientInfo;

// Get text and header cipher key

@ -693,12 +693,13 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {

static bool isSystemKey(KeyRef key) { return key.size() && key[0] == systemKeys.begin[0]; }

ACTOR static Future<std::pair<int64_t, TenantName>>
getEncryptionDomainDetailsImpl(KeyRef key, Reference<TenantEntryCache<Void>> tenantCache, bool useTenantCache) {
ACTOR static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetailsImpl(
KeyRef key,
Reference<TenantEntryCache<Void>> tenantCache) {
if (isSystemKey(key)) {
return std::make_pair(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_NAME);
}
if (key.size() < TENANT_PREFIX_SIZE || !useTenantCache) {
if (key.size() < TENANT_PREFIX_SIZE) {
return std::make_pair(FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME);
}
KeyRef tenantPrefix = KeyRef(key.begin(), TENANT_PREFIX_SIZE);

@ -710,21 +711,10 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
return std::make_pair(FDB_DEFAULT_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME);
}

static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetails(KeyRef key,
EncryptedRangeFileWriter* self) {
// If tenants are disabled on a cluster then don't use the TenantEntryCache as it will result in alot of
// unnecessary cache misses. For a cluster configured in TenantMode::Optional, the backup performance may
// degrade if most of the mutations belong to an invalid tenant
TenantMode mode = self->cx->clientInfo->get().tenantMode;
bool useTenantCache = mode != TenantMode::DISABLED;
if (g_network->isSimulated() && mode == TenantMode::OPTIONAL_TENANT) {
// TODO: Currently simulation tests run with optional tenant mode but most data does not belong to any
// tenant. This results in many timeouts so disable using the tenant cache until optional tenant mode
// support with backups is more performant
useTenantCache = false;
}
CODE_PROBE(useTenantCache, "using tenant cache");
return getEncryptionDomainDetailsImpl(key, self->tenantCache, useTenantCache);
static Future<std::pair<int64_t, TenantName>> getEncryptionDomainDetails(
KeyRef key,
Reference<TenantEntryCache<Void>> tenantCache) {
return getEncryptionDomainDetailsImpl(key, tenantCache);
}

// Handles the first block and internal blocks. Ends current block if needed.

@ -816,6 +806,7 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
curKeyTenantInfo.first != FDB_DEFAULT_ENCRYPT_DOMAIN_ID) {
endKey = StringRef(k.begin(), TENANT_PREFIX_SIZE);
}

state ValueRef newValue = StringRef();
self->lastKey = k;
self->lastValue = v;

@ -834,9 +825,9 @@ struct EncryptedRangeFileWriter : public IRangeFileWriter {
if (self->lastKey.size() == 0 || k.size() == 0) {
return false;
}
state std::pair<int64_t, TenantName> curKeyTenantInfo = wait(getEncryptionDomainDetails(k, self));
state std::pair<int64_t, TenantName> prevKeyTenantInfo = wait(getEncryptionDomainDetails(self->lastKey, self));
// crossing tenant boundaries so finish the current block using only the tenant prefix of the new key
state std::pair<int64_t, TenantName> curKeyTenantInfo = wait(getEncryptionDomainDetails(k, self->tenantCache));
state std::pair<int64_t, TenantName> prevKeyTenantInfo =
wait(getEncryptionDomainDetails(self->lastKey, self->tenantCache));
if (curKeyTenantInfo.first != prevKeyTenantInfo.first) {
CODE_PROBE(true, "crossed tenant boundaries");
wait(handleTenantBondary(self, k, v, writeValue, curKeyTenantInfo));

@ -1040,11 +1031,18 @@ private:
Key lastValue;
};

void decodeKVPairs(StringRefReader* reader, Standalone<VectorRef<KeyValueRef>>* results) {
ACTOR static Future<Void> decodeKVPairs(StringRefReader* reader,
Standalone<VectorRef<KeyValueRef>>* results,
bool encryptedBlock,
Optional<Reference<TenantEntryCache<Void>>> tenantCache,
Optional<BlobCipherEncryptHeader> encryptHeader) {
// Read begin key, if this fails then block was invalid.
uint32_t kLen = reader->consumeNetworkUInt32();
const uint8_t* k = reader->consume(kLen);
state uint32_t kLen = reader->consumeNetworkUInt32();
state const uint8_t* k = reader->consume(kLen);
results->push_back(results->arena(), KeyValueRef(KeyRef(k, kLen), ValueRef()));
state KeyRef prevKey = KeyRef(k, kLen);
state bool done = false;
state Optional<std::pair<int64_t, TenantName>> prevTenantInfo;

// Read kv pairs and end key
while (1) {

@ -1052,6 +1050,35 @@ void decodeKVPairs(StringRefReader* reader, Standalone<VectorRef<KeyValueRef>>*
kLen = reader->consumeNetworkUInt32();
k = reader->consume(kLen);

// make sure that all keys in a block belong to exactly one tenant,
// unless its the last key in which case it can be a truncated (different) tenant prefix
if (encryptedBlock && g_network && g_network->isSimulated()) {
ASSERT(tenantCache.present());
ASSERT(encryptHeader.present());
state KeyRef curKey = KeyRef(k, kLen);
if (!prevTenantInfo.present()) {
std::pair<int64_t, TenantName> tenantInfo =
wait(EncryptedRangeFileWriter::getEncryptionDomainDetails(prevKey, tenantCache.get()));
prevTenantInfo = tenantInfo;
}
std::pair<int64_t, TenantName> curTenantInfo =
wait(EncryptedRangeFileWriter::getEncryptionDomainDetails(curKey, tenantCache.get()));
if (!curKey.empty() && !prevKey.empty() && prevTenantInfo.get().first != curTenantInfo.first) {
ASSERT(!done);
if (curTenantInfo.first != SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID &&
curTenantInfo.first != FDB_DEFAULT_ENCRYPT_DOMAIN_ID) {
ASSERT(curKey.size() == TENANT_PREFIX_SIZE);
}
done = true;
}
// make sure that all keys (except possibly the last key) in a block are encrypted using the correct key
if (!prevKey.empty()) {
ASSERT(prevTenantInfo.get().first == encryptHeader.get().cipherTextDetails.encryptDomainId);
}
prevKey = curKey;
prevTenantInfo = curTenantInfo;
}

// If eof reached or first value len byte is 0xFF then a valid block end was reached.
if (reader->eof() || *reader->rptr == 0xFF) {
results->push_back(results->arena(), KeyValueRef(KeyRef(k, kLen), ValueRef()));

@ -1072,6 +1099,8 @@ void decodeKVPairs(StringRefReader* reader, Standalone<VectorRef<KeyValueRef>>*
for (auto b : reader->remainder())
if (b != 0xFF)
throw restore_corrupted_data_padding();

return Void();
}

ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<IAsyncFile> file,

@ -1094,7 +1123,11 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
// BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION
int32_t file_version = reader.consume<int32_t>();
if (file_version == BACKUP_AGENT_SNAPSHOT_FILE_VERSION) {
decodeKVPairs(&reader, &results);
wait(decodeKVPairs(&reader,
&results,
false,
Optional<Reference<TenantEntryCache<Void>>>(),
Optional<BlobCipherEncryptHeader>()));
} else if (file_version == BACKUP_AGENT_ENCRYPTED_SNAPSHOT_FILE_VERSION) {
CODE_PROBE(true, "decoding encrypted block");
ASSERT(cx.present());

@ -1108,7 +1141,8 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<

// read encryption header
const uint8_t* headerStart = reader.consume(BlobCipherEncryptHeader::headerSize);
StringRef header = StringRef(headerStart, BlobCipherEncryptHeader::headerSize);
StringRef headerS = StringRef(headerStart, BlobCipherEncryptHeader::headerSize);
state BlobCipherEncryptHeader header = BlobCipherEncryptHeader::fromStringRef(headerS);
const uint8_t* dataPayloadStart = headerStart + BlobCipherEncryptHeader::headerSize;
// calculate the total bytes read up to (and including) the header
int64_t bytesRead = sizeof(int32_t) + sizeof(uint32_t) + optionsLen + BlobCipherEncryptHeader::headerSize;

@ -1117,7 +1151,12 @@ ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<
StringRef decryptedData =
wait(EncryptedRangeFileWriter::decrypt(cx.get(), header, dataPayloadStart, dataLen, &results.arena()));
reader = StringRefReader(decryptedData, restore_corrupted_data());
decodeKVPairs(&reader, &results);
state Optional<Reference<TenantEntryCache<Void>>> tenantCache;
if (g_network && g_simulator->isSimulated()) {
tenantCache = makeReference<TenantEntryCache<Void>>(cx.get(), TenantEntryCacheRefreshMode::WATCH);
wait(tenantCache.get()->init());
}
wait(decodeKVPairs(&reader, &results, true, tenantCache, header));
} else {
throw restore_unsupported_file_version();
}

@ -1711,7 +1750,7 @@ struct BackupRangeTaskFunc : BackupTaskFuncBase {

state bool done = false;
state int64_t nrKeys = 0;
state bool encryptionEnabled = false;
state Optional<bool> encryptionEnabled;

loop {
state RangeResultWithVersion values;

@ -1777,7 +1816,7 @@ struct BackupRangeTaskFunc : BackupTaskFuncBase {

wait(taskBucket->keepRunning(tr, task) &&
storeOrThrow(snapshotBeginVersion, backup.snapshotBeginVersion().get(tr)) &&
storeOrThrow(encryptionEnabled, backup.enableSnapshotBackupEncryption().get(tr)) &&
store(encryptionEnabled, backup.enableSnapshotBackupEncryption().get(tr)) &&
store(snapshotRangeFileCount, backup.snapshotRangeFileCount().getD(tr)));

break;

@ -1790,9 +1829,10 @@ struct BackupRangeTaskFunc : BackupTaskFuncBase {
wait(bc->writeRangeFile(snapshotBeginVersion, snapshotRangeFileCount, outVersion, blockSize));
outFile = f;

encryptionEnabled = encryptionEnabled && cx->clientInfo->get().isEncryptionEnabled;
const bool encrypted =
encryptionEnabled.present() && encryptionEnabled.get() && cx->clientInfo->get().isEncryptionEnabled;
// Initialize range file writer and write begin key
if (encryptionEnabled) {
if (encrypted) {
CODE_PROBE(true, "using encrypted snapshot file writer");
if (!tenantCache.isValid()) {
tenantCache = makeReference<TenantEntryCache<Void>>(cx, TenantEntryCacheRefreshMode::WATCH);

@ -3398,6 +3438,8 @@ struct RestoreCompleteTaskFunc : RestoreTaskFuncBase {

state RestoreConfig restore(task);
restore.stateEnum().set(tr, ERestoreState::COMPLETED);
state bool unlockDB = wait(restore.unlockDBAfterRestore().getD(tr, Snapshot::False, true));

tr->atomicOp(metadataVersionKey, metadataVersionRequiredValue, MutationRef::SetVersionstampedValue);
// Clear the file map now since it could be huge.
restore.fileSet().clear(tr);

@ -3413,7 +3455,9 @@ struct RestoreCompleteTaskFunc : RestoreTaskFuncBase {
restore.clearApplyMutationsKeys(tr);

wait(taskBucket->finish(tr, task));
wait(unlockDatabase(tr, restore.getUid()));
if (unlockDB) {
wait(unlockDatabase(tr, restore.getUid()));
}

return Void();
}

@ -5172,6 +5216,7 @@ public:
Key addPrefix,
Key removePrefix,
LockDB lockDB,
UnlockDB unlockDB,
OnlyApplyMutationLogs onlyApplyMutationLogs,
InconsistentSnapshotOnly inconsistentSnapshotOnly,
Version beginVersion,

@ -5245,6 +5290,7 @@ public:
restore.onlyApplyMutationLogs().set(tr, onlyApplyMutationLogs);
restore.inconsistentSnapshotOnly().set(tr, inconsistentSnapshotOnly);
restore.beginVersion().set(tr, beginVersion);
restore.unlockDBAfterRestore().set(tr, unlockDB);
if (BUGGIFY && restoreRanges.size() == 1) {
restore.restoreRange().set(tr, restoreRanges[0]);
} else {

@ -5836,6 +5882,7 @@ public:
Key addPrefix,
Key removePrefix,
LockDB lockDB,
UnlockDB unlockDB,
OnlyApplyMutationLogs onlyApplyMutationLogs,
InconsistentSnapshotOnly inconsistentSnapshotOnly,
Version beginVersion,

@ -5892,6 +5939,7 @@ public:
addPrefix,
removePrefix,
lockDB,
unlockDB,
onlyApplyMutationLogs,
inconsistentSnapshotOnly,
beginVersion,

@ -6017,7 +6065,7 @@ public:
}
}

Reference<IBackupContainer> bc = wait(backupConfig.backupContainer().getOrThrow(cx.getReference()));
state Reference<IBackupContainer> bc = wait(backupConfig.backupContainer().getOrThrow(cx.getReference()));

if (fastRestore) {
TraceEvent("AtomicParallelRestoreStartRestore").log();

@ -6043,24 +6091,80 @@ public:
return -1;
} else {
TraceEvent("AS_StartRestore").log();
Version ver = wait(restore(backupAgent,
cx,
cx,
tagName,
KeyRef(bc->getURL()),
bc->getProxy(),
ranges,
WaitForComplete::True,
::invalidVersion,
Verbose::True,
addPrefix,
removePrefix,
LockDB::True,
OnlyApplyMutationLogs::False,
InconsistentSnapshotOnly::False,
::invalidVersion,
{},
randomUid));
state Standalone<VectorRef<KeyRangeRef>> restoreRange;
state Standalone<VectorRef<KeyRangeRef>> systemRestoreRange;
bool encryptionEnabled = cx->clientInfo->get().isEncryptionEnabled;
for (auto r : ranges) {
if (!encryptionEnabled || !r.intersects(getSystemBackupRanges())) {
restoreRange.push_back_deep(restoreRange.arena(), r);
} else {
KeyRangeRef normalKeyRange = r & normalKeys;
KeyRangeRef systemKeyRange = r & systemKeys;
if (!normalKeyRange.empty()) {
restoreRange.push_back_deep(restoreRange.arena(), normalKeyRange);
}
if (!systemKeyRange.empty()) {
systemRestoreRange.push_back_deep(systemRestoreRange.arena(), systemKeyRange);
}
}
}
if (!systemRestoreRange.empty()) {
// restore system keys
wait(success(restore(backupAgent,
cx,
cx,
"system_restore"_sr,
KeyRef(bc->getURL()),
bc->getProxy(),
systemRestoreRange,
WaitForComplete::True,
::invalidVersion,
Verbose::True,
addPrefix,
removePrefix,
LockDB::True,
UnlockDB::False,
OnlyApplyMutationLogs::False,
InconsistentSnapshotOnly::False,
::invalidVersion,
{},
randomUid)));
state Reference<ReadYourWritesTransaction> rywTransaction =
Reference<ReadYourWritesTransaction>(new ReadYourWritesTransaction(cx));
// clear old restore config associated with system keys
loop {
try {
rywTransaction->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
rywTransaction->setOption(FDBTransactionOptions::LOCK_AWARE);
state RestoreConfig oldRestore(randomUid);
oldRestore.clear(rywTransaction);
wait(rywTransaction->commit());
break;
} catch (Error& e) {
wait(rywTransaction->onError(e));
}
}
}
// restore user data
state Version ver = wait(restore(backupAgent,
cx,
cx,
tagName,
KeyRef(bc->getURL()),
bc->getProxy(),
restoreRange,
WaitForComplete::True,
::invalidVersion,
Verbose::True,
addPrefix,
removePrefix,
LockDB::True,
UnlockDB::True,
OnlyApplyMutationLogs::False,
InconsistentSnapshotOnly::False,
::invalidVersion,
{},
randomUid));
return ver;
}
}

@ -6120,6 +6224,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
Key addPrefix,
Key removePrefix,
LockDB lockDB,
UnlockDB unlockDB,
OnlyApplyMutationLogs onlyApplyMutationLogs,
InconsistentSnapshotOnly inconsistentSnapshotOnly,
Version beginVersion,

@ -6137,6 +6242,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
addPrefix,
removePrefix,
lockDB,
unlockDB,
onlyApplyMutationLogs,
inconsistentSnapshotOnly,
beginVersion,

@ -6178,6 +6284,7 @@ Future<Version> FileBackupAgent::restore(Database cx,
addPrefix,
removePrefix,
lockDB,
UnlockDB::True,
onlyApplyMutationLogs,
inconsistentSnapshotOnly,
beginVersion,

@ -1,5 +1,5 @@
/*
* IdempotencyId.cpp
* IdempotencyId.actor.cpp
*
* This source file is part of the FoundationDB open source project
*

@ -18,9 +18,11 @@
* limitations under the License.
*/

#include "fdbclient/IdempotencyId.h"
#include "fdbclient/IdempotencyId.actor.h"
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/SystemData.h"
#include "flow/UnitTest.h"
#include "flow/actorcompiler.h" // this has to be the last include

struct IdempotencyIdKVBuilderImpl {
Optional<Version> commitVersion;

@ -40,6 +42,7 @@ void IdempotencyIdKVBuilder::add(const IdempotencyIdRef& id, uint16_t batchIndex
ASSERT((batchIndex >> 8) == impl->batchIndexHighOrderByte.get());
} else {
impl->batchIndexHighOrderByte = batchIndex >> 8;
impl->value << int64_t(now());
}
StringRef s = id.asStringRefUnsafe();
impl->value << uint8_t(s.size());

@ -53,19 +56,17 @@ Optional<KeyValue> IdempotencyIdKVBuilder::buildAndClear() {
return {};
}

BinaryWriter key{ Unversioned() };
key.serializeBytes(idempotencyIdKeys.begin);
key << bigEndian64(impl->commitVersion.get());
key << impl->batchIndexHighOrderByte.get();

Value v = impl->value.toValue();

KeyRef key =
makeIdempotencySingleKeyRange(v.arena(), impl->commitVersion.get(), impl->batchIndexHighOrderByte.get()).begin;

impl->value = BinaryWriter(IncludeVersion());
impl->batchIndexHighOrderByte = Optional<uint8_t>();

Optional<KeyValue> result = KeyValue();
result.get().arena() = v.arena();
result.get().key = key.toValue(result.get().arena());
result.get().key = key;
result.get().value = v;
return result;
}

@ -86,6 +87,8 @@ Optional<CommitResult> kvContainsIdempotencyId(const KeyValueRef& kv, const Idem

// Even if id is a substring of value, it may still not actually contain it.
BinaryReader reader(kv.value.begin(), kv.value.size(), IncludeVersion());
int64_t timestamp; // ignored
reader >> timestamp;
while (!reader.empty()) {
uint8_t length;
reader >> length;

@ -93,13 +96,9 @@ Optional<CommitResult> kvContainsIdempotencyId(const KeyValueRef& kv, const Idem
uint8_t lowOrderBatchIndex;
reader >> lowOrderBatchIndex;
if (candidate == needle) {
BinaryReader reader(kv.key.begin(), kv.key.size(), Unversioned());
reader.readBytes(idempotencyIdKeys.begin.size());
Version commitVersion;
reader >> commitVersion;
commitVersion = bigEndian64(commitVersion);
uint8_t highOrderBatchIndex;
reader >> highOrderBatchIndex;
decodeIdempotencyKey(kv.key, commitVersion, highOrderBatchIndex);
return CommitResult{ commitVersion,
static_cast<uint16_t>((uint16_t(highOrderBatchIndex) << 8) |
uint16_t(lowOrderBatchIndex)) };

@ -172,4 +171,35 @@ TEST_CASE("/fdbclient/IdempotencyId/serialization") {
ASSERT(t == id);
}
return Void();
}

KeyRangeRef makeIdempotencySingleKeyRange(Arena& arena, Version version, uint8_t highOrderBatchIndex) {
static const auto size =
idempotencyIdKeys.begin.size() + sizeof(version) + sizeof(highOrderBatchIndex) + /*\x00*/ 1;

StringRef second = makeString(size, arena);
auto* dst = mutateString(second);

memcpy(dst, idempotencyIdKeys.begin.begin(), idempotencyIdKeys.begin.size());
dst += idempotencyIdKeys.begin.size();

version = bigEndian64(version);
memcpy(dst, &version, sizeof(version));
dst += sizeof(version);

*dst++ = highOrderBatchIndex;

*dst++ = 0;

ASSERT_EQ(dst - second.begin(), size);

return KeyRangeRef(second.removeSuffix("\x00"_sr), second);
}

void decodeIdempotencyKey(KeyRef key, Version& commitVersion, uint8_t& highOrderBatchIndex) {
BinaryReader reader(key, Unversioned());
reader.readBytes(idempotencyIdKeys.begin.size());
reader >> commitVersion;
commitVersion = bigEndian64(commitVersion);
reader >> highOrderBatchIndex;
}

@ -2639,7 +2639,8 @@ TEST_CASE("/ManagementAPI/AutoQuorumChange/checkLocality") {
ProcessClass(ProcessClass::CoordinatorClass, ProcessClass::CommandLineSource),
"",
"",
currentProtocolVersion());
currentProtocolVersion(),
false);
}

workers.push_back(data);

@ -18,6 +18,7 @@
* limitations under the License.
*/

#include "flow/Trace.h"
#ifdef ADDRESS_SANITIZER
#include <sanitizer/lsan_interface.h>
#endif

@ -414,6 +415,20 @@ Version DLTransaction::getCommittedVersion() {
return version;
}

ThreadFuture<int64_t> DLTransaction::getTotalCost() {
if (!api->transactionGetTotalCost) {
return unsupported_operation();
}

FdbCApi::FDBFuture* f = api->transactionGetTotalCost(tr);
return toThreadFuture<int64_t>(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) {
int64_t size = 0;
FdbCApi::fdb_error_t error = api->futureGetInt64(f, &size);
ASSERT(!error);
return size;
});
}

ThreadFuture<int64_t> DLTransaction::getApproximateSize() {
if (!api->transactionGetApproximateSize) {
return unsupported_operation();

@ -950,6 +965,11 @@ void DLApi::init() {
|
|||
fdbCPath,
|
||||
"fdb_transaction_get_committed_version",
|
||||
headerVersion >= 0);
|
||||
loadClientFunction(&api->transactionGetTotalCost,
|
||||
lib,
|
||||
fdbCPath,
|
||||
"fdb_transaction_get_total_cost",
|
||||
headerVersion >= ApiVersion::withGetTotalCost().version());
|
||||
loadClientFunction(&api->transactionGetApproximateSize,
|
||||
lib,
|
||||
fdbCPath,
|
||||
|
@ -1486,6 +1506,12 @@ ThreadFuture<SpanContext> MultiVersionTransaction::getSpanContext() {
|
|||
return SpanContext();
|
||||
}
|
||||
|
||||
ThreadFuture<int64_t> MultiVersionTransaction::getTotalCost() {
|
||||
auto tr = getTransaction();
|
||||
auto f = tr.transaction ? tr.transaction->getTotalCost() : makeTimeout<int64_t>();
|
||||
return abortableFuture(f, tr.onChange);
|
||||
}
|
||||
|
||||
ThreadFuture<int64_t> MultiVersionTransaction::getApproximateSize() {
|
||||
auto tr = getTransaction();
|
||||
auto f = tr.transaction ? tr.transaction->getApproximateSize() : makeTimeout<int64_t>();
|
||||
|
@ -1863,6 +1889,9 @@ void MultiVersionDatabase::setOption(FDBDatabaseOptions::Option option, Optional
|
|||
TraceEvent("UnknownDatabaseOption").detail("Option", option);
|
||||
throw invalid_option();
|
||||
}
|
||||
if (itr->first == FDBDatabaseOptions::USE_CONFIG_DATABASE) {
|
||||
dbState->isConfigDB = true;
|
||||
}
|
||||
|
||||
int defaultFor = itr->second.defaultFor;
|
||||
if (defaultFor >= 0) {
|
||||
|
@ -1969,7 +1998,7 @@ ThreadFuture<ProtocolVersion> MultiVersionDatabase::getServerProtocol(Optional<P
|
|||
MultiVersionDatabase::DatabaseState::DatabaseState(ClusterConnectionRecord const& connectionRecord,
|
||||
Reference<IDatabase> versionMonitorDb)
|
||||
: dbVar(new ThreadSafeAsyncVar<Reference<IDatabase>>(Reference<IDatabase>(nullptr))),
|
||||
connectionRecord(connectionRecord), versionMonitorDb(versionMonitorDb), closed(false) {}
|
||||
connectionRecord(connectionRecord), versionMonitorDb(versionMonitorDb), closed(false), isConfigDB(false) {}
|
||||
|
||||
// Adds a client (local or externally loaded) that can be used to connect to the cluster
|
||||
void MultiVersionDatabase::DatabaseState::addClient(Reference<ClientInfo> client) {
|
||||
|
@ -2167,8 +2196,12 @@ void MultiVersionDatabase::DatabaseState::updateDatabase(Reference<IDatabase> ne
|
|||
.detail("ConnectionRecord", connectionRecord);
|
||||
}
|
||||
}
|
||||
// Verify the database has the necessary functionality to update the shared
|
||||
// state. Avoid updating the shared state if the database is a
|
||||
// configuration database, because a configuration database does not have
|
||||
// access to typical system keys and does not need to be updated.
|
||||
if (db.isValid() && dbProtocolVersion.present() &&
|
||||
MultiVersionApi::api->getApiVersion().hasClusterSharedStateMap()) {
|
||||
MultiVersionApi::api->getApiVersion().hasClusterSharedStateMap() && !isConfigDB) {
|
||||
Future<std::string> updateResult =
|
||||
MultiVersionApi::api->updateClusterSharedStateMap(connectionRecord, dbProtocolVersion.get(), db);
|
||||
sharedStateUpdater = map(errorOr(updateResult), [this](ErrorOr<std::string> result) {
|
||||
|
@ -2616,6 +2649,9 @@ void MultiVersionApi::setNetworkOptionInternal(FDBNetworkOptions::Option option,
|
|||
} else if (option == FDBNetworkOptions::TRACE_SHARE_AMONG_CLIENT_THREADS) {
|
||||
validateOption(value, false, true);
|
||||
traceShareBaseNameAmongThreads = true;
|
||||
} else if (option == FDBNetworkOptions::RETAIN_CLIENT_LIBRARY_COPIES) {
|
||||
validateOption(value, false, true);
|
||||
retainClientLibCopies = true;
|
||||
} else {
|
||||
forwardOption = true;
|
||||
}
|
||||
|
@ -2661,7 +2697,7 @@ void MultiVersionApi::setupNetwork() {
|
|||
externalClients[filename] = {};
|
||||
auto libCopies = copyExternalLibraryPerThread(path);
|
||||
for (int idx = 0; idx < libCopies.size(); ++idx) {
|
||||
bool unlinkOnLoad = libCopies[idx].second && CLIENT_KNOBS->DELETE_NATIVE_LIB_AFTER_LOADING;
|
||||
bool unlinkOnLoad = libCopies[idx].second && !retainClientLibCopies;
|
||||
externalClients[filename].push_back(Reference<ClientInfo>(
|
||||
new ClientInfo(new DLApi(libCopies[idx].first, unlinkOnLoad /*unlink on load*/),
|
||||
path,
|
||||
|
@ -2780,11 +2816,19 @@ void MultiVersionApi::runNetwork() {
|
|||
});
|
||||
}
|
||||
|
||||
localClient->api->runNetwork();
|
||||
try {
|
||||
localClient->api->runNetwork();
|
||||
} catch (const Error& e) {
|
||||
closeTraceFile();
|
||||
throw e;
|
||||
}
|
||||
|
||||
for (auto h : handles) {
|
||||
waitThread(h);
|
||||
}
|
||||
|
||||
TraceEvent("MultiVersionRunNetworkTerminating");
|
||||
closeTraceFile();
|
||||
}
|
||||
|
||||
void MultiVersionApi::stopNetwork() {
|
||||
|
@ -3066,8 +3110,8 @@ void MultiVersionApi::loadEnvironmentVariableNetworkOptions() {
|
|||
|
||||
MultiVersionApi::MultiVersionApi()
|
||||
: callbackOnMainThread(true), localClientDisabled(false), networkStartSetup(false), networkSetup(false),
|
||||
disableBypass(false), bypassMultiClientApi(false), externalClient(false), apiVersion(0), threadCount(0),
|
||||
tmpDir("/tmp"), traceShareBaseNameAmongThreads(false), envOptionsLoaded(false) {}
|
||||
disableBypass(false), bypassMultiClientApi(false), externalClient(false), retainClientLibCopies(false),
|
||||
apiVersion(0), threadCount(0), tmpDir("/tmp"), traceShareBaseNameAmongThreads(false), envOptionsLoaded(false) {}
|
||||
|
||||
MultiVersionApi* MultiVersionApi::api = new MultiVersionApi();
|
||||
|
||||
|
|
|
@ -3456,6 +3456,8 @@ ACTOR Future<Optional<Value>> getValue(Reference<TransactionState> trState,
|
|||
}
|
||||
trState->cx->getValueCompleted->latency = timer_int() - startTime;
|
||||
trState->cx->getValueCompleted->log();
|
||||
trState->totalCost +=
|
||||
getReadOperationCost(key.size() + (reply.value.present() ? reply.value.get().size() : 0));
|
||||
|
||||
if (getValueID.present()) {
|
||||
g_traceBatch.addEvent("GetValueDebug",
|
||||
|
@ -4015,6 +4017,7 @@ Future<RangeResultFamily> getExactRange(Reference<TransactionState> trState,
|
|||
req.version = version;
|
||||
req.begin = firstGreaterOrEqual(range.begin);
|
||||
req.end = firstGreaterOrEqual(range.end);
|
||||
|
||||
setMatchIndex<GetKeyValuesFamilyRequest>(req, matchIndex);
|
||||
req.spanContext = span.context;
|
||||
trState->cx->getLatestCommitVersions(
|
||||
|
@ -4284,6 +4287,7 @@ void getRangeFinished(Reference<TransactionState> trState,
|
|||
RangeResultFamily result) {
|
||||
int64_t bytes = getRangeResultFamilyBytes(result);
|
||||
|
||||
trState->totalCost += getReadOperationCost(bytes);
|
||||
trState->cx->transactionBytesRead += bytes;
|
||||
trState->cx->transactionKeysRead += result.size();
|
||||
|
||||
|
@ -4352,6 +4356,7 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
|
|||
state KeySelector originalEnd = end;
|
||||
state RangeResultFamily output;
|
||||
state Span span("NAPI:getRange"_loc, trState->spanContext);
|
||||
state Optional<UID> getRangeID = Optional<UID>();
|
||||
if (useTenant && trState->tenant().present()) {
|
||||
span.addAttribute("tenant"_sr, trState->tenant().get());
|
||||
}
|
||||
|
@ -4436,11 +4441,14 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
|
|||
|
||||
req.tags = trState->cx->sampleReadTags() ? trState->options.readTags : Optional<TagSet>();
|
||||
req.spanContext = span.context;
|
||||
if (trState->readOptions.present() && trState->readOptions.get().debugID.present()) {
|
||||
getRangeID = nondeterministicRandom()->randomUniqueID();
|
||||
g_traceBatch.addAttach(
|
||||
"TransactionAttachID", trState->readOptions.get().debugID.get().first(), getRangeID.get().first());
|
||||
}
|
||||
try {
|
||||
if (trState->readOptions.present() && trState->readOptions.get().debugID.present()) {
|
||||
g_traceBatch.addEvent("TransactionDebug",
|
||||
trState->readOptions.get().debugID.get().first(),
|
||||
"NativeAPI.getRange.Before");
|
||||
if (getRangeID.present()) {
|
||||
g_traceBatch.addEvent("TransactionDebug", getRangeID.get().first(), "NativeAPI.getRange.Before");
|
||||
/*TraceEvent("TransactionDebugGetRangeInfo", trState->readOptions.debugID.get())
|
||||
.detail("ReqBeginKey", req.begin.getKey())
|
||||
.detail("ReqEndKey", req.end.getKey())
|
||||
|
@ -4480,9 +4488,9 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
|
|||
throw;
|
||||
}
|
||||
|
||||
if (trState->readOptions.present() && trState->readOptions.get().debugID.present()) {
|
||||
if (getRangeID.present()) {
|
||||
g_traceBatch.addEvent("TransactionDebug",
|
||||
trState->readOptions.get().debugID.get().first(),
|
||||
getRangeID.get().first(),
|
||||
"NativeAPI.getRange.After"); //.detail("SizeOf", rep.data.size());
|
||||
/*TraceEvent("TransactionDebugGetRangeDone", trState->readOptions.debugID.get())
|
||||
.detail("ReqBeginKey", req.begin.getKey())
|
||||
|
@ -4596,11 +4604,9 @@ Future<RangeResultFamily> getRange(Reference<TransactionState> trState,
|
|||
}
|
||||
|
||||
} catch (Error& e) {
|
||||
if (trState->readOptions.present() && trState->readOptions.get().debugID.present()) {
|
||||
g_traceBatch.addEvent("TransactionDebug",
|
||||
trState->readOptions.get().debugID.get().first(),
|
||||
"NativeAPI.getRange.Error");
|
||||
TraceEvent("TransactionDebugError", trState->readOptions.get().debugID.get()).error(e);
|
||||
if (getRangeID.present()) {
|
||||
g_traceBatch.addEvent("TransactionDebug", getRangeID.get().first(), "NativeAPI.getRange.Error");
|
||||
TraceEvent("TransactionDebugError", getRangeID.get()).error(e);
|
||||
}
|
||||
if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed ||
|
||||
(e.code() == error_code_transaction_too_old && readVersion == latestVersion)) {
|
||||
|
@ -5766,6 +5772,7 @@ void Transaction::set(const KeyRef& key, const ValueRef& value, AddConflictRange
|
|||
auto r = singleKeyRange(key, req.arena);
|
||||
auto v = ValueRef(req.arena, value);
|
||||
t.mutations.emplace_back(req.arena, MutationRef::SetValue, r.begin, v);
|
||||
trState->totalCost += getWriteOperationCost(key.expectedSize() + value.expectedSize());
|
||||
|
||||
if (addConflictRange) {
|
||||
t.write_conflict_ranges.push_back(req.arena, r);
|
||||
|
@ -5795,6 +5802,7 @@ void Transaction::atomicOp(const KeyRef& key,
|
|||
auto v = ValueRef(req.arena, operand);
|
||||
|
||||
t.mutations.emplace_back(req.arena, operationType, r.begin, v);
|
||||
trState->totalCost += getWriteOperationCost(key.expectedSize());
|
||||
|
||||
if (addConflictRange && operationType != MutationRef::SetVersionstampedKey)
|
||||
t.write_conflict_ranges.push_back(req.arena, r);
|
||||
|
@ -5826,7 +5834,10 @@ void Transaction::clear(const KeyRangeRef& range, AddConflictRange addConflictRa
|
|||
return;
|
||||
|
||||
t.mutations.emplace_back(req.arena, MutationRef::ClearRange, r.begin, r.end);
|
||||
|
||||
// NOTE: The throttling cost of each clear is assumed to be one page.
|
||||
// This makes compuation fast, but can be inaccurate and may
|
||||
// underestimate the cost of large clears.
|
||||
trState->totalCost += CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR;
|
||||
if (addConflictRange)
|
||||
t.write_conflict_ranges.push_back(req.arena, r);
|
||||
}
|
||||
|
@ -6142,6 +6153,7 @@ ACTOR static Future<Optional<CommitResult>> determineCommitStatus(Reference<Tran
|
|||
IdempotencyIdRef idempotencyId) {
|
||||
state Transaction tr(trState->cx);
|
||||
state int retries = 0;
|
||||
state Version expiredVersion;
|
||||
state Span span("NAPI:determineCommitStatus"_loc, trState->spanContext);
|
||||
tr.span.setParent(span.context);
|
||||
loop {
|
||||
|
@ -6151,11 +6163,19 @@ ACTOR static Future<Optional<CommitResult>> determineCommitStatus(Reference<Tran
|
|||
tr.trState->authToken = trState->authToken;
|
||||
tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);
|
||||
KeyBackedObjectProperty<IdempotencyIdsExpiredVersion, _Unversioned> expiredKey(idempotencyIdsExpiredVersion,
|
||||
Unversioned());
|
||||
IdempotencyIdsExpiredVersion expiredVal = wait(expiredKey.getD(&tr));
|
||||
expiredVersion = expiredVal.expired;
|
||||
if (expiredVersion >= minPossibleCommitVersion) {
|
||||
throw commit_unknown_result_fatal();
|
||||
}
|
||||
Version rv = wait(tr.getReadVersion());
|
||||
TraceEvent("DetermineCommitStatusAttempt")
|
||||
.detail("IdempotencyId", idempotencyId.asStringRefUnsafe())
|
||||
.detail("Retries", retries)
|
||||
.detail("ReadVersion", rv)
|
||||
.detail("ExpiredVersion", expiredVersion)
|
||||
.detail("MinPossibleCommitVersion", minPossibleCommitVersion)
|
||||
.detail("MaxPossibleCommitVersion", maxPossibleCommitVersion);
|
||||
KeyRange possibleRange =
|
||||
|
@ -6230,14 +6250,14 @@ ACTOR Future<Optional<ClientTrCommitCostEstimation>> estimateCommitCosts(Referen
|
|||
state int i = 0;
|
||||
|
||||
for (; i < transaction->mutations.size(); ++i) {
|
||||
auto* it = &transaction->mutations[i];
|
||||
auto const& mutation = transaction->mutations[i];
|
||||
|
||||
if (it->type == MutationRef::Type::SetValue || it->isAtomicOp()) {
|
||||
if (mutation.type == MutationRef::Type::SetValue || mutation.isAtomicOp()) {
|
||||
trCommitCosts.opsCount++;
|
||||
trCommitCosts.writeCosts += getWriteOperationCost(it->expectedSize());
|
||||
} else if (it->type == MutationRef::Type::ClearRange) {
|
||||
trCommitCosts.writeCosts += getWriteOperationCost(mutation.expectedSize());
|
||||
} else if (mutation.type == MutationRef::Type::ClearRange) {
|
||||
trCommitCosts.opsCount++;
|
||||
keyRange = KeyRangeRef(it->param1, it->param2);
|
||||
keyRange = KeyRangeRef(mutation.param1, mutation.param2);
|
||||
if (trState->options.expensiveClearCostEstimation) {
|
||||
StorageMetrics m = wait(trState->cx->getStorageMetrics(keyRange, CLIENT_KNOBS->TOO_MANY, trState));
|
||||
trCommitCosts.clearIdxCosts.emplace_back(i, getWriteOperationCost(m.bytes));
|
||||
|
@ -6366,8 +6386,11 @@ ACTOR static Future<Void> tryCommit(Reference<TransactionState> trState,
|
|||
}
|
||||
|
||||
if (req.tagSet.present() && trState->options.priority < TransactionPriority::IMMEDIATE) {
|
||||
wait(store(req.transaction.read_snapshot, readVersion) &&
|
||||
store(req.commitCostEstimation, estimateCommitCosts(trState, &req.transaction)));
|
||||
state Future<Optional<ClientTrCommitCostEstimation>> commitCostFuture =
|
||||
estimateCommitCosts(trState, &req.transaction);
|
||||
// We need to wait for the read version first so that we can be notified if the database is locked
|
||||
wait(store(req.transaction.read_snapshot, readVersion));
|
||||
wait(store(req.commitCostEstimation, commitCostFuture));
|
||||
} else {
|
||||
wait(store(req.transaction.read_snapshot, readVersion));
|
||||
}
|
||||
|
@ -6399,6 +6422,12 @@ ACTOR static Future<Void> tryCommit(Reference<TransactionState> trState,
|
|||
|
||||
req.debugID = commitID;
|
||||
state Future<CommitID> reply;
|
||||
// Only gets filled in in the happy path where we don't have to commit on the first proxy or use provisional
|
||||
// proxies
|
||||
state int alternativeChosen = -1;
|
||||
// Only valid if alternativeChosen >= 0
|
||||
state Reference<CommitProxyInfo> proxiesUsed;
|
||||
|
||||
if (trState->options.commitOnFirstProxy) {
|
||||
if (trState->cx->clientInfo->get().firstCommitProxy.present()) {
|
||||
reply = throwErrorOr(brokenPromiseToMaybeDelivered(
|
||||
|
@ -6409,11 +6438,13 @@ ACTOR static Future<Void> tryCommit(Reference<TransactionState> trState,
|
|||
: Never();
|
||||
}
|
||||
} else {
|
||||
reply = basicLoadBalance(trState->cx->getCommitProxies(trState->useProvisionalProxies),
|
||||
proxiesUsed = trState->cx->getCommitProxies(trState->useProvisionalProxies);
|
||||
reply = basicLoadBalance(proxiesUsed,
|
||||
&CommitProxyInterface::commit,
|
||||
req,
|
||||
TaskPriority::DefaultPromiseEndpoint,
|
||||
AtMostOnce::True);
|
||||
AtMostOnce::True,
|
||||
&alternativeChosen);
|
||||
}
|
||||
state double grvTime = now();
|
||||
choose {
|
||||
|
@ -6463,6 +6494,12 @@ ACTOR static Future<Void> tryCommit(Reference<TransactionState> trState,
|
|||
ci.version,
|
||||
req,
|
||||
trState->tenant()));
|
||||
if (trState->automaticIdempotency && alternativeChosen >= 0) {
|
||||
// Automatic idempotency means we're responsible for best effort idempotency id clean up
|
||||
proxiesUsed->getInterface(alternativeChosen)
|
||||
.expireIdempotencyId.send(ExpireIdempotencyIdRequest{
|
||||
ci.version, uint8_t(ci.txnBatchId >> 8), trState->getTenantInfo() });
|
||||
}
|
||||
return Void();
|
||||
} else {
|
||||
// clear the RYW transaction which contains previous conflicting keys
|
||||
|
@ -6948,11 +6985,16 @@ void Transaction::setOption(FDBTransactionOptions::Option option, Optional<Strin
|
|||
throw e;
|
||||
}
|
||||
tr.idempotencyId = IdempotencyIdRef(tr.arena, IdempotencyIdRef(value.get()));
|
||||
trState->automaticIdempotency = false;
|
||||
break;
|
||||
case FDBTransactionOptions::AUTOMATIC_IDEMPOTENCY:
|
||||
validateOptionValueNotPresent(value);
|
||||
tr.idempotencyId = IdempotencyIdRef(
|
||||
tr.arena, IdempotencyIdRef(BinaryWriter::toValue(deterministicRandom()->randomUniqueID(), Unversioned())));
|
||||
if (!tr.idempotencyId.valid()) {
|
||||
tr.idempotencyId = IdempotencyIdRef(
|
||||
tr.arena,
|
||||
IdempotencyIdRef(BinaryWriter::toValue(deterministicRandom()->randomUniqueID(), Unversioned())));
|
||||
}
|
||||
trState->automaticIdempotency = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -7519,12 +7561,11 @@ ACTOR Future<StorageMetrics> getStorageMetricsLargeKeyRange(Database cx,
|
|||
Optional<Reference<TransactionState>> trState);
|
||||
|
||||
ACTOR Future<StorageMetrics> doGetStorageMetrics(Database cx,
|
||||
TenantInfo tenantInfo,
|
||||
KeyRange keys,
|
||||
Reference<LocationInfo> locationInfo,
|
||||
TenantMapEntry tenantEntry,
|
||||
Optional<Reference<TransactionState>> trState) {
|
||||
state TenantInfo tenantInfo =
|
||||
wait(trState.present() ? populateAndGetTenant(trState.get(), keys.begin, latestVersion) : TenantInfo());
|
||||
try {
|
||||
WaitMetricsRequest req(tenantInfo, keys, StorageMetrics(), StorageMetrics());
|
||||
req.min.bytes = 0;
|
||||
|
@ -7533,12 +7574,16 @@ ACTOR Future<StorageMetrics> doGetStorageMetrics(Database cx,
|
|||
locationInfo->locations(), &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution));
|
||||
return m;
|
||||
} catch (Error& e) {
|
||||
if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
|
||||
if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed) {
|
||||
cx->invalidateCache(tenantEntry.prefix, keys);
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
} else if (e.code() == error_code_unknown_tenant && trState.present() &&
|
||||
tenantInfo.tenantId != TenantInfo::INVALID_TENANT) {
|
||||
wait(trState.get()->handleUnknownTenant());
|
||||
} else {
|
||||
TraceEvent(SevError, "WaitStorageMetricsError").error(e);
|
||||
throw;
|
||||
}
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
cx->invalidateCache(tenantEntry.prefix, keys);
|
||||
|
||||
StorageMetrics m = wait(getStorageMetricsLargeKeyRange(cx, keys, trState));
|
||||
return m;
|
||||
|
@ -7569,7 +7614,7 @@ ACTOR Future<StorageMetrics> getStorageMetricsLargeKeyRange(Database cx,
|
|||
partBegin = (i == 0) ? keys.begin : locations[i].range.begin;
|
||||
partEnd = (i == nLocs - 1) ? keys.end : locations[i].range.end;
|
||||
fx[i] = doGetStorageMetrics(
|
||||
cx, KeyRangeRef(partBegin, partEnd), locations[i].locations, locations[i].tenantEntry, trState);
|
||||
cx, tenantInfo, KeyRangeRef(partBegin, partEnd), locations[i].locations, locations[i].tenantEntry, trState);
|
||||
}
|
||||
wait(waitForAll(fx));
|
||||
for (int i = 0; i < nLocs; i++) {
|
||||
|
@ -7724,27 +7769,18 @@ ACTOR Future<Optional<StorageMetrics>> waitStorageMetricsWithLocation(TenantInfo
|
|||
StorageMetrics min,
|
||||
StorageMetrics max,
|
||||
StorageMetrics permittedError) {
|
||||
try {
|
||||
Future<StorageMetrics> fx;
|
||||
if (locations.size() > 1) {
|
||||
fx = waitStorageMetricsMultipleLocations(tenantInfo, locations, min, max, permittedError);
|
||||
} else {
|
||||
WaitMetricsRequest req(tenantInfo, keys, min, max);
|
||||
fx = loadBalance(locations[0].locations->locations(),
|
||||
&StorageServerInterface::waitMetrics,
|
||||
req,
|
||||
TaskPriority::DataDistribution);
|
||||
}
|
||||
StorageMetrics x = wait(fx);
|
||||
return x;
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevDebug, "WaitStorageMetricsError").error(e);
|
||||
if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
|
||||
TraceEvent(SevError, "WaitStorageMetricsError").error(e);
|
||||
throw;
|
||||
}
|
||||
Future<StorageMetrics> fx;
|
||||
if (locations.size() > 1) {
|
||||
fx = waitStorageMetricsMultipleLocations(tenantInfo, locations, min, max, permittedError);
|
||||
} else {
|
||||
WaitMetricsRequest req(tenantInfo, keys, min, max);
|
||||
fx = loadBalance(locations[0].locations->locations(),
|
||||
&StorageServerInterface::waitMetrics,
|
||||
req,
|
||||
TaskPriority::DataDistribution);
|
||||
}
|
||||
return Optional<StorageMetrics>();
|
||||
StorageMetrics x = wait(fx);
|
||||
return x;
|
||||
}
|
||||
|
||||
ACTOR Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(
|
||||
|
@ -7757,9 +7793,9 @@ ACTOR Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(
|
|||
int expectedShardCount,
|
||||
Optional<Reference<TransactionState>> trState) {
|
||||
state Span span("NAPI:WaitStorageMetrics"_loc, generateSpanID(cx->transactionTracingSample));
|
||||
state TenantInfo tenantInfo =
|
||||
wait(trState.present() ? populateAndGetTenant(trState.get(), keys.begin, latestVersion) : TenantInfo());
|
||||
loop {
|
||||
state TenantInfo tenantInfo =
|
||||
wait(trState.present() ? populateAndGetTenant(trState.get(), keys.begin, latestVersion) : TenantInfo());
|
||||
state std::vector<KeyRangeLocationInfo> locations =
|
||||
wait(getKeyRangeLocations(cx,
|
||||
tenantInfo,
|
||||
|
@ -7789,13 +7825,25 @@ ACTOR Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(
|
|||
continue;
|
||||
}
|
||||
|
||||
Optional<StorageMetrics> res =
|
||||
wait(waitStorageMetricsWithLocation(tenantInfo, keys, locations, min, max, permittedError));
|
||||
if (res.present()) {
|
||||
return std::make_pair(res, -1);
|
||||
try {
|
||||
Optional<StorageMetrics> res =
|
||||
wait(waitStorageMetricsWithLocation(tenantInfo, keys, locations, min, max, permittedError));
|
||||
if (res.present()) {
|
||||
return std::make_pair(res, -1);
|
||||
}
|
||||
} catch (Error& e) {
|
||||
TraceEvent(SevDebug, "WaitStorageMetricsError").error(e);
|
||||
if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed) {
|
||||
cx->invalidateCache(locations[0].tenantEntry.prefix, keys);
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
} else if (e.code() == error_code_unknown_tenant && trState.present() &&
|
||||
tenantInfo.tenantId != TenantInfo::INVALID_TENANT) {
|
||||
wait(trState.get()->handleUnknownTenant());
|
||||
} else {
|
||||
TraceEvent(SevError, "WaitStorageMetricsError").error(e);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
cx->invalidateCache(locations[0].tenantEntry.prefix, keys);
|
||||
wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7965,6 +8013,21 @@ ACTOR Future<TenantMapEntry> blobGranuleGetTenantEntry(Transaction* self,
|
|||
return tme;
|
||||
}
|
||||
|
||||
// Tenant's are supposed to be unique and therefore can be loaded once.
|
||||
// There is an assumption that a tenant exists as long as operations are happening against said tenant.
|
||||
ACTOR Future<TenantMapEntry> blobLoadTenantMapEntry(Database* db, Key rangeStartKey, Optional<TenantName> tenantName) {
|
||||
state Transaction tr(*db);
|
||||
|
||||
loop {
|
||||
try {
|
||||
TenantMapEntry tme = wait(blobGranuleGetTenantEntry(&tr, rangeStartKey, tenantName));
|
||||
return tme;
|
||||
} catch (Error& e) {
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Future<Standalone<VectorRef<KeyRef>>> Transaction::getRangeSplitPoints(KeyRange const& keys, int64_t chunkSize) {
|
||||
return ::getRangeSplitPoints(
|
||||
trState, keys, chunkSize, readVersion.isValid() && readVersion.isReady() ? readVersion.get() : latestVersion);
|
||||
|
@ -8436,7 +8499,6 @@ ACTOR Future<Version> verifyBlobRangeActor(Reference<DatabaseContext> cx,
|
|||
state Version readVersionOut = invalidVersion;
|
||||
state int batchSize = BUGGIFY ? deterministicRandom()->randomInt(2, 10) : CLIENT_KNOBS->BG_TOO_MANY_GRANULES / 2;
|
||||
state int loadSize = (BUGGIFY ? deterministicRandom()->randomInt(1, 20) : 20) * batchSize;
|
||||
state bool loadedTenantEntry = false;
|
||||
|
||||
if (version.present()) {
|
||||
if (version.get() == latestVersion) {
|
||||
|
@ -8456,16 +8518,16 @@ ACTOR Future<Version> verifyBlobRangeActor(Reference<DatabaseContext> cx,
|
|||
}
|
||||
}
|
||||
|
||||
if (tenantName.present()) {
|
||||
TenantMapEntry tme = wait(blobLoadTenantMapEntry(&db, range.begin, tenantName));
|
||||
range = range.withPrefix(tme.prefix);
|
||||
curRegion = KeyRangeRef(range.begin, range.begin);
|
||||
}
|
||||
|
||||
loop {
|
||||
if (curRegion.begin >= range.end) {
|
||||
return readVersionOut;
|
||||
}
|
||||
if (tenantName.present() && !loadedTenantEntry) {
|
||||
TenantMapEntry tenantEntry = wait(blobGranuleGetTenantEntry(&tr, range.begin, tenantName));
|
||||
loadedTenantEntry = true;
|
||||
range = range.withPrefix(tenantEntry.prefix);
|
||||
curRegion = KeyRangeRef(range.begin, range.begin);
|
||||
}
|
||||
loop {
|
||||
try {
|
||||
wait(store(allRanges, tr.getBlobGranuleRanges(KeyRangeRef(curRegion.begin, range.end), loadSize)));
|
||||
|
@ -9328,7 +9390,7 @@ void handleTSSChangeFeedMismatch(const ChangeFeedStreamRequest& request,
|
|||
mismatchEvent.detail("EndKey", request.range.end);
|
||||
mismatchEvent.detail("CanReadPopped", request.canReadPopped);
|
||||
mismatchEvent.detail("PopVersion", popVersion);
|
||||
mismatchEvent.detail("DebugUID", request.debugUID);
|
||||
mismatchEvent.detail("DebugUID", request.id);
|
||||
|
||||
// mismatch info
|
||||
mismatchEvent.detail("MatchesFound", matchesFound);
|
||||
|
@ -9354,7 +9416,7 @@ void handleTSSChangeFeedMismatch(const ChangeFeedStreamRequest& request,
|
|||
"TSSMismatchChangeFeedStream");
|
||||
summaryEvent.detail("TSSID", tssData.tssId)
|
||||
.detail("MismatchId", mismatchUID)
|
||||
.detail("FeedDebugUID", request.debugUID);
|
||||
.detail("FeedDebugUID", request.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9879,7 +9941,8 @@ ACTOR Future<Void> mergeChangeFeedStream(Reference<DatabaseContext> db,
|
|||
Version* begin,
|
||||
Version end,
|
||||
int replyBufferSize,
|
||||
bool canReadPopped) {
|
||||
bool canReadPopped,
|
||||
ReadOptions readOptions) {
|
||||
state std::vector<Future<Void>> fetchers(interfs.size());
|
||||
state std::vector<Future<Void>> onErrors(interfs.size());
|
||||
state std::vector<MutationAndVersionStream> streams(interfs.size());
|
||||
|
@ -9907,10 +9970,11 @@ ACTOR Future<Void> mergeChangeFeedStream(Reference<DatabaseContext> db,
|
|||
if (replyBufferSize != -1 && req.replyBufferSize < CLIENT_KNOBS->CHANGE_FEED_STREAM_MIN_BYTES) {
|
||||
req.replyBufferSize = CLIENT_KNOBS->CHANGE_FEED_STREAM_MIN_BYTES;
|
||||
}
|
||||
req.debugUID = deterministicRandom()->randomUniqueID();
|
||||
debugUIDs.push_back(req.debugUID);
|
||||
mergeCursorUID =
|
||||
UID(mergeCursorUID.first() ^ req.debugUID.first(), mergeCursorUID.second() ^ req.debugUID.second());
|
||||
req.options = readOptions;
|
||||
req.id = deterministicRandom()->randomUniqueID();
|
||||
|
||||
debugUIDs.push_back(req.id);
|
||||
mergeCursorUID = UID(mergeCursorUID.first() ^ req.id.first(), mergeCursorUID.second() ^ req.id.second());
|
||||
|
||||
results->streams.push_back(interfs[i].first.changeFeedStream.getReplyStream(req));
|
||||
maybeDuplicateTSSChangeFeedStream(req,
|
||||
|
@ -10113,7 +10177,8 @@ ACTOR Future<Void> singleChangeFeedStream(Reference<DatabaseContext> db,
|
|||
Version* begin,
|
||||
Version end,
|
||||
int replyBufferSize,
|
||||
bool canReadPopped) {
|
||||
bool canReadPopped,
|
||||
ReadOptions readOptions) {
|
||||
state Database cx(db);
|
||||
state ChangeFeedStreamRequest req;
|
||||
state Optional<ChangeFeedTSSValidationData> tssData;
|
||||
|
@ -10123,10 +10188,11 @@ ACTOR Future<Void> singleChangeFeedStream(Reference<DatabaseContext> db,
|
|||
req.range = range;
|
||||
req.canReadPopped = canReadPopped;
|
||||
req.replyBufferSize = replyBufferSize;
|
||||
req.debugUID = deterministicRandom()->randomUniqueID();
|
||||
req.options = readOptions;
|
||||
req.id = deterministicRandom()->randomUniqueID();
|
||||
|
||||
if (DEBUG_CF_CLIENT_TRACE) {
|
||||
TraceEvent(SevDebug, "TraceChangeFeedClientSingleCursor", req.debugUID)
|
||||
TraceEvent(SevDebug, "TraceChangeFeedClientSingleCursor", req.id)
|
||||
.detail("FeedID", rangeID)
|
||||
.detail("Range", range)
|
||||
.detail("Begin", *begin)
|
||||
|
@ -10166,7 +10232,8 @@ ACTOR Future<Void> getChangeFeedStreamActor(Reference<DatabaseContext> db,
|
|||
Version end,
|
||||
KeyRange range,
|
||||
int replyBufferSize,
|
||||
bool canReadPopped) {
|
||||
bool canReadPopped,
|
||||
ReadOptions readOptions) {
|
||||
state Database cx(db);
|
||||
state Span span("NAPI:GetChangeFeedStream"_loc);
|
||||
db->usedAnyChangeFeeds = true;
|
||||
|
@ -10256,14 +10323,22 @@ ACTOR Future<Void> getChangeFeedStreamActor(Reference<DatabaseContext> db,
|
|||
}
|
||||
CODE_PROBE(true, "Change feed merge cursor");
|
||||
// TODO (jslocum): validate connectionFileChanged behavior
|
||||
wait(
|
||||
mergeChangeFeedStream(db, interfs, results, rangeID, &begin, end, replyBufferSize, canReadPopped) ||
|
||||
cx->connectionFileChanged());
|
||||
wait(mergeChangeFeedStream(
|
||||
db, interfs, results, rangeID, &begin, end, replyBufferSize, canReadPopped, readOptions) ||
|
||||
cx->connectionFileChanged());
|
||||
} else {
|
||||
CODE_PROBE(true, "Change feed single cursor");
|
||||
StorageServerInterface interf = locations[0].locations->getInterface(chosenLocations[0]);
|
||||
wait(singleChangeFeedStream(
|
||||
db, interf, range, results, rangeID, &begin, end, replyBufferSize, canReadPopped) ||
|
||||
wait(singleChangeFeedStream(db,
|
||||
interf,
|
||||
range,
|
||||
results,
|
||||
rangeID,
|
||||
&begin,
|
||||
end,
|
||||
replyBufferSize,
|
||||
canReadPopped,
|
||||
readOptions) ||
|
||||
cx->connectionFileChanged());
|
||||
}
|
||||
} catch (Error& e) {
|
||||
|
@ -10330,9 +10405,17 @@ Future<Void> DatabaseContext::getChangeFeedStream(Reference<ChangeFeedData> resu
|
|||
Version end,
|
||||
KeyRange range,
|
||||
int replyBufferSize,
|
||||
bool canReadPopped) {
|
||||
return getChangeFeedStreamActor(
|
||||
Reference<DatabaseContext>::addRef(this), results, rangeID, begin, end, range, replyBufferSize, canReadPopped);
|
||||
bool canReadPopped,
|
||||
ReadOptions readOptions) {
|
||||
return getChangeFeedStreamActor(Reference<DatabaseContext>::addRef(this),
|
||||
results,
|
||||
rangeID,
|
||||
begin,
|
||||
end,
|
||||
range,
|
||||
replyBufferSize,
|
||||
canReadPopped,
|
||||
readOptions);
|
||||
}
|
||||
|
||||
Version OverlappingChangeFeedsInfo::getFeedMetadataVersion(const KeyRangeRef& range) const {
|
||||
|
@ -10564,6 +10647,34 @@ Reference<DatabaseContext::TransactionT> DatabaseContext::createTransaction() {
|
|||
}
|
||||
|
||||
// BlobGranule API.
|
||||
ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobRanges(Transaction* tr, KeyRange range, int batchLimit) {
|
||||
state Standalone<VectorRef<KeyRangeRef>> blobRanges;
|
||||
state Key beginKey = range.begin;
|
||||
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
loop {
|
||||
|
||||
state RangeResult results =
|
||||
wait(krmGetRangesUnaligned(tr, blobRangeKeys.begin, KeyRangeRef(beginKey, range.end), 2 * batchLimit + 2));
|
||||
|
||||
blobRanges.arena().dependsOn(results.arena());
|
||||
for (int i = 0; i < results.size() - 1; i++) {
|
||||
if (results[i].value == blobRangeActive) {
|
||||
blobRanges.push_back(blobRanges.arena(), KeyRangeRef(results[i].key, results[i + 1].key));
|
||||
}
|
||||
if (blobRanges.size() == batchLimit) {
|
||||
return blobRanges;
|
||||
}
|
||||
}
|
||||
|
||||
if (!results.more) {
|
||||
return blobRanges;
|
||||
}
|
||||
beginKey = results.back().key;
|
||||
}
|
||||
}
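Editor's note, not part of the change: a minimal usage sketch of the getBlobRanges helper added above, assuming it runs inside an ACTOR with the usual fdbclient includes; the range (normalKeys) and the batch limit of 100 are placeholders. The helper itself does not retry, so the caller owns the onError loop, as listBlobbifiedRangesActor does further below.

    // Sketch only: list up to 100 active blob ranges intersecting normalKeys.
    state Transaction tr(cx);
    loop {
        try {
            Standalone<VectorRef<KeyRangeRef>> ranges = wait(getBlobRanges(&tr, normalKeys, 100));
            for (auto& r : ranges) {
                TraceEvent("BlobRangeListedExample").detail("Range", r);
            }
            break;
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }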
|
||||
|
||||
ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
|
||||
KeyRange range,
|
||||
Version purgeVersion,
|
||||
|
@ -10573,7 +10684,6 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
|
|||
state Transaction tr(cx);
|
||||
state Key purgeKey;
|
||||
state KeyRange purgeRange = range;
|
||||
state bool loadedTenantPrefix = false;
|
||||
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
if (purgeVersion == latestVersion) {
|
||||
|
@ -10593,23 +10703,25 @@ ACTOR Future<Key> purgeBlobGranulesActor(Reference<DatabaseContext> db,
|
|||
throw unsupported_operation();
|
||||
}
|
||||
|
||||
if (tenant.present()) {
|
||||
TenantMapEntry tme = wait(blobLoadTenantMapEntry(&cx, range.begin, tenant));
|
||||
purgeRange = purgeRange.withPrefix(tme.prefix);
|
||||
}
|
||||
|
||||
loop {
|
||||
try {
|
||||
tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
tr.setOption(FDBTransactionOptions::LOCK_AWARE);
|
||||
|
||||
if (tenant.present() && !loadedTenantPrefix) {
|
||||
TenantMapEntry tenantEntry = wait(blobGranuleGetTenantEntry(&tr, range.begin, tenant));
|
||||
loadedTenantPrefix = true;
|
||||
purgeRange = purgeRange.withPrefix(tenantEntry.prefix);
|
||||
}
|
||||
|
||||
// must be aligned to blob range(s)
|
||||
state Future<Optional<Value>> beginPresent = tr.get(purgeRange.begin.withPrefix(blobRangeKeys.begin));
|
||||
state Future<Optional<Value>> endPresent = tr.get(purgeRange.end.withPrefix(blobRangeKeys.begin));
|
||||
wait(success(beginPresent) && success(endPresent));
|
||||
if (!beginPresent.get().present() || !endPresent.get().present()) {
|
||||
state Future<Standalone<VectorRef<KeyRangeRef>>> blobbifiedBegin =
|
||||
getBlobRanges(&tr, KeyRangeRef(purgeRange.begin, purgeRange.begin), 2);
|
||||
state Future<Standalone<VectorRef<KeyRangeRef>>> blobbifiedEnd =
|
||||
getBlobRanges(&tr, KeyRangeRef(purgeRange.end, purgeRange.end), 2);
|
||||
wait(success(blobbifiedBegin) && success(blobbifiedEnd));
|
||||
if ((!blobbifiedBegin.get().empty() && blobbifiedBegin.get().front().begin < purgeRange.begin) ||
|
||||
(!blobbifiedEnd.get().empty() && blobbifiedEnd.get().back().end > purgeRange.end)) {
|
||||
TraceEvent("UnalignedPurge")
|
||||
.detail("Range", range)
|
||||
.detail("Version", purgeVersion)
|
||||
|
@ -10686,46 +10798,17 @@ Future<Void> DatabaseContext::waitPurgeGranulesComplete(Key purgeKey) {
|
|||
return waitPurgeGranulesCompleteActor(Reference<DatabaseContext>::addRef(this), purgeKey);
|
||||
}
|
||||
|
||||
ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> getBlobRanges(Reference<ReadYourWritesTransaction> tr,
|
||||
KeyRange range,
|
||||
int batchLimit) {
|
||||
state Standalone<VectorRef<KeyRangeRef>> blobRanges;
|
||||
state Key beginKey = range.begin;
|
||||
|
||||
loop {
|
||||
try {
|
||||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
|
||||
state RangeResult results = wait(
|
||||
krmGetRangesUnaligned(tr, blobRangeKeys.begin, KeyRangeRef(beginKey, range.end), 2 * batchLimit + 2));
|
||||
|
||||
blobRanges.arena().dependsOn(results.arena());
|
||||
for (int i = 0; i < results.size() - 1; i++) {
|
||||
if (results[i].value == blobRangeActive) {
|
||||
blobRanges.push_back(blobRanges.arena(), KeyRangeRef(results[i].key, results[i + 1].key));
|
||||
}
|
||||
if (blobRanges.size() == batchLimit) {
|
||||
return blobRanges;
|
||||
}
|
||||
}
|
||||
|
||||
if (!results.more) {
|
||||
return blobRanges;
|
||||
}
|
||||
beginKey = results.back().key;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ACTOR Future<bool> setBlobRangeActor(Reference<DatabaseContext> cx,
|
||||
KeyRange range,
|
||||
bool active,
|
||||
Optional<TenantName> tenantName) {
|
||||
state Database db(cx);
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
|
||||
state bool loadedTenantEntry = false;
|
||||
|
||||
if (tenantName.present()) {
|
||||
TenantMapEntry tme = wait(blobLoadTenantMapEntry(&db, range.begin, tenantName));
|
||||
range = range.withPrefix(tme.prefix);
|
||||
}
|
||||
|
||||
state Value value = active ? blobRangeActive : blobRangeInactive;
|
||||
loop {
|
||||
|
@ -10733,14 +10816,7 @@ ACTOR Future<bool> setBlobRangeActor(Reference<DatabaseContext> cx,
|
|||
tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
|
||||
tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
|
||||
|
||||
if (tenantName.present() && !loadedTenantEntry) {
|
||||
TenantMapEntry tenantEntry =
|
||||
wait(blobGranuleGetTenantEntry(&tr->getTransaction(), range.begin, tenantName));
|
||||
loadedTenantEntry = true;
|
||||
range = range.withPrefix(tenantEntry.prefix);
|
||||
}
|
||||
|
||||
Standalone<VectorRef<KeyRangeRef>> startBlobRanges = wait(getBlobRanges(tr, range, 1));
|
||||
Standalone<VectorRef<KeyRangeRef>> startBlobRanges = wait(getBlobRanges(&tr->getTransaction(), range, 1));
|
||||
|
||||
if (active) {
|
||||
// Idempotent request.
|
||||
|
@ -10788,23 +10864,26 @@ ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRangesActor(Refer
|
|||
KeyRange range,
|
||||
int rangeLimit,
|
||||
Optional<TenantName> tenantName) {
|
||||
|
||||
state Database db(cx);
|
||||
state Reference<ReadYourWritesTransaction> tr = makeReference<ReadYourWritesTransaction>(db);
|
||||
state Transaction tr(db);
|
||||
state TenantMapEntry tme;
|
||||
state Standalone<VectorRef<KeyRangeRef>> blobRanges;
|
||||
|
||||
if (tenantName.present()) {
|
||||
wait(store(tme, blobLoadTenantMapEntry(&db, range.begin, tenantName)));
|
||||
range = range.withPrefix(tme.prefix);
|
||||
}
|
||||
|
||||
loop {
|
||||
try {
|
||||
if (tenantName.present()) {
|
||||
wait(store(tme, blobGranuleGetTenantEntry(&tr->getTransaction(), range.begin, tenantName)));
|
||||
range = range.withPrefix(tme.prefix);
|
||||
}
|
||||
wait(store(blobRanges, getBlobRanges(&tr, range, rangeLimit)));
|
||||
break;
|
||||
} catch (Error& e) {
|
||||
wait(tr->onError(e));
|
||||
wait(tr.onError(e));
|
||||
}
|
||||
}
|
||||
|
||||
state Standalone<VectorRef<KeyRangeRef>> blobRanges = wait(getBlobRanges(tr, range, rangeLimit));
|
||||
if (!tenantName.present()) {
|
||||
return blobRanges;
|
||||
}
|
||||
|
@ -10826,9 +10905,9 @@ ACTOR Future<Standalone<VectorRef<KeyRangeRef>>> listBlobbifiedRangesActor(Refer
|
|||
}
|
||||
|
||||
Future<Standalone<VectorRef<KeyRangeRef>>> DatabaseContext::listBlobbifiedRanges(KeyRange range,
|
||||
int rowLimit,
|
||||
int rangeLimit,
|
||||
Optional<TenantName> tenantName) {
|
||||
return listBlobbifiedRangesActor(Reference<DatabaseContext>::addRef(this), range, rowLimit, tenantName);
|
||||
return listBlobbifiedRangesActor(Reference<DatabaseContext>::addRef(this), range, rangeLimit, tenantName);
|
||||
}
|
||||
|
||||
int64_t getMaxKeySize(KeyRef const& key) {
|
||||
|
|
|
@ -42,7 +42,7 @@ ACTOR static Future<Void> produce(ParallelStream<ParallelStreamTest::TestValue>:
|
|||
}
|
||||
|
||||
ACTOR static Future<Void> consume(FutureStream<ParallelStreamTest::TestValue> stream, int expected) {
|
||||
state int next;
|
||||
state int next = 0;
|
||||
try {
|
||||
loop {
|
||||
ParallelStreamTest::TestValue value = waitNext(stream);
|
||||
|
|
|
@ -564,6 +564,10 @@ Version PaxosConfigTransaction::getCommittedVersion() const {
|
|||
return impl->getCommittedVersion();
|
||||
}
|
||||
|
||||
int64_t PaxosConfigTransaction::getTotalCost() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t PaxosConfigTransaction::getApproximateSize() const {
|
||||
return impl->getApproximateSize();
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
#include "flow/Hostname.h"
|
||||
#include "flow/UnitTest.h"
|
||||
#include "rapidxml/rapidxml.hpp"
|
||||
#ifdef BUILD_AWS_BACKUP
|
||||
#ifdef WITH_AWS_BACKUP
|
||||
#include "fdbclient/FDBAWSCredentialsProvider.h"
|
||||
#endif
|
||||
|
||||
|
@ -88,6 +88,7 @@ S3BlobStoreEndpoint::BlobKnobs::BlobKnobs() {
|
|||
concurrent_lists = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_LISTS;
|
||||
concurrent_reads_per_file = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_READS_PER_FILE;
|
||||
concurrent_writes_per_file = CLIENT_KNOBS->BLOBSTORE_CONCURRENT_WRITES_PER_FILE;
|
||||
enable_read_cache = CLIENT_KNOBS->BLOBSTORE_ENABLE_READ_CACHE;
|
||||
read_block_size = CLIENT_KNOBS->BLOBSTORE_READ_BLOCK_SIZE;
|
||||
read_ahead_blocks = CLIENT_KNOBS->BLOBSTORE_READ_AHEAD_BLOCKS;
|
||||
read_cache_blocks_per_file = CLIENT_KNOBS->BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE;
|
||||
|
@ -125,6 +126,7 @@ bool S3BlobStoreEndpoint::BlobKnobs::set(StringRef name, int value) {
|
|||
TRY_PARAM(concurrent_lists, cl);
|
||||
TRY_PARAM(concurrent_reads_per_file, crpf);
|
||||
TRY_PARAM(concurrent_writes_per_file, cwpf);
|
||||
TRY_PARAM(enable_read_cache, erc);
|
||||
TRY_PARAM(read_block_size, rbs);
|
||||
TRY_PARAM(read_ahead_blocks, rab);
|
||||
TRY_PARAM(read_cache_blocks_per_file, rcb);
|
||||
|
@ -162,6 +164,7 @@ std::string S3BlobStoreEndpoint::BlobKnobs::getURLParameters() const {
|
|||
_CHECK_PARAM(concurrent_lists, cl);
|
||||
_CHECK_PARAM(concurrent_reads_per_file, crpf);
|
||||
_CHECK_PARAM(concurrent_writes_per_file, cwpf);
|
||||
_CHECK_PARAM(enable_read_cache, erc);
|
||||
_CHECK_PARAM(read_block_size, rbs);
|
||||
_CHECK_PARAM(read_ahead_blocks, rab);
|
||||
_CHECK_PARAM(read_cache_blocks_per_file, rcb);
|
||||
|
@ -615,7 +618,7 @@ ACTOR Future<Optional<json_spirit::mObject>> tryReadJSONFile(std::string path) {
|
|||
// If the credentials expire, the connection will eventually fail and be discarded from the pool, and then a new
|
||||
// connection will be constructed, which will call this again to get updated credentials
|
||||
static S3BlobStoreEndpoint::Credentials getSecretSdk() {
|
||||
#ifdef BUILD_AWS_BACKUP
|
||||
#ifdef WITH_AWS_BACKUP
|
||||
double elapsed = -timer_monotonic();
|
||||
Aws::Auth::AWSCredentials awsCreds = FDBAWSCredentialsProvider::getAwsCredentials();
|
||||
elapsed += timer_monotonic();
|
||||
|
|
|
@ -115,6 +115,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( ENABLE_DETAILED_TLOG_POP_TRACE, false ); if ( randomize && BUGGIFY ) ENABLE_DETAILED_TLOG_POP_TRACE = true;
|
||||
init( PEEK_BATCHING_EMPTY_MSG, false ); if ( randomize && BUGGIFY ) PEEK_BATCHING_EMPTY_MSG = true;
|
||||
init( PEEK_BATCHING_EMPTY_MSG_INTERVAL, 0.001 ); if ( randomize && BUGGIFY ) PEEK_BATCHING_EMPTY_MSG_INTERVAL = 0.01;
|
||||
init( POP_FROM_LOG_DELAY, 1 ); if ( randomize && BUGGIFY ) POP_FROM_LOG_DELAY = 0;
|
||||
|
||||
// disk snapshot max timeout, to be put in TLog, storage and coordinator nodes
|
||||
init( MAX_FORKED_PROCESS_OUTPUT, 1024 );
|
||||
|
@ -295,7 +296,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( DD_STORAGE_WIGGLE_PAUSE_THRESHOLD, 10 ); if( randomize && BUGGIFY ) DD_STORAGE_WIGGLE_PAUSE_THRESHOLD = 1000;
|
||||
init( DD_STORAGE_WIGGLE_STUCK_THRESHOLD, 20 );
|
||||
init( DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC, isSimulated ? 2 : 21 * 60 * 60 * 24 ); if(randomize && BUGGIFY) DD_STORAGE_WIGGLE_MIN_SS_AGE_SEC = isSimulated ? 0: 120;
|
||||
init( DD_TENANT_AWARENESS_ENABLED, false );
|
||||
init( DD_TENANT_AWARENESS_ENABLED, false ); if(isSimulated) DD_TENANT_AWARENESS_ENABLED = deterministicRandom()->coinflip();
|
||||
init( TENANT_CACHE_LIST_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_LIST_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
init( TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL, 2 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_USAGE_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
init( TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL, 10 ); if( randomize && BUGGIFY ) TENANT_CACHE_STORAGE_QUOTA_REFRESH_INTERVAL = deterministicRandom()->randomInt(1, 10);
|
||||
|
@ -376,6 +377,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( REPLACE_CONTENTS_BYTES, 1e5 );
|
||||
|
||||
// KeyValueStoreRocksDB
|
||||
init( ROCKSDB_SET_READ_TIMEOUT, !isSimulated );
|
||||
init( ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES, true ); if( randomize && BUGGIFY ) ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES = false;
|
||||
init( ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE, true ); if( randomize && BUGGIFY ) ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE = false;
|
||||
init( ROCKSDB_READ_RANGE_ROW_LIMIT, 65535 ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_ROW_LIMIT = deterministicRandom()->randomInt(2, 10);
|
||||
|
@ -383,6 +385,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( ROCKSDB_WRITER_THREAD_PRIORITY, 0 );
|
||||
init( ROCKSDB_BACKGROUND_PARALLELISM, 4 );
|
||||
init( ROCKSDB_READ_PARALLELISM, 4 );
|
||||
// If true, do not process and store RocksDB logs
|
||||
init( ROCKSDB_MUTE_LOGS, false );
|
||||
// Use a smaller memtable in simulation to avoid OOMs.
|
||||
int64_t memtableBytes = isSimulated ? 32 * 1024 : 512 * 1024 * 1024;
|
||||
init( ROCKSDB_MEMTABLE_BYTES, memtableBytes );
|
||||
|
@ -404,7 +408,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( ROCKSDB_FETCH_QUEUE_SOFT_MAX, 50 );
|
||||
init( ROCKSDB_HISTOGRAMS_SAMPLE_RATE, 0.001 ); if( randomize && BUGGIFY ) ROCKSDB_HISTOGRAMS_SAMPLE_RATE = 0;
|
||||
init( ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME, 30.0 ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME = 0.1;
|
||||
init( ROCKSDB_READ_RANGE_REUSE_ITERATORS, true ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_REUSE_ITERATORS = deterministicRandom()->coinflip() ? true : false;
|
||||
init( ROCKSDB_READ_RANGE_REUSE_ITERATORS, true ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_REUSE_ITERATORS = deterministicRandom()->coinflip();
|
||||
init( ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS, false ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS = deterministicRandom()->coinflip();
|
||||
init( ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT, 200 );
|
||||
// Set to 0 to disable rocksdb write rate limiting. Rate limiter unit: bytes per second.
|
||||
init( ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC, 0 );
|
||||
// If true, enables dynamic adjustment of ROCKSDB_WRITE_RATE_LIMITER_BYTES according to the recent demand of background IO.
|
||||
|
@ -412,7 +418,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY, "fdb");
|
||||
init( ROCKSDB_DISABLE_AUTO_COMPACTIONS, false ); // RocksDB default
|
||||
|
||||
init( ROCKSDB_PERFCONTEXT_ENABLE, false ); if( randomize && BUGGIFY ) ROCKSDB_PERFCONTEXT_ENABLE = deterministicRandom()->coinflip() ? false : true;
|
||||
init( ROCKSDB_PERFCONTEXT_ENABLE, false ); if( randomize && BUGGIFY ) ROCKSDB_PERFCONTEXT_ENABLE = deterministicRandom()->coinflip();
|
||||
init( ROCKSDB_PERFCONTEXT_SAMPLE_RATE, 0.0001 );
|
||||
init( ROCKSDB_METRICS_SAMPLE_INTERVAL, 0.0);
|
||||
init( ROCKSDB_MAX_SUBCOMPACTIONS, 2 );
|
||||
|
@ -422,10 +428,13 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
// Enable this knob only for experimental purposes; never enable this in production.
|
||||
// If enabled, all the committed in-memory memtable writes are lost on a crash.
|
||||
init( ROCKSDB_DISABLE_WAL_EXPERIMENTAL, false );
|
||||
// If ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE is enabled, disable ENABLE_CLEAR_RANGE_EAGER_READS knob.
|
||||
// If ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE is enabled, disable ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS knob.
|
||||
// These knobs have contrary functionality.
|
||||
init( ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE, false ); if( randomize && BUGGIFY ) ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE = deterministicRandom()->coinflip() ? false : true;
|
||||
init( ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE, false ); if( randomize && BUGGIFY ) ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE = deterministicRandom()->coinflip();
|
||||
init( ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT, 200000 ); // 200KB
|
||||
init( ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS, true ); if( randomize && BUGGIFY ) ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS = deterministicRandom()->coinflip();
|
||||
// ROCKSDB_STATS_LEVEL=1 indicates rocksdb::StatsLevel::kExceptHistogramOrTimers
|
||||
init( ROCKSDB_STATS_LEVEL, 1 ); if( randomize && BUGGIFY ) ROCKSDB_STATS_LEVEL = deterministicRandom()->randomInt(0, 6);
|
||||
// canCommit will delay ROCKSDB_CAN_COMMIT_DELAY_ON_OVERLOAD seconds, up to
|
||||
// ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD times, if RocksDB is overloaded.
|
||||
// Set ROCKSDB_CAN_COMMIT_DELAY_TIMES_ON_OVERLOAD to 0 to disable.
|
||||
|
@ -731,9 +740,10 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( ENFORCE_TAG_THROTTLING_ON_PROXIES, GLOBAL_TAG_THROTTLING );
|
||||
init( GLOBAL_TAG_THROTTLING_MIN_RATE, 1.0 );
|
||||
init( GLOBAL_TAG_THROTTLING_FOLDING_TIME, 10.0 );
|
||||
init( GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO, 5.0 );
|
||||
init( GLOBAL_TAG_THROTTLING_MAX_TAGS_TRACKED, 10 );
|
||||
init( GLOBAL_TAG_THROTTLING_TAG_EXPIRE_AFTER, 240.0 );
|
||||
init( PROXY_MAX_TAG_THROTTLE_DURATION, 5.0 ); if( randomize && BUGGIFY ) PROXY_MAX_TAG_THROTTLE_DURATION = 0.5;
|
||||
init( GLOBAL_TAG_THROTTLING_PROXY_LOGGING_INTERVAL, 60.0 );
|
||||
|
||||
//Storage Metrics
|
||||
init( STORAGE_METRICS_AVERAGE_INTERVAL, 120.0 );
|
||||
|
@ -761,8 +771,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( FETCH_KEYS_PARALLELISM_FULL, 6 );
|
||||
init( FETCH_KEYS_LOWER_PRIORITY, 0 );
|
||||
init( SERVE_FETCH_CHECKPOINT_PARALLELISM, 4 );
|
||||
init( SERVE_AUDIT_STORAGE_PARALLELISM, 2 );
|
||||
init( CHANGE_FEED_DISK_READS_PARALLELISM, 1000 ); if( randomize && BUGGIFY ) CHANGE_FEED_DISK_READS_PARALLELISM = 20;
|
||||
init( SERVE_AUDIT_STORAGE_PARALLELISM, 1 );
|
||||
init( BUGGIFY_BLOCK_BYTES, 10000 );
|
||||
init( STORAGE_RECOVERY_VERSION_LAG_LIMIT, 2 * MAX_READ_TRANSACTION_LIFE_VERSIONS );
|
||||
init( STORAGE_COMMIT_BYTES, 10000000 ); if( randomize && BUGGIFY ) STORAGE_COMMIT_BYTES = 2000000;
|
||||
|
@ -801,6 +810,10 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( QUICK_GET_KEY_VALUES_LIMIT, 2000 );
|
||||
init( QUICK_GET_KEY_VALUES_LIMIT_BYTES, 1e7 );
|
||||
init( STORAGE_FEED_QUERY_HARD_LIMIT, 100000 );
|
||||
init( STORAGE_SERVER_READ_CONCURRENCY, 70 );
|
||||
// Priorities which each ReadType maps to, in enumeration order
|
||||
init( STORAGESERVER_READ_RANKS, "0,2,1,1,1" );
|
||||
init( STORAGESERVER_READ_PRIORITIES, "48,32,8" );
|
||||
|
||||
//Wait Failure
|
||||
init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
|
||||
|
@ -912,7 +925,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( REDWOOD_DEFAULT_EXTENT_SIZE, 32 * 1024 * 1024 );
|
||||
init( REDWOOD_DEFAULT_EXTENT_READ_SIZE, 1024 * 1024 );
|
||||
init( REDWOOD_EXTENT_CONCURRENT_READS, 4 );
|
||||
init( REDWOOD_KVSTORE_CONCURRENT_READS, 64 );
|
||||
init( REDWOOD_KVSTORE_RANGE_PREFETCH, true );
|
||||
init( REDWOOD_PAGE_REBUILD_MAX_SLACK, 0.33 );
|
||||
init( REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES, 10 );
|
||||
|
@ -925,6 +937,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( REDWOOD_HISTOGRAM_INTERVAL, 30.0 );
|
||||
init( REDWOOD_EVICT_UPDATED_PAGES, true ); if( randomize && BUGGIFY ) { REDWOOD_EVICT_UPDATED_PAGES = false; }
|
||||
init( REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT, 2 ); if( randomize && BUGGIFY ) { REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT = deterministicRandom()->randomInt(1, 7); }
|
||||
init( REDWOOD_PRIORITY_LAUNCHS, "32,32,32,32" );
|
||||
init( REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT, false );
|
||||
|
||||
// Server request latency measurement
|
||||
|
@ -939,9 +952,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( ENCRYPTION_MODE, "AES-256-CTR" );
|
||||
init( SIM_KMS_MAX_KEYS, 4096 );
|
||||
init( ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH, 100000 );
|
||||
init( ENABLE_TLOG_ENCRYPTION, ENABLE_ENCRYPTION ); if ( randomize && BUGGIFY && ENABLE_ENCRYPTION && !PROXY_USE_RESOLVER_PRIVATE_MUTATIONS ) ENABLE_TLOG_ENCRYPTION = true;
|
||||
init( ENABLE_STORAGE_SERVER_ENCRYPTION, ENABLE_ENCRYPTION ); if ( randomize && BUGGIFY) ENABLE_STORAGE_SERVER_ENCRYPTION = !ENABLE_STORAGE_SERVER_ENCRYPTION;
|
||||
init( ENABLE_BLOB_GRANULE_ENCRYPTION, ENABLE_ENCRYPTION ); if ( randomize && BUGGIFY) ENABLE_BLOB_GRANULE_ENCRYPTION = !ENABLE_BLOB_GRANULE_ENCRYPTION;
|
||||
init( ENABLE_TLOG_ENCRYPTION, ENABLE_ENCRYPTION ); if ( randomize && BUGGIFY && ENABLE_ENCRYPTION ) ENABLE_TLOG_ENCRYPTION = false;
|
||||
init( ENABLE_STORAGE_SERVER_ENCRYPTION, ENABLE_ENCRYPTION ); if ( randomize && BUGGIFY && ENABLE_ENCRYPTION) ENABLE_STORAGE_SERVER_ENCRYPTION = false;
|
||||
init( ENABLE_BLOB_GRANULE_ENCRYPTION, ENABLE_ENCRYPTION ); if ( randomize && BUGGIFY && ENABLE_ENCRYPTION) ENABLE_BLOB_GRANULE_ENCRYPTION = false;
|
||||
|
||||
// encrypt key proxy
|
||||
init( ENABLE_BLOB_GRANULE_COMPRESSION, false ); if ( randomize && BUGGIFY ) { ENABLE_BLOB_GRANULE_COMPRESSION = deterministicRandom()->coinflip(); }
|
||||
|
@ -951,7 +964,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( KMS_CONNECTOR_TYPE, "RESTKmsConnector" );
|
||||
|
||||
// Blob granules
|
||||
init( BG_URL, isSimulated ? "file://fdbblob/" : "" ); // TODO: store in system key space or something, eventually
|
||||
init( BG_URL, isSimulated ? "file://simfdb/fdbblob/" : "" ); // TODO: store in system key space or something, eventually
|
||||
bool buggifyMediumGranules = simulationMediumShards || (randomize && BUGGIFY);
|
||||
// BlobGranuleVerify* simulation tests use "knobs", BlobGranuleCorrectness* use "tenant", default in real clusters is "knobs"
|
||||
init( BG_METADATA_SOURCE, "knobs" );
|
||||
|
@ -967,6 +980,9 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( BG_CONSISTENCY_CHECK_ENABLED, true ); if (randomize && BUGGIFY) BG_CONSISTENCY_CHECK_ENABLED = false;
|
||||
init( BG_CONSISTENCY_CHECK_TARGET_SPEED_KB, 1000 ); if (randomize && BUGGIFY) BG_CONSISTENCY_CHECK_TARGET_SPEED_KB *= (deterministicRandom()->randomInt(2, 50) / 10);
|
||||
init( BG_KEY_TUPLE_TRUNCATE_OFFSET, 0 );
|
||||
init( BG_ENABLE_READ_DRIVEN_COMPACTION, true ); if (randomize && BUGGIFY) BG_ENABLE_READ_DRIVEN_COMPACTION = false;
|
||||
init( BG_RDC_BYTES_FACTOR, 2 ); if (randomize && BUGGIFY) BG_RDC_BYTES_FACTOR = deterministicRandom()->randomInt(1, 10);
|
||||
init( BG_RDC_READ_FACTOR, 3 ); if (randomize && BUGGIFY) BG_RDC_READ_FACTOR = deterministicRandom()->randomInt(1, 10);
|
||||
|
||||
init( BG_ENABLE_MERGING, true ); if (randomize && BUGGIFY) BG_ENABLE_MERGING = false;
|
||||
init( BG_MERGE_CANDIDATE_THRESHOLD_SECONDS, isSimulated ? 20.0 : 30 * 60 ); if (randomize && BUGGIFY) BG_MERGE_CANDIDATE_THRESHOLD_SECONDS = 5.0;
|
||||
|
@ -975,6 +991,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM, 8 ); if( randomize && BUGGIFY ) BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM = 1;
|
||||
init( BLOB_WORKER_RESNAPSHOT_PARALLELISM, 40 ); if( randomize && BUGGIFY ) BLOB_WORKER_RESNAPSHOT_PARALLELISM = deterministicRandom()->randomInt(1, 10);
|
||||
init( BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM, 2000 ); if( randomize && BUGGIFY ) BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM = deterministicRandom()->randomInt(10, 100);
|
||||
init( BLOB_WORKER_RDC_PARALLELISM, 2 ); if( randomize && BUGGIFY ) BLOB_WORKER_RDC_PARALLELISM = deterministicRandom()->randomInt(1, 6);
|
||||
|
||||
init( BLOB_WORKER_TIMEOUT, 10.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_TIMEOUT = 1.0;
|
||||
init( BLOB_WORKER_REQUEST_TIMEOUT, 5.0 ); if( randomize && BUGGIFY ) BLOB_WORKER_REQUEST_TIMEOUT = 1.0;
|
||||
init( BLOB_WORKERLIST_FETCH_INTERVAL, 1.0 );
|
||||
|
@ -990,6 +1008,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
init( BLOB_MANIFEST_BACKUP, false );
|
||||
init( BLOB_MANIFEST_BACKUP_INTERVAL, isSimulated ? 5.0 : 30.0 );
|
||||
init( BLOB_FULL_RESTORE_MODE, false );
|
||||
init( BLOB_MIGRATOR_CHECK_INTERVAL, isSimulated ? 1.0 : 5.0);
|
||||
|
||||
init( BGCC_TIMEOUT, isSimulated ? 10.0 : 120.0 );
|
||||
init( BGCC_MIN_INTERVAL, isSimulated ? 1.0 : 10.0 );
|
||||
|
@ -997,8 +1016,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
// Blob Metadata
|
||||
init( BLOB_METADATA_CACHE_TTL, isSimulated ? 120 : 24 * 60 * 60 );
|
||||
if ( randomize && BUGGIFY) { BLOB_METADATA_CACHE_TTL = deterministicRandom()->randomInt(50, 100); }
|
||||
init( BLOB_METADATA_REFRESH_INTERVAL, isSimulated ? 60 : 60 * 60 );
|
||||
if ( randomize && BUGGIFY) { BLOB_METADATA_REFRESH_INTERVAL = deterministicRandom()->randomInt(5, 120); }
|
||||
|
||||
// HTTP KMS Connector
|
||||
init( REST_KMS_CONNECTOR_KMS_DISCOVERY_URL_MODE, "file");
|
||||
|
@ -1019,6 +1036,10 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
|
|||
// NOTE: 'token-name' can NOT contain the '#' character
|
||||
init( REST_KMS_CONNECTOR_VALIDATION_TOKEN_DETAILS, "");
|
||||
|
||||
// Drop in-memory state associated with an idempotency id after this many seconds. Once dropped, this id cannot be
|
||||
// expired proactively, but will eventually get cleaned up by the idempotency id cleaner.
|
||||
init( IDEMPOTENCY_ID_IN_MEMORY_LIFETIME, 10);
|
||||
|
||||
// clang-format on
|
||||
|
||||
if (clientKnobs) {
|
||||
|
|
|
@ -296,6 +296,10 @@ Version SimpleConfigTransaction::getCommittedVersion() const {
|
|||
return impl->getCommittedVersion();
|
||||
}
|
||||
|
||||
int64_t SimpleConfigTransaction::getTotalCost() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t SimpleConfigTransaction::getApproximateSize() const {
|
||||
return impl->getApproximateSize();
|
||||
}
|
||||
|
|
|
@ -284,8 +284,6 @@ const KeyRangeRef readConflictRangeKeysRange =
|
|||
const KeyRangeRef writeConflictRangeKeysRange = KeyRangeRef("\xff\xff/transaction/write_conflict_range/"_sr,
|
||||
"\xff\xff/transaction/write_conflict_range/\xff\xff"_sr);
|
||||
|
||||
const KeyRef clusterIdKey = "\xff/clusterId"_sr;
|
||||
|
||||
const KeyRangeRef auditRange = KeyRangeRef("\xff/audit/"_sr, "\xff/audit0"_sr);
|
||||
const KeyRef auditPrefix = auditRange.begin;
|
||||
|
||||
|
@ -1074,6 +1072,11 @@ const KeyRangeRef timeKeeperPrefixRange("\xff\x02/timeKeeper/map/"_sr, "\xff\x02
|
|||
const KeyRef timeKeeperVersionKey = "\xff\x02/timeKeeper/version"_sr;
|
||||
const KeyRef timeKeeperDisableKey = "\xff\x02/timeKeeper/disable"_sr;
|
||||
|
||||
// Durable cluster ID key. Added "Key" to the end to differentiate from the key
|
||||
// "\xff/clusterId" which was stored in the txnStateStore in FDB 7.1, whereas
|
||||
// this key is stored in the database in 7.2+.
|
||||
const KeyRef clusterIdKey = "\xff/clusterIdKey"_sr;
|
||||
|
||||
// Backup Log Mutation constant variables
|
||||
const KeyRef backupEnabledKey = "\xff/backupEnabled"_sr;
|
||||
const KeyRangeRef backupLogKeys("\xff\x02/blog/"_sr, "\xff\x02/blog0"_sr);
|
||||
|
@ -1810,4 +1813,4 @@ TEST_CASE("noSim/SystemData/compat/KeyServers") {
|
|||
printf("ssi serdes test complete\n");
|
||||
|
||||
return Void();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -626,6 +626,14 @@ ThreadFuture<SpanContext> ThreadSafeTransaction::getSpanContext() {
|
|||
});
|
||||
}
|
||||
|
||||
ThreadFuture<int64_t> ThreadSafeTransaction::getTotalCost() {
|
||||
ISingleThreadTransaction* tr = this->tr;
|
||||
return onMainThread([tr]() -> Future<int64_t> {
|
||||
tr->checkDeferredError();
|
||||
return tr->getTotalCost();
|
||||
});
|
||||
}
|
||||
|
||||
ThreadFuture<int64_t> ThreadSafeTransaction::getApproximateSize() {
|
||||
ISingleThreadTransaction* tr = this->tr;
|
||||
return onMainThread([tr]() -> Future<int64_t> {
|
||||
|
@ -735,10 +743,10 @@ void ThreadSafeApi::runNetwork() {
|
|||
Optional<Error> runErr;
|
||||
try {
|
||||
::runNetwork();
|
||||
} catch (Error& e) {
|
||||
} catch (const Error& e) {
|
||||
TraceEvent(SevError, "RunNetworkError").error(e);
|
||||
runErr = e;
|
||||
} catch (std::exception& e) {
|
||||
} catch (const std::exception& e) {
|
||||
runErr = unknown_error();
|
||||
TraceEvent(SevError, "RunNetworkError").error(unknown_error()).detail("RootException", e.what());
|
||||
} catch (...) {
|
||||
|
@ -749,9 +757,9 @@ void ThreadSafeApi::runNetwork() {
|
|||
for (auto& hook : threadCompletionHooks) {
|
||||
try {
|
||||
hook.first(hook.second);
|
||||
} catch (Error& e) {
|
||||
} catch (const Error& e) {
|
||||
TraceEvent(SevError, "NetworkShutdownHookError").error(e);
|
||||
} catch (std::exception& e) {
|
||||
} catch (const std::exception& e) {
|
||||
TraceEvent(SevError, "NetworkShutdownHookError").error(unknown_error()).detail("RootException", e.what());
|
||||
} catch (...) {
|
||||
TraceEvent(SevError, "NetworkShutdownHookError").error(unknown_error());
|
||||
|
@ -759,12 +767,10 @@ void ThreadSafeApi::runNetwork() {
|
|||
}
|
||||
|
||||
if (runErr.present()) {
|
||||
closeTraceFile();
|
||||
throw runErr.get();
|
||||
}
|
||||
|
||||
TraceEvent("RunNetworkTerminating");
|
||||
closeTraceFile();
|
||||
}
|
||||
|
||||
void ThreadSafeApi::stopNetwork() {
|
||||
|
|
|
@ -196,6 +196,7 @@ public:
|
|||
Key addPrefix = Key(),
|
||||
Key removePrefix = Key(),
|
||||
LockDB = LockDB::True,
|
||||
UnlockDB = UnlockDB::True,
|
||||
OnlyApplyMutationLogs = OnlyApplyMutationLogs::False,
|
||||
InconsistentSnapshotOnly = InconsistentSnapshotOnly::False,
|
||||
Version beginVersion = ::invalidVersion,
|
||||
|
|
|
@ -103,6 +103,7 @@ public:
|
|||
Counter latestCipherKeyCacheNeedsRefresh;
|
||||
LatencySample getCipherKeysLatency;
|
||||
LatencySample getLatestCipherKeysLatency;
|
||||
LatencySample getBlobMetadataLatency;
|
||||
std::array<CounterSet, int(UsageType::MAX)> counterSets;
|
||||
};
|
||||
|
||||
|
|
|
@ -91,4 +91,8 @@ struct BlobMetadataDetailsRef {
|
|||
}
|
||||
};
|
||||
|
||||
Standalone<BlobMetadataDetailsRef> createRandomTestBlobMetadata(const std::string& baseUrl,
|
||||
BlobMetadataDomainId domainId,
|
||||
BlobMetadataDomainName domainName);
|
||||
|
||||
#endif
|
|
@ -45,6 +45,7 @@ struct BlobWorkerStats {
|
|||
Counter compressionBytesFinal;
|
||||
Counter fullRejections;
|
||||
Counter forceFlushCleanups;
|
||||
Counter readDrivenCompactions;
|
||||
|
||||
int numRangesAssigned;
|
||||
int mutationBytesBuffered;
|
||||
|
@ -83,10 +84,11 @@ struct BlobWorkerStats {
|
|||
readRequestsWithBegin("ReadRequestsWithBegin", cc), readRequestsCollapsed("ReadRequestsCollapsed", cc),
|
||||
flushGranuleReqs("FlushGranuleReqs", cc), compressionBytesRaw("CompressionBytesRaw", cc),
|
||||
compressionBytesFinal("CompressionBytesFinal", cc), fullRejections("FullRejections", cc),
|
||||
forceFlushCleanups("ForceFlushCleanups", cc), numRangesAssigned(0), mutationBytesBuffered(0),
|
||||
activeReadRequests(0), granulesPendingSplitCheck(0), minimumCFVersion(0), cfVersionLag(0),
|
||||
notAtLatestChangeFeeds(0), lastResidentMemory(0), estimatedMaxResidentMemory(0),
|
||||
initialSnapshotLock(initialSnapshotLock), resnapshotLock(resnapshotLock), deltaWritesLock(deltaWritesLock) {
|
||||
forceFlushCleanups("ForceFlushCleanups", cc), readDrivenCompactions("ReadDrivenCompactions", cc),
|
||||
numRangesAssigned(0), mutationBytesBuffered(0), activeReadRequests(0), granulesPendingSplitCheck(0),
|
||||
minimumCFVersion(0), cfVersionLag(0), notAtLatestChangeFeeds(0), lastResidentMemory(0),
|
||||
estimatedMaxResidentMemory(0), initialSnapshotLock(initialSnapshotLock), resnapshotLock(resnapshotLock),
|
||||
deltaWritesLock(deltaWritesLock) {
|
||||
specialCounter(cc, "NumRangesAssigned", [this]() { return this->numRangesAssigned; });
|
||||
specialCounter(cc, "MutationBytesBuffered", [this]() { return this->mutationBytesBuffered; });
|
||||
specialCounter(cc, "ActiveReadRequests", [this]() { return this->activeReadRequests; });
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#define FDBCLIENT_BUILD_IDEMPOTENCY_ID_MUTATIONS_H
|
||||
|
||||
#include "fdbclient/CommitProxyInterface.h"
|
||||
#include "fdbclient/IdempotencyId.h"
|
||||
#include "fdbclient/IdempotencyId.actor.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
|
|
|
@ -199,7 +199,6 @@ public:
|
|||
int32_t DEFAULT_MAX_GRV_PROXIES;
|
||||
int32_t DEFAULT_AUTO_RESOLVERS;
|
||||
int32_t DEFAULT_AUTO_LOGS;
|
||||
bool DELETE_NATIVE_LIB_AFTER_LOADING;
|
||||
|
||||
double GLOBAL_CONFIG_REFRESH_BACKOFF;
|
||||
double GLOBAL_CONFIG_REFRESH_MAX_BACKOFF;
|
||||
|
@ -235,6 +234,7 @@ public:
|
|||
int BLOBSTORE_CONCURRENT_LISTS;
|
||||
int BLOBSTORE_CONCURRENT_WRITES_PER_FILE;
|
||||
int BLOBSTORE_CONCURRENT_READS_PER_FILE;
|
||||
int BLOBSTORE_ENABLE_READ_CACHE;
|
||||
int BLOBSTORE_READ_BLOCK_SIZE;
|
||||
int BLOBSTORE_READ_AHEAD_BLOCKS;
|
||||
int BLOBSTORE_READ_CACHE_BLOCKS_PER_FILE;
|
||||
|
@ -262,8 +262,8 @@ public:
|
|||
double TAG_THROTTLE_EXPIRATION_INTERVAL;
|
||||
int64_t WRITE_COST_BYTE_FACTOR; // Used to round up the cost of write operations
|
||||
int64_t READ_COST_BYTE_FACTOR; // Used to round up the cost of read operations
|
||||
double PROXY_MAX_TAG_THROTTLE_DURATION; // Maximum duration that a transaction can be tag throttled by proxy before
|
||||
// being rejected
|
||||
// Cost multiplier for writes (because write operations are more expensive than reads):
|
||||
double GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO;
|
||||
|
||||
// busyness reporting
|
||||
double BUSYNESS_SPIKE_START_THRESHOLD;
|
||||
|
@ -272,6 +272,7 @@ public:
|
|||
// Blob Granules
|
||||
int BG_MAX_GRANULE_PARALLELISM;
|
||||
int BG_TOO_MANY_GRANULES;
|
||||
int64_t BLOB_METADATA_REFRESH_INTERVAL;
|
||||
|
||||
// The coordinator key/value in storage server might be inconsistent to the value stored in the cluster file.
|
||||
// This might happen when a recovery is happening together with a cluster controller coordinator key change.
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
#include "fdbclient/FDBTypes.h"
|
||||
#include "fdbclient/GlobalConfig.h"
|
||||
#include "fdbclient/GrvProxyInterface.h"
|
||||
#include "fdbclient/IdempotencyId.h"
|
||||
#include "fdbclient/IdempotencyId.actor.h"
|
||||
#include "fdbclient/StorageServerInterface.h"
|
||||
#include "fdbclient/TagThrottle.actor.h"
|
||||
#include "fdbclient/VersionVector.h"
|
||||
|
@ -61,6 +61,7 @@ struct CommitProxyInterface {
|
|||
RequestStream<struct ProxySnapRequest> proxySnapReq;
|
||||
RequestStream<struct ExclusionSafetyCheckRequest> exclusionSafetyCheckReq;
|
||||
RequestStream<struct GetDDMetricsRequest> getDDMetrics;
|
||||
PublicRequestStream<struct ExpireIdempotencyIdRequest> expireIdempotencyId;
|
||||
|
||||
UID id() const { return commit.getEndpoint().token; }
|
||||
std::string toString() const { return id().shortString(); }
|
||||
|
@ -87,6 +88,8 @@ struct CommitProxyInterface {
|
|||
exclusionSafetyCheckReq =
|
||||
RequestStream<struct ExclusionSafetyCheckRequest>(commit.getEndpoint().getAdjustedEndpoint(8));
|
||||
getDDMetrics = RequestStream<struct GetDDMetricsRequest>(commit.getEndpoint().getAdjustedEndpoint(9));
|
||||
expireIdempotencyId =
|
||||
PublicRequestStream<struct ExpireIdempotencyIdRequest>(commit.getEndpoint().getAdjustedEndpoint(10));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -103,6 +106,7 @@ struct CommitProxyInterface {
|
|||
streams.push_back(proxySnapReq.getReceiver());
|
||||
streams.push_back(exclusionSafetyCheckReq.getReceiver());
|
||||
streams.push_back(getDDMetrics.getReceiver());
|
||||
streams.push_back(expireIdempotencyId.getReceiver());
|
||||
FlowTransport::transport().addEndpoints(streams);
|
||||
}
|
||||
};
|
||||
|
@ -151,6 +155,24 @@ struct ClientDBInfo {
|
|||
}
|
||||
};
|
||||
|
||||
struct ExpireIdempotencyIdRequest {
|
||||
constexpr static FileIdentifier file_identifier = 1900933;
|
||||
Version commitVersion = invalidVersion;
|
||||
uint8_t batchIndexHighByte = 0;
|
||||
TenantInfo tenant;
|
||||
|
||||
ExpireIdempotencyIdRequest() {}
|
||||
ExpireIdempotencyIdRequest(Version commitVersion, uint8_t batchIndexHighByte, TenantInfo tenant)
|
||||
: commitVersion(commitVersion), batchIndexHighByte(batchIndexHighByte), tenant(tenant) {}
|
||||
|
||||
bool verify() const { return tenant.isAuthorized(); }
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, commitVersion, batchIndexHighByte, tenant);
|
||||
}
|
||||
};
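Editor's illustration only (commitVersion, batchIndex, and tenantInfo are placeholder names, not from this change): judging by the field name, the request carries the high-order byte of the commit batch index alongside the commit version and tenant.

    // Hypothetical construction of the new request type.
    uint16_t batchIndex = 0x1234;
    ExpireIdempotencyIdRequest req(commitVersion, uint8_t(batchIndex >> 8), tenantInfo);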
|
||||
|
||||
struct CommitID {
|
||||
constexpr static FileIdentifier file_identifier = 14254927;
|
||||
Version version; // returns invalidVersion if transaction conflicts
|
||||
|
|
|
@ -382,7 +382,8 @@ public:
|
|||
Version end = std::numeric_limits<Version>::max(),
|
||||
KeyRange range = allKeys,
|
||||
int replyBufferSize = -1,
|
||||
bool canReadPopped = true);
|
||||
bool canReadPopped = true,
|
||||
ReadOptions readOptions = { ReadType::NORMAL, CacheResult::False });
|
||||
|
||||
Future<OverlappingChangeFeedsInfo> getOverlappingChangeFeeds(KeyRangeRef ranges, Version minVersion);
|
||||
Future<Void> popChangeFeedMutations(Key rangeID, Version version);
|
||||
|
|
|
@ -546,36 +546,37 @@ struct hash<KeyRange> {
|
|||
|
||||
enum { invalidVersion = -1, latestVersion = -2, MAX_VERSION = std::numeric_limits<int64_t>::max() };
|
||||
|
||||
inline Key keyAfter(const KeyRef& key) {
|
||||
if (key == "\xff\xff"_sr)
|
||||
return key;
|
||||
|
||||
Standalone<StringRef> r;
|
||||
uint8_t* s = new (r.arena()) uint8_t[key.size() + 1];
|
||||
if (key.size() > 0) {
|
||||
memcpy(s, key.begin(), key.size());
|
||||
}
|
||||
s[key.size()] = 0;
|
||||
((StringRef&)r) = StringRef(s, key.size() + 1);
|
||||
return r;
|
||||
}
|
||||
inline KeyRef keyAfter(const KeyRef& key, Arena& arena) {
|
||||
if (key == "\xff\xff"_sr)
|
||||
return key;
|
||||
// Don't include fdbclient/SystemData.h for the allKeys symbol to avoid a cyclic include
|
||||
static const auto allKeysEnd = "\xff\xff"_sr;
|
||||
if (key == allKeysEnd) {
|
||||
return allKeysEnd;
|
||||
}
|
||||
uint8_t* t = new (arena) uint8_t[key.size() + 1];
|
||||
memcpy(t, key.begin(), key.size());
|
||||
if (!key.empty()) {
|
||||
memcpy(t, key.begin(), key.size());
|
||||
}
|
||||
t[key.size()] = 0;
|
||||
return KeyRef(t, key.size() + 1);
|
||||
}
|
||||
inline KeyRange singleKeyRange(const KeyRef& a) {
|
||||
return KeyRangeRef(a, keyAfter(a));
|
||||
inline Key keyAfter(const KeyRef& key) {
|
||||
Key result;
|
||||
result.contents() = keyAfter(key, result.arena());
|
||||
return result;
|
||||
}
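Editor's note, not part of the change: a small sketch of the expected semantics of the rewritten helpers above. keyAfter appends a single 0x00 byte, yielding the smallest key strictly greater than its argument, and the "\xff\xff" sentinel is returned unchanged.

    // Sketch only: behavior of the arena-based keyAfter/singleKeyRange overloads.
    Arena arena;
    ASSERT(keyAfter("a"_sr, arena) == "a\x00"_sr);
    ASSERT(keyAfter(""_sr, arena) == "\x00"_sr);
    ASSERT(keyAfter("\xff\xff"_sr, arena) == "\xff\xff"_sr);
    ASSERT(singleKeyRange("a"_sr, arena) == KeyRangeRef("a"_sr, "a\x00"_sr));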
|
||||
inline KeyRangeRef singleKeyRange(KeyRef const& key, Arena& arena) {
|
||||
uint8_t* t = new (arena) uint8_t[key.size() + 1];
|
||||
memcpy(t, key.begin(), key.size());
|
||||
if (!key.empty()) {
|
||||
memcpy(t, key.begin(), key.size());
|
||||
}
|
||||
t[key.size()] = 0;
|
||||
return KeyRangeRef(KeyRef(t, key.size()), KeyRef(t, key.size() + 1));
|
||||
}
|
||||
inline KeyRange singleKeyRange(const KeyRef& a) {
|
||||
KeyRange result;
|
||||
result.contents() = singleKeyRange(a, result.arena());
|
||||
return result;
|
||||
}
|
||||
inline KeyRange prefixRange(KeyRef prefix) {
|
||||
Standalone<KeyRangeRef> range;
|
||||
KeyRef start = KeyRef(range.arena(), prefix);
|
||||
|
@ -1494,7 +1495,7 @@ struct EncryptionAtRestMode {
|
|||
bool operator==(const EncryptionAtRestMode& e) const { return isEquals(e); }
|
||||
bool operator!=(const EncryptionAtRestMode& e) const { return !isEquals(e); }
|
||||
|
||||
static EncryptionAtRestMode fromValue(Optional<ValueRef> val) {
|
||||
static EncryptionAtRestMode fromValueRef(Optional<ValueRef> val) {
|
||||
if (!val.present()) {
|
||||
return DISABLED;
|
||||
}
|
||||
|
@ -1508,6 +1509,14 @@ struct EncryptionAtRestMode {
|
|||
return static_cast<Mode>(num);
|
||||
}
|
||||
|
||||
static EncryptionAtRestMode fromValue(Optional<Value> val) {
|
||||
if (!val.present()) {
|
||||
return EncryptionAtRestMode();
|
||||
}
|
||||
|
||||
return EncryptionAtRestMode::fromValueRef(Optional<ValueRef>(val.get().contents()));
|
||||
}
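Editor's sketch of the convenience overload added above (the key being read is purely illustrative and assumed to hold an encoded mode; runs inside an ACTOR that already has a Transaction tr with system-key access):

    // Sketch only: decode the mode from a value read out of the database.
    Optional<Value> v = wait(tr.get("\xff/exampleEncryptionMode"_sr)); // hypothetical key
    EncryptionAtRestMode mode = EncryptionAtRestMode::fromValue(v); // default-constructed if absent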
|
||||
|
||||
uint32_t mode;
|
||||
};
|
||||
|
||||
|
@ -1635,13 +1644,7 @@ struct StorageWiggleValue {
|
|||
}
|
||||
};
|
||||
|
||||
enum class ReadType {
|
||||
EAGER,
|
||||
FETCH,
|
||||
LOW,
|
||||
NORMAL,
|
||||
HIGH,
|
||||
};
|
||||
enum class ReadType { EAGER = 0, FETCH = 1, LOW = 2, NORMAL = 3, HIGH = 4, MIN = EAGER, MAX = HIGH };
|
||||
|
||||
FDB_DECLARE_BOOLEAN_PARAM(CacheResult);
|
||||
|
||||
|
@ -1657,14 +1660,14 @@ struct ReadOptions {
|
|||
Optional<UID> debugID;
|
||||
Optional<Version> consistencyCheckStartVersion;
|
||||
|
||||
ReadOptions() : type(ReadType::NORMAL), cacheResult(CacheResult::True){};
|
||||
|
||||
ReadOptions(Optional<UID> debugID,
|
||||
ReadOptions(Optional<UID> debugID = Optional<UID>(),
|
||||
ReadType type = ReadType::NORMAL,
|
||||
CacheResult cache = CacheResult::False,
|
||||
CacheResult cache = CacheResult::True,
|
||||
Optional<Version> version = Optional<Version>())
|
||||
: type(type), cacheResult(cache), debugID(debugID), consistencyCheckStartVersion(version){};
|
||||
|
||||
ReadOptions(ReadType type, CacheResult cache = CacheResult::True) : ReadOptions({}, type, cache) {}
|
||||
|
||||
template <class Ar>
|
||||
void serialize(Ar& ar) {
|
||||
serializer(ar, type, cacheResult, debugID, consistencyCheckStartVersion);
|
||||
|
|
|
@ -284,7 +284,6 @@ Future<ConfigurationResult> changeConfig(Reference<DB> db, std::map<std::string,
|
|||
state Key versionKey = BinaryWriter::toValue(deterministicRandom()->randomUniqueID(), Unversioned());
|
||||
state bool oldReplicationUsesDcId = false;
|
||||
state bool warnPPWGradual = false;
|
||||
state bool warnChangeStorageNoMigrate = false;
|
||||
state bool warnRocksDBIsExperimental = false;
|
||||
state bool warnShardedRocksDBIsExperimental = false;
|
||||
loop {
|
||||
|
|
|
@ -120,6 +120,7 @@ public:
|
|||
// later if they are not really needed.
|
||||
virtual ThreadFuture<VersionVector> getVersionVector() = 0;
|
||||
virtual ThreadFuture<SpanContext> getSpanContext() = 0;
|
||||
virtual ThreadFuture<int64_t> getTotalCost() = 0;
|
||||
virtual ThreadFuture<int64_t> getApproximateSize() = 0;
|
||||
|
||||
virtual void setOption(FDBTransactionOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) = 0;
|
||||
|
|
|
@ -101,6 +101,7 @@ public:
|
|||
virtual Version getCommittedVersion() const = 0;
|
||||
virtual VersionVector getVersionVector() const = 0;
|
||||
virtual SpanContext getSpanContext() const = 0;
|
||||
virtual int64_t getTotalCost() const = 0;
|
||||
virtual int64_t getApproximateSize() const = 0;
|
||||
virtual Future<Standalone<StringRef>> getVersionstamp() = 0;
|
||||
virtual void setOption(FDBTransactionOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) = 0;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* IdempotencyId.h
|
||||
* IdempotencyId.actor.h
|
||||
*
|
||||
* This source file is part of the FoundationDB open source project
|
||||
*
|
||||
|
@ -18,8 +18,13 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FDBCLIENT_IDEMPOTENCYID_H
|
||||
#define FDBCLIENT_IDEMPOTENCYID_H
|
||||
// When actually compiled (NO_INTELLISENSE), include the generated version of this file. In intellisense use the source
|
||||
// version.
|
||||
#if defined(NO_INTELLISENSE) && !defined(FDBCLIENT_IDEMPOTENCY_ID_ACTOR_G_H)
|
||||
#define FDBCLIENT_IDEMPOTENCY_ID_ACTOR_G_H
|
||||
#include "fdbclient/IdempotencyId.actor.g.h"
|
||||
#elif !defined(FDBCLIENT_IDEMPOTENCY_ID_ACTOR_H)
|
||||
#define FDBCLIENT_IDEMPOTENCY_ID_ACTOR_H
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -28,12 +33,24 @@
|
|||
#include "flow/Arena.h"
|
||||
#include "flow/IRandom.h"
|
||||
#include "flow/serialize.h"
|
||||
#include "flow/actorcompiler.h" // this has to be the last include
|
||||
|
||||
struct CommitResult {
|
||||
Version commitVersion;
|
||||
uint16_t batchIndex;
|
||||
};
|
||||
|
||||
// The type of the value stored at the key |idempotencyIdsExpiredVersion|
|
||||
struct IdempotencyIdsExpiredVersion {
|
||||
static constexpr auto file_identifier = 3746945;
|
||||
Version expired = 0;
|
||||
|
||||
template <class Archive>
|
||||
void serialize(Archive& ar) {
|
||||
serializer(ar, expired);
|
||||
}
|
||||
};
|
||||
|
||||
// See design/idempotency_ids.md for more information. Designed so that the common case of a random 16 byte id does not
|
||||
// usually require indirection. Either invalid or an id with length >= 16 and < 256.
|
||||
struct IdempotencyIdRef {
|
||||
|
@ -163,4 +180,10 @@ private:
|
|||
// Check if id is present in kv, and if so return the commit version and batchIndex
|
||||
Optional<CommitResult> kvContainsIdempotencyId(const KeyValueRef& kv, const IdempotencyIdRef& id);
|
||||
|
||||
#endif
|
||||
// Make a range containing only the idempotency key associated with version and highOrderBatchIndex
|
||||
KeyRangeRef makeIdempotencySingleKeyRange(Arena& arena, Version version, uint8_t highOrderBatchIndex);
|
||||
|
||||
void decodeIdempotencyKey(KeyRef key, Version& commitVersion, uint8_t& highOrderBatchIndex);
|
||||
|
||||
#include "flow/unactorcompiler.h"
|
||||
#endif
|
|
@ -377,6 +377,7 @@ struct FdbCApi : public ThreadSafeReferenceCounted<FdbCApi> {
|
|||
|
||||
FDBFuture* (*transactionCommit)(FDBTransaction* tr);
|
||||
fdb_error_t (*transactionGetCommittedVersion)(FDBTransaction* tr, int64_t* outVersion);
|
||||
FDBFuture* (*transactionGetTotalCost)(FDBTransaction* tr);
|
||||
FDBFuture* (*transactionGetApproximateSize)(FDBTransaction* tr);
|
||||
FDBFuture* (*transactionWatch)(FDBTransaction* tr, uint8_t const* keyName, int keyNameLength);
|
||||
FDBFuture* (*transactionOnError)(FDBTransaction* tr, fdb_error_t error);
|
||||
|
@ -505,6 +506,7 @@ public:
|
|||
Version getCommittedVersion() override;
|
||||
ThreadFuture<VersionVector> getVersionVector() override;
|
||||
ThreadFuture<SpanContext> getSpanContext() override { return SpanContext(); };
|
||||
ThreadFuture<int64_t> getTotalCost() override;
|
||||
ThreadFuture<int64_t> getApproximateSize() override;
|
||||
|
||||
void setOption(FDBTransactionOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
|
||||
|
@ -732,6 +734,7 @@ public:
|
|||
Version getCommittedVersion() override;
|
||||
ThreadFuture<VersionVector> getVersionVector() override;
|
||||
ThreadFuture<SpanContext> getSpanContext() override;
|
||||
ThreadFuture<int64_t> getTotalCost() override;
|
||||
ThreadFuture<int64_t> getApproximateSize() override;
|
||||
|
||||
void setOption(FDBTransactionOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
|
||||
|
@ -1024,6 +1027,7 @@ public:
|
|||
ThreadFuture<Void> protocolVersionMonitor;
|
||||
|
||||
Future<Void> sharedStateUpdater;
|
||||
bool isConfigDB;
|
||||
|
||||
// Versions older than 6.1 do not benefit from having their database connections closed. Additionally,
|
||||
// there are various issues that result in negative behavior in some cases if the connections are closed.
|
||||
|
@ -1147,6 +1151,7 @@ private:
|
|||
bool disableBypass;
|
||||
volatile bool bypassMultiClientApi;
|
||||
volatile bool externalClient;
|
||||
bool retainClientLibCopies;
|
||||
ApiVersion apiVersion;
|
||||
|
||||
int nextThread = 0;
|
||||
|
|
|
@ -249,6 +249,9 @@ struct TransactionState : ReferenceCounted<TransactionState> {
|
|||
SpanContext spanContext;
|
||||
UseProvisionalProxies useProvisionalProxies = UseProvisionalProxies::False;
|
||||
bool readVersionObtainedFromGrvProxy;
|
||||
// Measured by summing the bytes accessed by each read and write operation
|
||||
// after rounding up to the nearest page size and applying a write penalty
|
||||
int64_t totalCost = 0;
|
||||
|
||||
// Special flag to skip prepending tenant prefix to mutations and conflict ranges
|
||||
// when a dummy, internal transaction gets committed. The sole purpose of commitDummyTransaction() is to
|
||||
|
@ -268,6 +271,8 @@ struct TransactionState : ReferenceCounted<TransactionState> {
|
|||
// prefix/<key2> : '0' - any keys equal or larger than this key are (definitely) not conflicting keys
|
||||
std::shared_ptr<CoalescedKeyRangeMap<Value>> conflictingKeys;
|
||||
|
||||
bool automaticIdempotency = false;
|
||||
|
||||
// Only available so that Transaction can have a default constructor, for use in state variables
|
||||
TransactionState(TaskPriority taskID, SpanContext spanContext)
|
||||
: taskID(taskID), spanContext(spanContext), tenantSet(false) {}
|
||||
|
@ -445,6 +450,8 @@ public:
|
|||
// May be called only after commit() returns success
|
||||
Version getCommittedVersion() const { return trState->committedVersion; }
|
||||
|
||||
int64_t getTotalCost() const { return trState->totalCost; }
|
||||
|
||||
// Will be fulfilled only after commit() returns success
|
||||
[[nodiscard]] Future<Standalone<StringRef>> getVersionstamp();
|
||||
|
||||
|
@ -482,6 +489,7 @@ public:
|
|||
Database getDatabase() const { return trState->cx; }
|
||||
static Reference<TransactionLogInfo> createTrLogInfoProbabilistically(const Database& cx);
|
||||
|
||||
Transaction& getTransaction() { return *this; }
|
||||
void setTransactionID(UID id);
|
||||
void setToken(uint64_t token);
|
||||
|
||||
|
@ -563,9 +571,16 @@ ACTOR Future<std::vector<CheckpointMetaData>> getCheckpointMetaData(Database cx,
|
|||
// Checks with Data Distributor that it is safe to mark all servers in exclusions as failed
|
||||
ACTOR Future<bool> checkSafeExclusions(Database cx, std::vector<AddressExclusion> exclusions);
|
||||
|
||||
// Round up to the nearest page size
|
||||
// Measured in bytes, rounded up to the nearest page size. Multiply by fungibility ratio
|
||||
// because writes are more expensive than reads.
|
||||
inline uint64_t getWriteOperationCost(uint64_t bytes) {
|
||||
return (bytes - 1) / CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR + 1;
|
||||
return CLIENT_KNOBS->GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO * CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR *
|
||||
((bytes - 1) / CLIENT_KNOBS->WRITE_COST_BYTE_FACTOR + 1);
|
||||
}
|
||||
|
||||
// Measured in bytes, rounded up to the nearest page size.
|
||||
inline uint64_t getReadOperationCost(uint64_t bytes) {
|
||||
return ((bytes - 1) / CLIENT_KNOBS->READ_COST_BYTE_FACTOR + 1) * CLIENT_KNOBS->READ_COST_BYTE_FACTOR;
|
||||
}
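A worked illustration of the revised cost math above (editor's note; the factor values are assumptions chosen for the example, not taken from this change):

    // Assume WRITE_COST_BYTE_FACTOR == READ_COST_BYTE_FACTOR == 16384 and
    // GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO == 5.0.
    // A 1000-byte write rounds up to one 16384-byte page, then applies the write multiplier:
    //   getWriteOperationCost(1000) == 5.0 * 16384 * 1 == 81920
    // The same 1000 bytes read rounds up to one page with no multiplier:
    //   getReadOperationCost(1000) == 1 * 16384 == 16384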
|
||||
|
||||
// Create a transaction to set the value of system key \xff/conf/perpetual_storage_wiggle. If enable == true, the value
|
||||
|
|
|
@ -64,6 +64,7 @@ public:
|
|||
void clear(KeyRef const&) override;
|
||||
Future<Void> commit() override;
|
||||
Version getCommittedVersion() const override;
|
||||
int64_t getTotalCost() const override;
|
||||
int64_t getApproximateSize() const override;
|
||||
void setOption(FDBTransactionOptions::Option option, Optional<StringRef> value = Optional<StringRef>()) override;
|
||||
Future<Void> onError(Error const& e) override;
|
||||
|
|
|
@ -149,6 +149,7 @@ public:
|
|||
VersionVector getVersionVector() const override { return tr.getVersionVector(); }
|
||||
SpanContext getSpanContext() const override { return tr.getSpanContext(); }
|
||||
|
||||
int64_t getTotalCost() const override { return tr.getTotalCost(); }
|
||||
int64_t getApproximateSize() const override { return approximateSize; }
|
||||
[[nodiscard]] Future<Standalone<StringRef>> getVersionstamp() override;
|
||||
|
||||
|
|
|
@ -58,8 +58,8 @@ public:
|
|||
requests_per_second, list_requests_per_second, write_requests_per_second, read_requests_per_second,
|
||||
delete_requests_per_second, multipart_max_part_size, multipart_min_part_size, concurrent_requests,
|
||||
concurrent_uploads, concurrent_lists, concurrent_reads_per_file, concurrent_writes_per_file,
|
||||
read_block_size, read_ahead_blocks, read_cache_blocks_per_file, max_send_bytes_per_second,
|
||||
max_recv_bytes_per_second, sdk_auth;
|
||||
enable_read_cache, read_block_size, read_ahead_blocks, read_cache_blocks_per_file,
|
||||
max_send_bytes_per_second, max_recv_bytes_per_second, sdk_auth;
|
||||
bool set(StringRef name, int value);
|
||||
std::string getURLParameters() const;
|
||||
static std::vector<std::string> getKnobDescriptions() {
|
||||
|
@ -86,6 +86,7 @@ public:
|
|||
"concurrent_lists (or cl) Max concurrent list operations that can be in progress at once.",
|
||||
"concurrent_reads_per_file (or crps) Max concurrent reads in progress for any one file.",
|
||||
"concurrent_writes_per_file (or cwps) Max concurrent uploads in progress for any one file.",
|
||||
"enable_read_cache (or erc) Whether read block caching is enabled.",
|
||||
"read_block_size (or rbs) Block size in bytes to be used for reads.",
|
||||
"read_ahead_blocks (or rab) Number of blocks to read ahead of requested offset.",
|
||||
"read_cache_blocks_per_file (or rcb) Size of the read cache for a file in blocks.",
|
||||
|
|
|
@ -110,6 +110,7 @@ public:
|
|||
double BLOCKING_PEEK_TIMEOUT;
|
||||
bool PEEK_BATCHING_EMPTY_MSG;
|
||||
double PEEK_BATCHING_EMPTY_MSG_INTERVAL;
|
||||
double POP_FROM_LOG_DELAY;
|
||||
|
||||
// Data distribution queue
|
||||
double HEALTH_POLL_TIME;
|
||||
|
@ -306,16 +307,18 @@ public:
|
|||
int64_t REPLACE_CONTENTS_BYTES;
|
||||
|
||||
// KeyValueStoreRocksDB
|
||||
int ROCKSDB_READER_THREAD_PRIORITY;
|
||||
int ROCKSDB_WRITER_THREAD_PRIORITY;
|
||||
bool ROCKSDB_SET_READ_TIMEOUT;
|
||||
bool ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES;
|
||||
int ROCKSDB_SUGGEST_COMPACT_CLEAR_RANGE;
|
||||
int ROCKSDB_READ_RANGE_ROW_LIMIT;
|
||||
int ROCKSDB_READER_THREAD_PRIORITY;
|
||||
int ROCKSDB_WRITER_THREAD_PRIORITY;
|
||||
int ROCKSDB_BACKGROUND_PARALLELISM;
|
||||
int ROCKSDB_READ_PARALLELISM;
|
||||
int64_t ROCKSDB_MEMTABLE_BYTES;
|
||||
bool ROCKSDB_LEVEL_STYLE_COMPACTION;
|
||||
bool ROCKSDB_UNSAFE_AUTO_FSYNC;
|
||||
bool ROCKSDB_MUTE_LOGS;
|
||||
int64_t ROCKSDB_PERIODIC_COMPACTION_SECONDS;
|
||||
int ROCKSDB_PREFIX_LEN;
|
||||
int64_t ROCKSDB_BLOCK_CACHE_SIZE;
|
||||
|
@ -333,6 +336,8 @@ public:
|
|||
double ROCKSDB_HISTOGRAMS_SAMPLE_RATE;
|
||||
double ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME;
|
||||
bool ROCKSDB_READ_RANGE_REUSE_ITERATORS;
|
||||
bool ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS;
|
||||
int ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT;
|
||||
int64_t ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC;
|
||||
bool ROCKSDB_WRITE_RATE_LIMITER_AUTO_TUNE;
|
||||
std::string DEFAULT_FDB_ROCKSDB_COLUMN_FAMILY;
|
||||
|
@ -349,6 +354,8 @@ public:
|
|||
bool ROCKSDB_DISABLE_WAL_EXPERIMENTAL;
|
||||
bool ROCKSDB_SINGLEKEY_DELETES_ON_CLEARRANGE;
|
||||
int64_t ROCKSDB_SINGLEKEY_DELETES_BYTES_LIMIT;
|
||||
bool ROCKSDB_ENABLE_CLEAR_RANGE_EAGER_READS;
|
||||
int ROCKSDB_STATS_LEVEL;
|
||||
int64_t ROCKSDB_COMPACTION_READAHEAD_SIZE;
|
||||
int64_t ROCKSDB_BLOCK_SIZE;
|
||||
bool ENABLE_SHARDED_ROCKSDB;
|
||||
|
@ -628,14 +635,16 @@ public:
|
|||
double GLOBAL_TAG_THROTTLING_MIN_RATE;
|
||||
// Used by global tag throttling counters
|
||||
double GLOBAL_TAG_THROTTLING_FOLDING_TIME;
|
||||
// Cost multiplier for writes (because write operations are more expensive than reads)
|
||||
double GLOBAL_TAG_THROTTLING_RW_FUNGIBILITY_RATIO;
|
||||
// Maximum number of tags tracked by global tag throttler. Additional tags will be ignored
|
||||
// until some existing tags expire
|
||||
int64_t GLOBAL_TAG_THROTTLING_MAX_TAGS_TRACKED;
|
||||
// Global tag throttler forgets about throughput from a tag once no new transactions from that
|
||||
// tag have been received for this duration (in seconds):
|
||||
int64_t GLOBAL_TAG_THROTTLING_TAG_EXPIRE_AFTER;
|
||||
// Maximum duration that a transaction can be tag throttled by proxy before being rejected
|
||||
double PROXY_MAX_TAG_THROTTLE_DURATION;
|
||||
// Interval at which latency bands are logged for each tag on grv proxy
|
||||
double GLOBAL_TAG_THROTTLING_PROXY_LOGGING_INTERVAL;
|
||||
|
||||
double MAX_TRANSACTIONS_PER_BYTE;
|
||||
|
||||
|
@ -717,7 +726,6 @@ public:
int FETCH_KEYS_LOWER_PRIORITY;
int SERVE_FETCH_CHECKPOINT_PARALLELISM;
int SERVE_AUDIT_STORAGE_PARALLELISM;
int CHANGE_FEED_DISK_READS_PARALLELISM;
int BUGGIFY_BLOCK_BYTES;
int64_t STORAGE_RECOVERY_VERSION_LAG_LIMIT;
double STORAGE_DURABILITY_LAG_REJECT_THRESHOLD;

@ -740,7 +748,6 @@ public:
int64_t MIN_TAG_READ_PAGES_RATE;
int64_t MIN_TAG_WRITE_PAGES_RATE;
double TAG_MEASUREMENT_INTERVAL;
int64_t READ_COST_BYTE_FACTOR;
bool PREFIX_COMPRESS_KVS_MEM_SNAPSHOTS;
bool REPORT_DD_METRICS;
double DD_METRICS_REPORT_INTERVAL;

@ -757,6 +764,9 @@ public:
int QUICK_GET_KEY_VALUES_LIMIT;
int QUICK_GET_KEY_VALUES_LIMIT_BYTES;
int STORAGE_FEED_QUERY_HARD_LIMIT;
int STORAGE_SERVER_READ_CONCURRENCY;
std::string STORAGESERVER_READ_RANKS;
std::string STORAGESERVER_READ_PRIORITIES;

// Wait Failure
int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS;
@ -886,7 +896,6 @@ public:
int REDWOOD_DEFAULT_EXTENT_SIZE; // Extent size for new Redwood files
int REDWOOD_DEFAULT_EXTENT_READ_SIZE; // Extent read size for Redwood files
int REDWOOD_EXTENT_CONCURRENT_READS; // Max number of simultaneous extent disk reads in progress.
int REDWOOD_KVSTORE_CONCURRENT_READS; // Max number of simultaneous point or range reads in progress.
bool REDWOOD_KVSTORE_RANGE_PREFETCH; // Whether to use range read prefetching
double REDWOOD_PAGE_REBUILD_MAX_SLACK; // When rebuilding pages, max slack to allow in page
int REDWOOD_LAZY_CLEAR_BATCH_SIZE_PAGES; // Number of pages to try to pop from the lazy delete queue and process at

@ -906,6 +915,8 @@ public:
int REDWOOD_DECODECACHE_REUSE_MIN_HEIGHT; // Minimum height for which to keep and reuse page decode caches
bool REDWOOD_SPLIT_ENCRYPTED_PAGES_BY_TENANT; // Whether to split pages by tenant if encryption is enabled

std::string REDWOOD_PRIORITY_LAUNCHS;

// Server request latency measurement
int LATENCY_SAMPLE_SIZE;
double LATENCY_METRICS_LOGGING_INTERVAL;
@ -950,10 +961,14 @@ public:
int BG_MERGE_CANDIDATE_THRESHOLD_SECONDS;
int BG_MERGE_CANDIDATE_DELAY_SECONDS;
int BG_KEY_TUPLE_TRUNCATE_OFFSET;
bool BG_ENABLE_READ_DRIVEN_COMPACTION;
int BG_RDC_BYTES_FACTOR;
int BG_RDC_READ_FACTOR;

int BLOB_WORKER_INITIAL_SNAPSHOT_PARALLELISM;
int BLOB_WORKER_RESNAPSHOT_PARALLELISM;
int BLOB_WORKER_DELTA_FILE_WRITE_PARALLELISM;
int BLOB_WORKER_RDC_PARALLELISM;

double BLOB_WORKER_TIMEOUT; // Blob Manager's reaction time to a blob worker failure
double BLOB_WORKER_REQUEST_TIMEOUT; // Blob Worker's server-side request timeout

@ -972,10 +987,10 @@ public:
bool BLOB_MANIFEST_BACKUP;
double BLOB_MANIFEST_BACKUP_INTERVAL;
bool BLOB_FULL_RESTORE_MODE;
double BLOB_MIGRATOR_CHECK_INTERVAL;

// Blob metadata
int64_t BLOB_METADATA_CACHE_TTL;
int64_t BLOB_METADATA_REFRESH_INTERVAL;

// HTTP KMS Connector
std::string REST_KMS_CONNECTOR_KMS_DISCOVERY_URL_MODE;
@ -989,6 +1004,9 @@ public:
std::string REST_KMS_CONNECTOR_GET_ENCRYPTION_KEYS_ENDPOINT;
std::string REST_KMS_CONNECTOR_GET_BLOB_METADATA_ENDPOINT;

// Idempotency ids
double IDEMPOTENCY_ID_IN_MEMORY_LIFETIME;

ServerKnobs(Randomize, ClientKnobs*, IsSimulated);
void initialize(Randomize, ClientKnobs*, IsSimulated);
};
@ -76,6 +76,7 @@ public:
void reset() override;
void debugTransaction(UID dID) override;
void checkDeferredError() const override;
int64_t getTotalCost() const override;
int64_t getApproximateSize() const override;
void set(KeyRef const&, ValueRef const&) override;
void clear(KeyRangeRef const&) override { throw client_invalid_operation(); }
@ -45,7 +45,7 @@ struct CheckpointMetaData {

constexpr static FileIdentifier file_identifier = 13804342;
Version version;
KeyRange range;
std::vector<KeyRange> ranges;
int16_t format; // CheckpointFormat.
UID ssID; // Storage server ID on which this checkpoint is created.
UID checkpointID; // A unique id for this checkpoint.

@ -58,11 +58,15 @@ struct CheckpointMetaData {

CheckpointMetaData() = default;
CheckpointMetaData(KeyRange const& range, CheckpointFormat format, UID const& ssID, UID const& checkpointID)
: version(invalidVersion), range(range), format(format), ssID(ssID), checkpointID(checkpointID), state(Pending),
referenceCount(0), gcTime(0) {}
: version(invalidVersion), format(format), ssID(ssID), checkpointID(checkpointID), state(Pending),
referenceCount(0), gcTime(0) {
this->ranges.push_back(range);
}
CheckpointMetaData(Version version, KeyRange const& range, CheckpointFormat format, UID checkpointID)
: version(version), range(range), format(format), ssID(UID()), checkpointID(checkpointID), state(Pending),
referenceCount(0), gcTime(0) {}
: version(version), format(format), ssID(UID()), checkpointID(checkpointID), state(Pending), referenceCount(0),
gcTime(0) {
this->ranges.push_back(range);
}

CheckpointState getState() const { return static_cast<CheckpointState>(state); }

@ -73,7 +77,7 @@ struct CheckpointMetaData {
void setFormat(CheckpointFormat format) { this->format = static_cast<int16_t>(format); }

std::string toString() const {
std::string res = "Checkpoint MetaData:\nRange: " + range.toString() + "\nVersion: " + std::to_string(version) +
std::string res = "Checkpoint MetaData:\nRange: " + describe(ranges) + "\nVersion: " + std::to_string(version) +
"\nFormat: " + std::to_string(format) + "\nServer: " + ssID.toString() +
"\nID: " + checkpointID.toString() + "\nState: " + std::to_string(static_cast<int>(state)) +
"\n";

@ -82,7 +86,7 @@ struct CheckpointMetaData {

template <class Ar>
void serialize(Ar& ar) {
serializer(ar, version, range, format, state, checkpointID, ssID, gcTime, serializedCheckpoint);
serializer(ar, version, ranges, format, state, checkpointID, ssID, gcTime, serializedCheckpoint);
}
};
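Reviewer note on the CheckpointMetaData hunks above: the single `range` field is replaced by a `ranges` vector, and both constructors now seed it with one element. A minimal usage sketch, where `format` and `ssId` are placeholder values not taken from this diff:

    // Sketch only: construct checkpoint metadata over one range and read it back via `ranges`.
    KeyRange someRange = KeyRangeRef("a"_sr, "b"_sr); // placeholder range
    CheckpointMetaData checkpoint(someRange, format, ssId, deterministicRandom()->randomUniqueID());
    ASSERT(checkpoint.ranges.size() == 1); // the old `range` member no longer exists
    TraceEvent("CheckpointCreatedExample").detail("Checkpoint", checkpoint.toString());

Call sites that still read `range` directly will need to switch to `ranges` (or `describe(ranges)` for logging), as `toString()` and `serialize()` now do above.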
@ -99,23 +103,28 @@ struct DataMoveMetaData {
constexpr static FileIdentifier file_identifier = 13804362;
UID id; // A unique id for this data move.
Version version;
KeyRange range;
std::vector<KeyRange> ranges;
int priority;
std::set<UID> src;
std::set<UID> dest;
std::set<UID> checkpoints;
int16_t phase; // DataMoveMetaData::Phase.
int8_t mode;

DataMoveMetaData() = default;
DataMoveMetaData(UID id, Version version, KeyRange range)
: id(id), version(version), range(std::move(range)), priority(0) {}
DataMoveMetaData(UID id, KeyRange range) : id(id), version(invalidVersion), range(std::move(range)), priority(0) {}
DataMoveMetaData(UID id, Version version, KeyRange range) : id(id), version(version), priority(0), mode(0) {
this->ranges.push_back(range);
}
DataMoveMetaData(UID id, KeyRange range) : id(id), version(invalidVersion), priority(0), mode(0) {
this->ranges.push_back(range);
}

Phase getPhase() const { return static_cast<Phase>(phase); }

void setPhase(Phase phase) { this->phase = static_cast<int16_t>(phase); }

std::string toString() const {
std::string res = "DataMoveMetaData: [ID]: " + id.shortString() + " [Range]: " + range.toString() +
std::string res = "DataMoveMetaData: [ID]: " + id.shortString() + " [Range]: " + describe(ranges) +
" [Phase]: " + std::to_string(static_cast<int>(phase)) +
" [Source Servers]: " + describe(src) + " [Destination Servers]: " + describe(dest);
return res;

@ -123,7 +132,7 @@ struct DataMoveMetaData {

template <class Ar>
void serialize(Ar& ar) {
serializer(ar, id, version, range, phase, src, dest);
serializer(ar, id, version, ranges, priority, src, dest, checkpoints, phase, mode);
}
};
@ -890,16 +890,16 @@ struct ChangeFeedStreamRequest {
KeyRange range;
int replyBufferSize = -1;
bool canReadPopped = true;
UID debugUID; // This is only used for debugging and tracing, but being able to link a client + server side stream
// is so useful for testing, and this is such small overhead compared to streaming large amounts of
// change feed data, it is left in the interface
UID id; // This must be globally unique among ChangeFeedStreamRequest instances
Optional<ReadOptions> options;

ReplyPromiseStream<ChangeFeedStreamReply> reply;

ChangeFeedStreamRequest() {}
template <class Ar>
void serialize(Ar& ar) {
serializer(ar, rangeID, begin, end, range, reply, spanContext, replyBufferSize, canReadPopped, debugUID, arena);
serializer(
ar, rangeID, begin, end, range, reply, spanContext, replyBufferSize, canReadPopped, id, options, arena);
}
};
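For the ChangeFeedStreamRequest change above, the tracing-only `debugUID` becomes the wire-level `id` and an `Optional<ReadOptions>` is now serialized with the request. A hedged sketch of filling the new fields; the key, range, and version values are placeholders:

    // Sketch only: the id must be globally unique per stream request; options default-construct.
    ChangeFeedStreamRequest req;
    req.rangeID = feedKey;     // placeholder change feed key
    req.range = someRange;     // placeholder key range
    req.begin = beginVersion;  // placeholder versions
    req.end = endVersion;
    req.id = deterministicRandom()->randomUniqueID();
    req.options = ReadOptions(); // per-read options, if the caller has any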
@ -92,8 +92,6 @@ void decodeKeyServersValue(RangeResult result,
UID& destID,
bool missingIsError = true);

extern const KeyRef clusterIdKey;

extern const KeyRangeRef auditRange;
extern const KeyRef auditPrefix;
const Key auditRangeKey(const AuditType type, const UID& auditId, const KeyRef& key);

@ -505,6 +503,9 @@ extern const KeyRangeRef timeKeeperPrefixRange;
extern const KeyRef timeKeeperVersionKey;
extern const KeyRef timeKeeperDisableKey;

// Durable cluster ID key
extern const KeyRef clusterIdKey;

// Layer status metadata prefix
extern const KeyRangeRef layerStatusMetaPrefixRange;
@ -607,7 +607,7 @@ public:
Key getTagQuotaKey(TransactionTagRef);

template <class Tr>
void setTagQuota(Reference<Tr> tr, TransactionTagRef tag, double reservedQuota, double totalQuota) {
void setTagQuota(Reference<Tr> tr, TransactionTagRef tag, int64_t reservedQuota, int64_t totalQuota) {
TagQuotaValue tagQuotaValue;
tagQuotaValue.reservedQuota = reservedQuota;
tagQuotaValue.totalQuota = totalQuota;
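The setTagQuota change above switches the quota arguments from double to int64_t. A sketch of a call under the new signature; the transaction reference, tag, and values are placeholders, and the enclosing class (shown only as `public:` in the hunk) is elided:

    // Sketch only: quotas are now integral cost units rather than doubles.
    int64_t reservedQuota = 1000;
    int64_t totalQuota = 4000;
    setTagQuota(tr, "batchIngest"_sr, reservedQuota, totalQuota); // tr: placeholder Reference<Tr>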
@ -211,6 +211,31 @@ struct TenantMetadata {
};

typedef VersionedMap<TenantName, TenantMapEntry> TenantMap;
class TenantPrefixIndex : public VersionedMap<Key, TenantName>, public ReferenceCounted<TenantPrefixIndex> {};

// A set of tenant names that is generally expected to have one item in it. The set can have more than one item in it
// during certain periods when the set is being updated (e.g. while restoring a backup), but it is expected to have
// one item at the end. It is not possible to use the set while it contains more than one item.
struct TenantNameUniqueSet {
std::unordered_set<TenantName> tenantNames;

// Returns the single tenant name stored in the set
// It is an error to call this function if the set holds more than one name
TenantName get() const {
ASSERT(tenantNames.size() == 1);
return *tenantNames.begin();
}

void insert(TenantName const& name) { tenantNames.insert(name); }

// Removes a tenant name from the set. Returns true if the set is now empty.
bool remove(TenantName const& name) {
auto itr = tenantNames.find(name);
ASSERT(itr != tenantNames.end());
tenantNames.erase(itr);
return tenantNames.empty();
}
};

class TenantPrefixIndex : public VersionedMap<Key, TenantNameUniqueSet>, public ReferenceCounted<TenantPrefixIndex> {};

#endif
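To make the TenantNameUniqueSet semantics above concrete, a small sketch of the intended lifecycle (the tenant names are placeholders); the set only transiently holds more than one name:

    // Sketch only: a prefix briefly maps to two names while a restore is in flight.
    TenantNameUniqueSet names;
    names.insert("tenant_a"_sr);
    names.insert("tenant_a_restoring"_sr); // temporarily two names; get() would assert here
    bool empty = names.remove("tenant_a_restoring"_sr); // false: one name remains
    ASSERT(!empty && names.get() == "tenant_a"_sr);
    ASSERT(names.remove("tenant_a"_sr)); // true: the set is now empty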
@ -68,6 +68,10 @@ using TenantEntryCachePayloadFunc = std::function<TenantEntryCachePayload<T>(con
// 1. Lookup by 'TenantId'
// 2. Lookup by 'TenantPrefix'
// 3. Lookup by 'TenantName'
// TODO: Currently this cache performs poorly if there are tenant accesses to unknown tenants, which happens most
// frequently in optional tenant mode but can also happen in required mode if there are a lot of tenants created.
// Further, as a consequence of the design we cannot be sure that the state of a given tenant is accurate even if it
// is present in the cache.

template <class T>
class TenantEntryCache : public ReferenceCounted<TenantEntryCache<T>>, NonCopyable {
@ -205,6 +205,7 @@ public:
Version getCommittedVersion() override;
ThreadFuture<VersionVector> getVersionVector() override;
ThreadFuture<SpanContext> getSpanContext() override;
ThreadFuture<int64_t> getTotalCost() override;
ThreadFuture<int64_t> getApproximateSize() override;

ThreadFuture<uint64_t> getProtocolVersion();
@ -273,17 +273,4 @@ struct ITracer {
virtual void trace(Span const& span) = 0;
};

void openTracer(TracerType type);

template <class T>
struct SpannedDeque : Deque<T> {
Span span;
explicit SpannedDeque(Location loc) : span(loc) {}
SpannedDeque(SpannedDeque&& other) : Deque<T>(std::move(other)), span(std::move(other.span)) {}
SpannedDeque(SpannedDeque const&) = delete;
SpannedDeque& operator=(SpannedDeque const&) = delete;
SpannedDeque& operator=(SpannedDeque&& other) {
*static_cast<Deque<T>*>(this) = std::move(other);
span = std::move(other.span);
}
};
void openTracer(TracerType type);
@ -77,6 +77,7 @@ template <class T>
class PTreeFinger {
using PTreeFingerEntry = PTree<T> const*;
// This finger size supports trees with up to exp(96/4.3) ~= 4,964,514,749 entries.
// The number 4.3 comes from here: https://en.wikipedia.org/wiki/Random_binary_tree#The_longest_path
// see also: check().
static constexpr size_t N = 96;
PTreeFingerEntry entries_[N];
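A quick arithmetic check of the finger-size bound quoted in the PTreeFinger comment above: the expected longest path of a random binary tree is roughly 4.3 * ln(n), so a 96-entry finger covers trees up to n = exp(96 / 4.3).

    #include <cmath>
    #include <cstdio>
    int main() {
        // exp(96 / 4.3) ~= 4.96e9, matching the ~4,964,514,749 figure in the comment.
        std::printf("%.0f\n", std::exp(96.0 / 4.3));
        return 0;
    }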
@ -120,6 +120,8 @@ description is not currently required but encouraged.
<Option name="future_version_client_library" code="66"
paramType="String" paramDescription="path to client library"
description="Adds an external client library to be used with a future version protocol. This option can be used for testing purposes only!" />
<Option name="retain_client_library_copies" code="67"
description="Retain temporary external client library copies that are created for enabling multi-threading." />
<Option name="disable_client_statistics_logging" code="70"
description="Disables logging of client statistics, such as sampled transaction activity." />
<Option name="enable_slow_task_profiling" code="71"

@ -279,7 +281,7 @@ description is not currently required but encouraged.
description="Set the transaction size limit in bytes. The size is calculated by combining the sizes of all keys and values written or mutated, all key ranges cleared, and all read and write conflict ranges. (In other words, it includes the total size of all data included in the request to the cluster to commit the transaction.) Large transactions can cause performance problems on FoundationDB clusters, so setting this limit to a smaller value than the default can help prevent the client from accidentally degrading the cluster's performance. This value must be at least 32 and cannot be set to higher than 10,000,000, the default transaction size limit." />
<Option name="idempotency_id" code="504"
paramType="String" paramDescription="Unique ID"
description="Associate this transaction with this ID for the purpose of checking whether or not this transaction has already committed. Must be at least 16 bytes and less than 256 bytes. This feature is in development and not ready for general use."
description="Associate this transaction with this ID for the purpose of checking whether or not this transaction has already committed. Must be at least 16 bytes and less than 256 bytes. This feature is in development and not ready for general use. Unless the automatic_idempotency option is set after this option, the client will not automatically attempt to remove this id from the cluster after a successful commit."
hidden="true" />
<Option name="automatic_idempotency" code="505"
description="Automatically assign a random 16 byte idempotency id for this transaction. Prevents commits from failing with ``commit_unknown_result``. WARNING: If you are also using the multiversion client or transaction timeouts, if either cluster_version_changed or transaction_timed_out was thrown during a commit, then that commit may have already succeeded or may succeed in the future. This feature is in development and not ready for general use."
|
@ -5,9 +5,8 @@ get_target_property(fdbclient_target_includes fdbclient INCLUDE_DIRECTORIES)
|
|||
target_link_libraries(fdbmonitor PUBLIC SimpleOpt)
|
||||
target_include_directories(fdbmonitor PUBLIC "${fdbclient_target_includes}")
|
||||
strip_debug_symbols(fdbmonitor)
|
||||
assert_no_version_h(fdbmonitor)
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_link_libraries(fdbmonitor PRIVATE rt)
|
||||
target_link_libraries(fdbmonitor PRIVATE rt)
|
||||
endif()
|
||||
# FIXME: This include directory is an ugly hack. We probably want to fix this.
|
||||
# as soon as we get rid of the old build system
|
||||
|
@ -17,17 +16,17 @@ target_link_libraries(fdbmonitor PUBLIC Threads::Threads)
|
|||
# appears to change its behavior (it no longer seems to restart killed
|
||||
# processes). fdbmonitor is single-threaded anyway.
|
||||
get_target_property(fdbmonitor_options fdbmonitor COMPILE_OPTIONS)
|
||||
if (NOT "${fdbmonitor_options}" STREQUAL "fdbmonitor_options-NOTFOUND")
|
||||
if(NOT "${fdbmonitor_options}" STREQUAL "fdbmonitor_options-NOTFOUND")
|
||||
list(REMOVE_ITEM fdbmonitor_options "-fsanitize=thread")
|
||||
set_property(TARGET fdbmonitor PROPERTY COMPILE_OPTIONS ${fdbmonitor_options})
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
get_target_property(fdbmonitor_options fdbmonitor LINK_OPTIONS)
|
||||
|
||||
if (NOT "${fdbmonitor_options}" STREQUAL "fdbmonitor_options-NOTFOUND")
|
||||
if(NOT "${fdbmonitor_options}" STREQUAL "fdbmonitor_options-NOTFOUND")
|
||||
list(REMOVE_ITEM fdbmonitor_options "-fsanitize=thread")
|
||||
set_property(TARGET fdbmonitor PROPERTY LINK_OPTIONS ${fdbmonitor_options})
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if(GENERATE_DEBUG_PACKAGES)
|
||||
fdb_install(TARGETS fdbmonitor DESTINATION fdbmonitor COMPONENT server)
|
||||
|
@ -51,7 +50,7 @@ add_custom_target(clean_sandbox
|
|||
|
||||
add_custom_target(start_sandbox
|
||||
COMMAND ${CMAKE_BINARY_DIR}/bin/fdbmonitor --conffile ${CMAKE_BINARY_DIR}/sandbox/foundationdb.conf
|
||||
--lockfile ${CMAKE_BINARY_DIR}/sandbox/fdbmonitor.lock)
|
||||
--lockfile ${CMAKE_BINARY_DIR}/sandbox/fdbmonitor.lock)
|
||||
|
||||
add_dependencies(start_sandbox fdbmonitor fdbserver)
|
||||
|
||||
|
@ -61,6 +60,6 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/contrib/generate_profile.sh)
|
|||
endif()
|
||||
|
||||
add_custom_target(generate_profile
|
||||
COMMAND ${CMAKE_BINARY_DIR}/contrib/generate_profile.sh ${CMAKE_BINARY_DIR})
|
||||
COMMAND ${CMAKE_BINARY_DIR}/contrib/generate_profile.sh ${CMAKE_BINARY_DIR})
|
||||
|
||||
add_dependencies(generate_profile fdbmonitor fdbserver mako fdbcli)
|
||||
|
|
|
@ -133,3 +133,50 @@ Future<Void> CounterCollection::traceCounters(std::string const& traceEventName,
|
|||
return CounterCollectionImpl::traceCounters(
|
||||
this, traceEventName, traceEventID, interval, trackLatestName, decorator);
|
||||
}
|
||||
|
||||
void LatencyBands::insertBand(double value) {
|
||||
bands.emplace(std::make_pair(value, std::make_unique<Counter>(format("Band%f", value), *cc)));
|
||||
}
|
||||
|
||||
FDB_DEFINE_BOOLEAN_PARAM(Filtered);
|
||||
|
||||
LatencyBands::LatencyBands(std::string const& name,
|
||||
UID id,
|
||||
double loggingInterval,
|
||||
std::function<void(TraceEvent&)> const& decorator)
|
||||
: name(name), id(id), loggingInterval(loggingInterval), decorator(decorator) {}
|
||||
|
||||
void LatencyBands::addThreshold(double value) {
|
||||
if (value > 0 && bands.count(value) == 0) {
|
||||
if (bands.size() == 0) {
|
||||
ASSERT(!cc && !filteredCount);
|
||||
cc = std::make_unique<CounterCollection>(name, id.toString());
|
||||
logger = cc->traceCounters(name, id, loggingInterval, id.toString() + "/" + name, decorator);
|
||||
filteredCount = std::make_unique<Counter>("Filtered", *cc);
|
||||
insertBand(std::numeric_limits<double>::infinity());
|
||||
}
|
||||
|
||||
insertBand(value);
|
||||
}
|
||||
}
|
||||
|
||||
void LatencyBands::addMeasurement(double measurement, int count, Filtered filtered) {
|
||||
if (filtered && filteredCount) {
|
||||
(*filteredCount) += count;
|
||||
} else if (bands.size() > 0) {
|
||||
auto itr = bands.upper_bound(measurement);
|
||||
ASSERT(itr != bands.end());
|
||||
(*itr->second) += count;
|
||||
}
|
||||
}
|
||||
|
||||
void LatencyBands::clearBands() {
|
||||
logger = Void();
|
||||
bands.clear();
|
||||
filteredCount.reset();
|
||||
cc.reset();
|
||||
}
|
||||
|
||||
LatencyBands::~LatencyBands() {
|
||||
clearBands();
|
||||
}
|
||||
|
|
|
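For the LatencyBands implementation added above, a short usage sketch (the name, UID, interval, and thresholds are placeholders): the first addThreshold() lazily creates the counter collection, the "Filtered" counter, and the infinity band, and addMeasurement() bumps the first band whose threshold exceeds the measurement.

    // Sketch only: three bands (0.01s, 0.1s, +inf) plus a Filtered counter.
    LatencyBands bands("ReadLatencyBands",
                       deterministicRandom()->randomUniqueID(),
                       /*loggingInterval*/ 5.0,
                       [](TraceEvent&) {}); // no-op decorator
    bands.addThreshold(0.01);
    bands.addThreshold(0.1);
    bands.addMeasurement(0.004, 1, Filtered::False); // upper_bound(0.004) -> the 0.01 band
    bands.addMeasurement(0.25, 1, Filtered::False);  // upper_bound(0.25) -> the infinity band
    bands.addMeasurement(1.0, 1, Filtered::True);    // only increments the Filtered counter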
@ -757,12 +757,18 @@ Optional<BasicLoadBalancedReply> getBasicLoadBalancedReply(const BasicLoadBalanc
Optional<BasicLoadBalancedReply> getBasicLoadBalancedReply(const void*);

// A simpler version of LoadBalance that does not send second requests where the list of servers are always fresh
//
// If |alternativeChosen| is not null, then atMostOnce must be True, and if the returned future completes successfully
// then *alternativeChosen will be the alternative to which the message was sent. *alternativeChosen must outlive the
// returned future.
ACTOR template <class Interface, class Request, class Multi, bool P>
Future<REPLY_TYPE(Request)> basicLoadBalance(Reference<ModelInterface<Multi>> alternatives,
RequestStream<Request, P> Interface::*channel,
Request request = Request(),
TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
AtMostOnce atMostOnce = AtMostOnce::False) {
AtMostOnce atMostOnce = AtMostOnce::False,
int* alternativeChosen = nullptr) {
ASSERT(alternativeChosen == nullptr || atMostOnce == AtMostOnce::True);
setReplyPriority(request, taskID);
if (!alternatives)
return Never();

@ -791,6 +797,9 @@ Future<REPLY_TYPE(Request)> basicLoadBalance(Reference<ModelInterface<Multi>> al
useAlt = (nextAlt + alternatives->size() - 1) % alternatives->size();

stream = &alternatives->get(useAlt, channel);
if (alternativeChosen != nullptr) {
*alternativeChosen = useAlt;
}
if (!IFailureMonitor::failureMonitor().getState(stream->getEndpoint()).failed)
break;
nextAlt = (nextAlt + 1) % alternatives->size();
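For the basicLoadBalance change above, a hedged sketch of a caller that wants to know which alternative served the request; the interface, endpoint, and request names are illustrative placeholders, and atMostOnce must be True whenever alternativeChosen is passed:

    // Sketch only: `proxies`, `SomeInterface::someEndpoint`, and `SomeRequest` are illustrative names.
    int chosen = -1;
    auto replyFuture = basicLoadBalance(proxies,
                                        &SomeInterface::someEndpoint,
                                        SomeRequest(),
                                        TaskPriority::DefaultPromiseEndpoint,
                                        AtMostOnce::True, // required when alternativeChosen is passed
                                        &chosen);
    // Once replyFuture completes successfully, `chosen` holds the index of the alternative
    // the request was sent to; the int must outlive the returned future.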