Merge branch 'main' into feature-metacluster

A.J. Beamon 2022-06-09 12:39:34 -07:00
commit 739fc9ce6b
71 changed files with 2335 additions and 1443 deletions

View File

@ -121,8 +121,6 @@ if(NOT WIN32)
test/apitester/fdb_c_api_tester.cpp
test/apitester/TesterApiWorkload.cpp
test/apitester/TesterApiWorkload.h
test/apitester/TesterApiWrapper.cpp
test/apitester/TesterApiWrapper.h
test/apitester/TesterTestSpec.cpp
test/apitester/TesterTestSpec.h
test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp
@ -191,9 +189,9 @@ if(NOT WIN32)
target_link_libraries(disconnected_timeout_unit_tests PRIVATE fdb_c Threads::Threads)
if(USE_SANITIZER)
target_link_libraries(fdb_c_api_tester PRIVATE fdb_c toml11_target Threads::Threads fmt::fmt boost_asan)
target_link_libraries(fdb_c_api_tester PRIVATE fdb_c fdb_cpp toml11_target Threads::Threads fmt::fmt boost_asan)
else()
target_link_libraries(fdb_c_api_tester PRIVATE fdb_c toml11_target Threads::Threads fmt::fmt boost_target)
target_link_libraries(fdb_c_api_tester PRIVATE fdb_c fdb_cpp toml11_target Threads::Threads fmt::fmt boost_target)
endif()
# do not set RPATH for mako

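The link-line changes above are the build-system half of this commit: fdb_c_api_tester now also links fdb_cpp, the header-only C++ API wrapper whose types replace the hand-rolled TesterApiWrapper removed later in this diff. The source-side counterpart is a single include, as seen in the workload hunks below:

#include "test/fdb_api.hpp" // fdb::Database, fdb::Transaction, fdb::Future, fdb::Error, ...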
View File

@ -20,6 +20,7 @@
#include "TesterApiWorkload.h"
#include "TesterUtil.h"
#include "test/fdb_api.hpp"
#include <fmt/format.h>
namespace FdbApiTester {
@ -35,7 +36,7 @@ ApiWorkload::ApiWorkload(const WorkloadConfig& config) : WorkloadBase(config) {
runUntilStop = config.getBoolOption("runUntilStop", false);
numRandomOperations = config.getIntOption("numRandomOperations", 1000);
numOperationsForProgressCheck = config.getIntOption("numOperationsForProgressCheck", 10);
keyPrefix = fmt::format("{}/", workloadId);
keyPrefix = fdb::toBytesRef(fmt::format("{}/", workloadId));
numRandomOpLeft = 0;
stopReceived = false;
checkingProgress = false;
@ -105,26 +106,26 @@ void ApiWorkload::randomOperation(TTaskFct cont) {
ASSERT(false);
}
std::string ApiWorkload::randomKeyName() {
fdb::Key ApiWorkload::randomKeyName() {
return keyPrefix + Random::get().randomStringLowerCase(minKeyLength, maxKeyLength);
}
std::string ApiWorkload::randomValue() {
fdb::Value ApiWorkload::randomValue() {
return Random::get().randomStringLowerCase(minValueLength, maxValueLength);
}
std::string ApiWorkload::randomNotExistingKey() {
fdb::Key ApiWorkload::randomNotExistingKey() {
while (true) {
std::string key = randomKeyName();
fdb::Key key = randomKeyName();
if (!store.exists(key)) {
return key;
}
}
}
std::string ApiWorkload::randomExistingKey() {
std::string genKey = randomKeyName();
std::string key = store.getKey(genKey, true, 1);
fdb::Key ApiWorkload::randomExistingKey() {
fdb::Key genKey = randomKeyName();
fdb::Key key = store.getKey(genKey, true, 1);
if (key != store.endKey()) {
return key;
}
@ -136,7 +137,7 @@ std::string ApiWorkload::randomExistingKey() {
return genKey;
}
std::string ApiWorkload::randomKey(double existingKeyRatio) {
fdb::Key ApiWorkload::randomKey(double existingKeyRatio) {
if (Random::get().randomBool(existingKeyRatio)) {
return randomExistingKey();
} else {
@ -146,19 +147,19 @@ std::string ApiWorkload::randomKey(double existingKeyRatio) {
void ApiWorkload::populateDataTx(TTaskFct cont) {
int numKeys = maxKeysPerTransaction;
auto kvPairs = std::make_shared<std::vector<KeyValue>>();
auto kvPairs = std::make_shared<std::vector<fdb::KeyValue>>();
for (int i = 0; i < numKeys; i++) {
kvPairs->push_back(KeyValue{ randomNotExistingKey(), randomValue() });
kvPairs->push_back(fdb::KeyValue{ randomNotExistingKey(), randomValue() });
}
execTransaction(
[kvPairs](auto ctx) {
for (const KeyValue& kv : *kvPairs) {
ctx->tx()->set(kv.key, kv.value);
for (const fdb::KeyValue& kv : *kvPairs) {
ctx->tx().set(kv.key, kv.value);
}
ctx->commit();
},
[this, kvPairs, cont]() {
for (const KeyValue& kv : *kvPairs) {
for (const fdb::KeyValue& kv : *kvPairs) {
store.set(kv.key, kv.value);
}
schedule(cont);
@ -168,7 +169,7 @@ void ApiWorkload::populateDataTx(TTaskFct cont) {
void ApiWorkload::clearData(TTaskFct cont) {
execTransaction(
[this](auto ctx) {
ctx->tx()->clearRange(keyPrefix, fmt::format("{}\xff", keyPrefix));
ctx->tx().clearRange(keyPrefix, keyPrefix + fdb::Key(1, '\xff'));
ctx->commit();
},
[this, cont]() { schedule(cont); });
@ -185,19 +186,19 @@ void ApiWorkload::populateData(TTaskFct cont) {
void ApiWorkload::randomInsertOp(TTaskFct cont) {
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
auto kvPairs = std::make_shared<std::vector<KeyValue>>();
auto kvPairs = std::make_shared<std::vector<fdb::KeyValue>>();
for (int i = 0; i < numKeys; i++) {
kvPairs->push_back(KeyValue{ randomNotExistingKey(), randomValue() });
kvPairs->push_back(fdb::KeyValue{ randomNotExistingKey(), randomValue() });
}
execTransaction(
[kvPairs](auto ctx) {
for (const KeyValue& kv : *kvPairs) {
ctx->tx()->set(kv.key, kv.value);
for (const fdb::KeyValue& kv : *kvPairs) {
ctx->tx().set(kv.key, kv.value);
}
ctx->commit();
},
[this, kvPairs, cont]() {
for (const KeyValue& kv : *kvPairs) {
for (const fdb::KeyValue& kv : *kvPairs) {
store.set(kv.key, kv.value);
}
schedule(cont);
@ -206,14 +207,14 @@ void ApiWorkload::randomInsertOp(TTaskFct cont) {
void ApiWorkload::randomClearOp(TTaskFct cont) {
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
auto keys = std::make_shared<std::vector<std::string>>();
auto keys = std::make_shared<std::vector<fdb::Key>>();
for (int i = 0; i < numKeys; i++) {
keys->push_back(randomExistingKey());
}
execTransaction(
[keys](auto ctx) {
for (const auto& key : *keys) {
ctx->tx()->clear(key);
ctx->tx().clear(key);
}
ctx->commit();
},
@ -226,14 +227,14 @@ void ApiWorkload::randomClearOp(TTaskFct cont) {
}
void ApiWorkload::randomClearRangeOp(TTaskFct cont) {
std::string begin = randomKeyName();
std::string end = randomKeyName();
fdb::Key begin = randomKeyName();
fdb::Key end = randomKeyName();
if (begin > end) {
std::swap(begin, end);
}
execTransaction(
[begin, end](auto ctx) {
ctx->tx()->clearRange(begin, end);
ctx->tx().clearRange(begin, end);
ctx->commit();
},
[this, begin, end, cont]() {

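The recurring change in this file is a type migration: std::string keys and values become fdb::Key and fdb::Value (byte strings), and formatted range bounds become byte-string arithmetic. A minimal standalone sketch of the new clearData() range-end construction, with std::string standing in for fdb::Key and a made-up prefix:

#include <cassert>
#include <string>

int main() {
	std::string keyPrefix = "workload1/"; // stand-in for fdb::Key; value is hypothetical
	// Old: fmt::format("{}\xff", keyPrefix). New: append a single 0xff byte.
	std::string end = keyPrefix + std::string(1, '\xff');
	assert(end.size() == keyPrefix.size() + 1);
	assert(static_cast<unsigned char>(end.back()) == 0xff);
	// clearRange(keyPrefix, end) then covers every key sharing the prefix.
	return 0;
}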
View File

@ -94,7 +94,7 @@ protected:
std::atomic<int> numRandomOpLeft;
// Key prefix
std::string keyPrefix;
fdb::Key keyPrefix;
// In-memory store maintaining expected database state
KeyValueStore store;
@ -102,11 +102,11 @@ protected:
ApiWorkload(const WorkloadConfig& config);
// Methods for generating random keys and values
std::string randomKeyName();
std::string randomValue();
std::string randomNotExistingKey();
std::string randomExistingKey();
std::string randomKey(double existingKeyRatio);
fdb::Key randomKeyName();
fdb::Value randomValue();
fdb::Key randomNotExistingKey();
fdb::Key randomExistingKey();
fdb::Key randomKey(double existingKeyRatio);
// Generate initial random data for the workload
void populateData(TTaskFct cont);

View File

@ -1,255 +0,0 @@
/*
* TesterApiWrapper.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TesterApiWrapper.h"
#include <cstdint>
#include <fmt/format.h>
#include <fstream>
namespace FdbApiTester {
namespace {
void fdb_check(fdb_error_t e) {
if (e) {
fmt::print(stderr, "Unexpected error: {}\n", fdb_get_error(e));
std::abort();
}
}
} // namespace
Future::Future(FDBFuture* f) : future_(f, fdb_future_destroy) {}
void Future::reset() {
future_.reset();
}
void Future::cancel() {
ASSERT(future_);
fdb_future_cancel(future_.get());
}
fdb_error_t Future::getError() const {
ASSERT(future_);
return fdb_future_get_error(future_.get());
}
std::optional<std::string> ValueFuture::getValue() const {
ASSERT(future_);
int out_present;
const std::uint8_t* val;
int vallen;
fdb_check(fdb_future_get_value(future_.get(), &out_present, &val, &vallen));
return out_present ? std::make_optional(std::string((const char*)val, vallen)) : std::nullopt;
}
std::vector<KeyValue> KeyRangesFuture::getKeyRanges() const {
ASSERT(future_);
int count;
const FDBKeyRange* ranges;
fdb_check(fdb_future_get_keyrange_array(future_.get(), &ranges, &count));
std::vector<KeyValue> result;
result.reserve(count);
for (int i = 0; i < count; i++) {
FDBKeyRange kr = *ranges++;
KeyValue rkv;
rkv.key = std::string((const char*)kr.begin_key, kr.begin_key_length);
rkv.value = std::string((const char*)kr.end_key, kr.end_key_length);
result.push_back(rkv);
}
return result;
}
Result::Result(FDBResult* r) : result_(r, fdb_result_destroy) {}
std::vector<KeyValue> KeyValuesResult::getKeyValues(bool* more_out) {
ASSERT(result_);
int count;
const FDBKeyValue* kvs;
int more;
std::vector<KeyValue> result;
error_ = fdb_result_get_keyvalue_array(result_.get(), &kvs, &count, &more);
if (error_ != error_code_success) {
return result;
}
result.reserve(count);
for (int i = 0; i < count; i++) {
FDBKeyValue kv = *kvs++;
KeyValue rkv;
rkv.key = std::string((const char*)kv.key, kv.key_length);
rkv.value = std::string((const char*)kv.value, kv.value_length);
result.push_back(rkv);
}
*more_out = more;
return result;
}
// Given an FDBTransaction, wraps it and manages its lifetime.
Transaction::Transaction(FDBTransaction* tx) : tx_(tx, fdb_transaction_destroy) {}
ValueFuture Transaction::get(std::string_view key, fdb_bool_t snapshot) {
ASSERT(tx_);
return ValueFuture(fdb_transaction_get(tx_.get(), (const uint8_t*)key.data(), key.size(), snapshot));
}
void Transaction::set(std::string_view key, std::string_view value) {
ASSERT(tx_);
fdb_transaction_set(tx_.get(), (const uint8_t*)key.data(), key.size(), (const uint8_t*)value.data(), value.size());
}
void Transaction::clear(std::string_view key) {
ASSERT(tx_);
fdb_transaction_clear(tx_.get(), (const uint8_t*)key.data(), key.size());
}
void Transaction::clearRange(std::string_view begin, std::string_view end) {
ASSERT(tx_);
fdb_transaction_clear_range(
tx_.get(), (const uint8_t*)begin.data(), begin.size(), (const uint8_t*)end.data(), end.size());
}
Future Transaction::commit() {
ASSERT(tx_);
return Future(fdb_transaction_commit(tx_.get()));
}
void Transaction::cancel() {
ASSERT(tx_);
fdb_transaction_cancel(tx_.get());
}
Future Transaction::onError(fdb_error_t err) {
ASSERT(tx_);
return Future(fdb_transaction_on_error(tx_.get(), err));
}
void Transaction::reset() {
ASSERT(tx_);
fdb_transaction_reset(tx_.get());
}
fdb_error_t Transaction::setOption(FDBTransactionOption option) {
ASSERT(tx_);
return fdb_transaction_set_option(tx_.get(), option, reinterpret_cast<const uint8_t*>(""), 0);
}
class TesterGranuleContext {
public:
std::unordered_map<int64_t, uint8_t*> loadsInProgress;
int64_t nextId = 0;
std::string basePath;
~TesterGranuleContext() {
// if there was an error or not all loads finished, delete data
for (auto& it : loadsInProgress) {
uint8_t* dataToFree = it.second;
delete[] dataToFree;
}
}
};
static int64_t granule_start_load(const char* filename,
int filenameLength,
int64_t offset,
int64_t length,
int64_t fullFileLength,
void* context) {
TesterGranuleContext* ctx = (TesterGranuleContext*)context;
int64_t loadId = ctx->nextId++;
uint8_t* buffer = new uint8_t[length];
std::ifstream fin(ctx->basePath + std::string(filename, filenameLength), std::ios::in | std::ios::binary);
fin.seekg(offset);
fin.read((char*)buffer, length);
ctx->loadsInProgress.insert({ loadId, buffer });
return loadId;
}
static uint8_t* granule_get_load(int64_t loadId, void* context) {
TesterGranuleContext* ctx = (TesterGranuleContext*)context;
return ctx->loadsInProgress.at(loadId);
}
static void granule_free_load(int64_t loadId, void* context) {
TesterGranuleContext* ctx = (TesterGranuleContext*)context;
auto it = ctx->loadsInProgress.find(loadId);
uint8_t* dataToFree = it->second;
delete[] dataToFree;
ctx->loadsInProgress.erase(it);
}
KeyValuesResult Transaction::readBlobGranules(std::string_view begin,
std::string_view end,
const std::string& basePath) {
ASSERT(tx_);
TesterGranuleContext testerContext;
testerContext.basePath = basePath;
FDBReadBlobGranuleContext granuleContext;
granuleContext.userContext = &testerContext;
granuleContext.debugNoMaterialize = false;
granuleContext.granuleParallelism = 1;
granuleContext.start_load_f = &granule_start_load;
granuleContext.get_load_f = &granule_get_load;
granuleContext.free_load_f = &granule_free_load;
return KeyValuesResult(fdb_transaction_read_blob_granules(tx_.get(),
(const uint8_t*)begin.data(),
begin.size(),
(const uint8_t*)end.data(),
end.size(),
0 /* beginVersion */,
-2 /* latest read version */,
granuleContext));
}
KeyRangesFuture Transaction::getBlobGranuleRanges(std::string_view begin, std::string_view end) {
ASSERT(tx_);
return KeyRangesFuture(fdb_transaction_get_blob_granule_ranges(
tx_.get(), (const uint8_t*)begin.data(), begin.size(), (const uint8_t*)end.data(), end.size()));
}
fdb_error_t FdbApi::setOption(FDBNetworkOption option, std::string_view value) {
return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(value.data()), value.size());
}
fdb_error_t FdbApi::setOption(FDBNetworkOption option, int64_t value) {
return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(&value), sizeof(value));
}
fdb_error_t FdbApi::setOption(FDBNetworkOption option) {
return fdb_network_set_option(option, reinterpret_cast<const uint8_t*>(""), 0);
}
} // namespace FdbApiTester

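Everything above is deleted wholesale: the hand-written C-API wrapper (Future, ValueFuture, Transaction, FdbApi) is superseded by the fdb_cpp types from test/fdb_api.hpp. A before/after sketch of the value-read path, with the new-side names taken from later hunks in this diff (exact signatures are assumptions):

// Old (TesterApiWrapper):
//   ValueFuture f = ctx->tx()->get(key, /*snapshot=*/false);
//   std::optional<std::string> v = f.getValue();
//
// New (fdb_cpp):
//   fdb::Future f = ctx->tx().get(key, /*snapshot=*/false).eraseType();
//   std::optional<fdb::Value> v = copyValueRef(f.get<fdb::future_var::ValueRef>());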
View File

@ -1,129 +0,0 @@
/*
* TesterApiWrapper.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef APITESTER_API_WRAPPER_H
#define APITESTER_API_WRAPPER_H
#include <string_view>
#include <optional>
#include <memory>
#include <unordered_map>
#define FDB_API_VERSION 720
#include "bindings/c/foundationdb/fdb_c.h"
#undef ERROR
#define ERROR(name, number, description) enum { error_code_##name = number };
#include "flow/error_definitions.h"
#include "TesterUtil.h"
namespace FdbApiTester {
// Wrapper parent class to manage memory of an FDBFuture pointer. Cleans up
// FDBFuture when this instance goes out of scope.
class Future {
public:
Future() = default;
Future(FDBFuture* f);
FDBFuture* fdbFuture() { return future_.get(); };
fdb_error_t getError() const;
explicit operator bool() const { return future_ != nullptr; };
void reset();
void cancel();
protected:
std::shared_ptr<FDBFuture> future_;
};
class ValueFuture : public Future {
public:
ValueFuture() = default;
ValueFuture(FDBFuture* f) : Future(f) {}
std::optional<std::string> getValue() const;
};
class KeyRangesFuture : public Future {
public:
KeyRangesFuture() = default;
KeyRangesFuture(FDBFuture* f) : Future(f) {}
std::vector<KeyValue> getKeyRanges() const;
};
class Result {
public:
Result() = default;
Result(FDBResult* r);
FDBResult* fdbResult() { return result_.get(); };
fdb_error_t getError() const { return error_; }
explicit operator bool() const { return result_ != nullptr; };
fdb_error_t error_ = error_code_client_invalid_operation; // have to call getX function to set this
protected:
std::shared_ptr<FDBResult> result_;
};
class KeyValuesResult : public Result {
public:
KeyValuesResult() = default;
KeyValuesResult(FDBResult* f) : Result(f) {}
std::vector<KeyValue> getKeyValues(bool* more_out);
};
class Transaction {
public:
Transaction() = default;
Transaction(FDBTransaction* tx);
ValueFuture get(std::string_view key, fdb_bool_t snapshot);
void set(std::string_view key, std::string_view value);
void clear(std::string_view key);
void clearRange(std::string_view begin, std::string_view end);
Future commit();
void cancel();
Future onError(fdb_error_t err);
void reset();
fdb_error_t setOption(FDBTransactionOption option);
KeyValuesResult readBlobGranules(std::string_view begin, std::string_view end, const std::string& basePath);
KeyRangesFuture getBlobGranuleRanges(std::string_view begin, std::string_view end);
private:
std::shared_ptr<FDBTransaction> tx_;
};
class FdbApi {
public:
static fdb_error_t setOption(FDBNetworkOption option, std::string_view value);
static fdb_error_t setOption(FDBNetworkOption option, int64_t value);
static fdb_error_t setOption(FDBNetworkOption option);
};
} // namespace FdbApiTester
#endif

View File

@ -24,6 +24,55 @@
namespace FdbApiTester {
class TesterGranuleContext {
public:
std::unordered_map<int64_t, uint8_t*> loadsInProgress;
int64_t nextId = 0;
std::string basePath;
~TesterGranuleContext() {
// if there was an error or not all loads finished, delete data
for (auto& it : loadsInProgress) {
uint8_t* dataToFree = it.second;
delete[] dataToFree;
}
}
};
static int64_t granule_start_load(const char* filename,
int filenameLength,
int64_t offset,
int64_t length,
int64_t fullFileLength,
void* context) {
TesterGranuleContext* ctx = (TesterGranuleContext*)context;
int64_t loadId = ctx->nextId++;
uint8_t* buffer = new uint8_t[length];
std::ifstream fin(ctx->basePath + std::string(filename, filenameLength), std::ios::in | std::ios::binary);
fin.seekg(offset);
fin.read((char*)buffer, length);
ctx->loadsInProgress.insert({ loadId, buffer });
return loadId;
}
static uint8_t* granule_get_load(int64_t loadId, void* context) {
TesterGranuleContext* ctx = (TesterGranuleContext*)context;
return ctx->loadsInProgress.at(loadId);
}
static void granule_free_load(int64_t loadId, void* context) {
TesterGranuleContext* ctx = (TesterGranuleContext*)context;
auto it = ctx->loadsInProgress.find(loadId);
uint8_t* dataToFree = it->second;
delete[] dataToFree;
ctx->loadsInProgress.erase(it);
}
class ApiBlobGranuleCorrectnessWorkload : public ApiWorkload {
public:
ApiBlobGranuleCorrectnessWorkload(const WorkloadConfig& config) : ApiWorkload(config) {
@ -42,28 +91,41 @@ private:
bool seenReadSuccess = false;
void randomReadOp(TTaskFct cont) {
std::string begin = randomKeyName();
std::string end = randomKeyName();
auto results = std::make_shared<std::vector<KeyValue>>();
fdb::Key begin = randomKeyName();
fdb::Key end = randomKeyName();
auto results = std::make_shared<std::vector<fdb::KeyValue>>();
auto tooOld = std::make_shared<bool>(false);
if (begin > end) {
std::swap(begin, end);
}
execTransaction(
[this, begin, end, results, tooOld](auto ctx) {
ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
KeyValuesResult res = ctx->tx()->readBlobGranules(begin, end, ctx->getBGBasePath());
bool more = false;
(*results) = res.getKeyValues(&more);
if (res.getError() == error_code_blob_granule_transaction_too_old) {
ctx->tx().setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE);
TesterGranuleContext testerContext;
testerContext.basePath = ctx->getBGBasePath();
fdb::native::FDBReadBlobGranuleContext granuleContext;
granuleContext.userContext = &testerContext;
granuleContext.debugNoMaterialize = false;
granuleContext.granuleParallelism = 1;
granuleContext.start_load_f = &granule_start_load;
granuleContext.get_load_f = &granule_get_load;
granuleContext.free_load_f = &granule_free_load;
fdb::Result res = ctx->tx().readBlobGranules(
begin, end, 0 /* beginVersion */, -2 /* latest read version */, granuleContext);
auto out = fdb::Result::KeyValueRefArray{};
fdb::Error err = res.getKeyValueArrayNothrow(out);
if (err.code() == error_code_blob_granule_transaction_too_old) {
info("BlobGranuleCorrectness::randomReadOp bg too old\n");
ASSERT(!seenReadSuccess);
*tooOld = true;
ctx->done();
} else if (res.getError() != error_code_success) {
ctx->onError(res.getError());
} else if (err.code() != error_code_success) {
ctx->onError(err);
} else {
ASSERT(!more);
auto& [out_kv, out_count, out_more] = out;
ASSERT(!out_more);
if (!seenReadSuccess) {
info("BlobGranuleCorrectness::randomReadOp first success\n");
}
@ -73,7 +135,7 @@ private:
},
[this, begin, end, results, tooOld, cont]() {
if (!*tooOld) {
std::vector<KeyValue> expected = store.getRange(begin, end, store.size(), false);
std::vector<fdb::KeyValue> expected = store.getRange(begin, end, store.size(), false);
if (results->size() != expected.size()) {
error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}",
expected.size(),
@ -86,8 +148,8 @@ private:
error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}",
i,
results->size(),
expected[i].key,
(*results)[i].key));
fdb::toCharsRef(expected[i].key),
fdb::toCharsRef((*results)[i].key)));
}
ASSERT((*results)[i].key == expected[i].key);
@ -96,9 +158,9 @@ private:
"randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}",
i,
results->size(),
expected[i].key,
expected[i].value,
(*results)[i].value));
fdb::toCharsRef(expected[i].key),
fdb::toCharsRef(expected[i].value),
fdb::toCharsRef((*results)[i].value)));
}
ASSERT((*results)[i].value == expected[i].value);
}
@ -108,19 +170,19 @@ private:
}
void randomGetRangesOp(TTaskFct cont) {
std::string begin = randomKeyName();
std::string end = randomKeyName();
auto results = std::make_shared<std::vector<KeyValue>>();
fdb::Key begin = randomKeyName();
fdb::Key end = randomKeyName();
auto results = std::make_shared<std::vector<fdb::KeyRange>>();
if (begin > end) {
std::swap(begin, end);
}
execTransaction(
[begin, end, results](auto ctx) {
KeyRangesFuture f = ctx->tx()->getBlobGranuleRanges(begin, end);
fdb::Future f = ctx->tx().getBlobGranuleRanges(begin, end).eraseType();
ctx->continueAfter(
f,
[ctx, f, results]() {
(*results) = f.getKeyRanges();
*results = copyKeyRangeArray(f.get<fdb::future_var::KeyRangeRefArray>());
ctx->done();
},
true);
@ -128,18 +190,18 @@ private:
[this, begin, end, results, cont]() {
if (seenReadSuccess) {
ASSERT(results->size() > 0);
ASSERT(results->front().key <= begin);
ASSERT(results->back().value >= end);
ASSERT(results->front().beginKey <= begin);
ASSERT(results->back().endKey >= end);
}
for (int i = 0; i < results->size(); i++) {
// no empty or inverted ranges
ASSERT((*results)[i].key < (*results)[i].value);
ASSERT((*results)[i].beginKey < (*results)[i].endKey);
}
for (int i = 1; i < results->size(); i++) {
// ranges contain entire requested key range
ASSERT((*results)[i].key == (*results)[i - 1].value);
ASSERT((*results)[i].beginKey == (*results)[i - 1].endKey);
}
schedule(cont);

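With the generic wrapper gone, the blob granule load callbacks and TesterGranuleContext move into this workload, and the raw FDBReadBlobGranuleContext is populated at the call site. A comment sketch of the callback contract as this file exercises it (the sequencing is inferred from the code above, not from fdb_c documentation):

// start_load_f(filename, filenameLength, offset, length, fullFileLength, ctx)
//     -> allocates a buffer, reads the requested slice of the granule file, returns a loadId;
// get_load_f(loadId, ctx)   -> returns the buffer for that loadId;
// free_load_f(loadId, ctx)  -> frees the buffer and forgets the loadId;
// ~TesterGranuleContext      -> frees any loads still in flight after an error.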
View File

@ -19,6 +19,7 @@
*/
#include "TesterApiWorkload.h"
#include "TesterUtil.h"
#include "test/fdb_api.hpp"
namespace FdbApiTester {
@ -32,15 +33,15 @@ private:
// Start multiple concurrent gets and cancel the transaction
void randomCancelGetTx(TTaskFct cont) {
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
auto keys = std::make_shared<std::vector<std::string>>();
auto keys = std::make_shared<std::vector<fdb::Key>>();
for (int i = 0; i < numKeys; i++) {
keys->push_back(randomKey(readExistingKeysRatio));
}
execTransaction(
[keys](auto ctx) {
std::vector<Future> futures;
std::vector<fdb::Future> futures;
for (const auto& key : *keys) {
futures.push_back(ctx->tx()->get(key, false));
futures.push_back(ctx->tx().get(key, false).eraseType());
}
ctx->done();
},
@ -50,24 +51,25 @@ private:
// Start multiple concurrent gets and cancel the transaction after the first get returns
void randomCancelAfterFirstResTx(TTaskFct cont) {
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
auto keys = std::make_shared<std::vector<std::string>>();
auto keys = std::make_shared<std::vector<fdb::Key>>();
for (int i = 0; i < numKeys; i++) {
keys->push_back(randomKey(readExistingKeysRatio));
}
execTransaction(
[this, keys](auto ctx) {
std::vector<ValueFuture> futures;
std::vector<fdb::Future> futures;
for (const auto& key : *keys) {
futures.push_back(ctx->tx()->get(key, false));
futures.push_back(ctx->tx().get(key, false).eraseType());
}
for (int i = 0; i < keys->size(); i++) {
ValueFuture f = futures[i];
fdb::Future f = futures[i];
auto expectedVal = store.get((*keys)[i]);
ctx->continueAfter(f, [expectedVal, f, this, ctx]() {
auto val = f.getValue();
auto val = f.get<fdb::future_var::ValueRef>();
if (expectedVal != val) {
error(fmt::format(
"cancelAfterFirstResTx mismatch. expected: {:.80} actual: {:.80}", expectedVal, val));
error(fmt::format("cancelAfterFirstResTx mismatch. expected: {:.80} actual: {:.80}",
fdb::toCharsRef(expectedVal.value()),
fdb::toCharsRef(val.value())));
}
ctx->done();
});

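Both cancel tests now collect std::vector<fdb::Future> and call .eraseType() on the typed future returned by get(). A comment sketch of the pattern (names come from the hunks above; the exact template shape of the typed future is an assumption):

// auto typed = ctx->tx().get(key, false);        // typed future carrying a ValueRef
// fdb::Future f = typed.eraseType();             // type-erased; fits std::vector<fdb::Future>
// auto val = f.get<fdb::future_var::ValueRef>(); // reattach the result type when reading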
View File

@ -19,6 +19,7 @@
*/
#include "TesterApiWorkload.h"
#include "TesterUtil.h"
#include "test/fdb_api.hpp"
#include <memory>
#include <fmt/format.h>
@ -33,36 +34,36 @@ private:
void randomCommitReadOp(TTaskFct cont) {
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
auto kvPairs = std::make_shared<std::vector<KeyValue>>();
auto kvPairs = std::make_shared<std::vector<fdb::KeyValue>>();
for (int i = 0; i < numKeys; i++) {
kvPairs->push_back(KeyValue{ randomKey(readExistingKeysRatio), randomValue() });
kvPairs->push_back(fdb::KeyValue{ randomKey(readExistingKeysRatio), randomValue() });
}
execTransaction(
[kvPairs](auto ctx) {
for (const KeyValue& kv : *kvPairs) {
ctx->tx()->set(kv.key, kv.value);
for (const fdb::KeyValue& kv : *kvPairs) {
ctx->tx().set(kv.key, kv.value);
}
ctx->commit();
},
[this, kvPairs, cont]() {
for (const KeyValue& kv : *kvPairs) {
for (const fdb::KeyValue& kv : *kvPairs) {
store.set(kv.key, kv.value);
}
auto results = std::make_shared<std::vector<std::optional<std::string>>>();
auto results = std::make_shared<std::vector<std::optional<fdb::Value>>>();
execTransaction(
[kvPairs, results, this](auto ctx) {
if (apiVersion >= 710) {
// Test GRV caching in 7.1 and later
ctx->tx()->setOption(FDB_TR_OPTION_USE_GRV_CACHE);
ctx->tx().setOption(FDB_TR_OPTION_USE_GRV_CACHE);
}
auto futures = std::make_shared<std::vector<Future>>();
auto futures = std::make_shared<std::vector<fdb::Future>>();
for (const auto& kv : *kvPairs) {
futures->push_back(ctx->tx()->get(kv.key, false));
futures->push_back(ctx->tx().get(kv.key, false));
}
ctx->continueAfterAll(*futures, [ctx, futures, results]() {
results->clear();
for (auto& f : *futures) {
results->push_back(((ValueFuture&)f).getValue());
results->push_back(copyValueRef(f.get<fdb::future_var::ValueRef>()));
}
ASSERT(results->size() == futures->size());
ctx->done();
@ -76,9 +77,9 @@ private:
if (actual != expected) {
error(
fmt::format("randomCommitReadOp mismatch. key: {} expected: {:.80} actual: {:.80}",
(*kvPairs)[i].key,
expected,
actual));
fdb::toCharsRef((*kvPairs)[i].key),
fdb::toCharsRef(expected.value()),
fdb::toCharsRef(actual.value())));
ASSERT(false);
}
}
@ -89,21 +90,21 @@ private:
void randomGetOp(TTaskFct cont) {
int numKeys = Random::get().randomInt(1, maxKeysPerTransaction);
auto keys = std::make_shared<std::vector<std::string>>();
auto results = std::make_shared<std::vector<std::optional<std::string>>>();
auto keys = std::make_shared<std::vector<fdb::Key>>();
auto results = std::make_shared<std::vector<std::optional<fdb::Value>>>();
for (int i = 0; i < numKeys; i++) {
keys->push_back(randomKey(readExistingKeysRatio));
}
execTransaction(
[keys, results](auto ctx) {
auto futures = std::make_shared<std::vector<Future>>();
auto futures = std::make_shared<std::vector<fdb::Future>>();
for (const auto& key : *keys) {
futures->push_back(ctx->tx()->get(key, false));
futures->push_back(ctx->tx().get(key, false));
}
ctx->continueAfterAll(*futures, [ctx, futures, results]() {
results->clear();
for (auto& f : *futures) {
results->push_back(((ValueFuture&)f).getValue());
results->push_back(copyValueRef(f.get<fdb::future_var::ValueRef>()));
}
ASSERT(results->size() == futures->size());
ctx->done();
@ -115,9 +116,9 @@ private:
auto expected = store.get((*keys)[i]);
if ((*results)[i] != expected) {
error(fmt::format("randomGetOp mismatch. key: {} expected: {:.80} actual: {:.80}",
(*keys)[i],
expected,
(*results)[i]));
fdb::toCharsRef((*keys)[i]),
fdb::toCharsRef(expected.value()),
fdb::toCharsRef((*results)[i].value())));
}
}
schedule(cont);

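randomCommitReadOp and randomGetOp share a fan-out/join shape that survives the type migration. A comment sketch of the flow (continueAfterAll's counter-based implementation appears in TesterTransactionExecutor.cpp later in this diff):

// fan out:  for each key, futures->push_back(ctx->tx().get(key, false));
// join:     ctx->continueAfterAll(*futures, [...]() {
//               for (auto& f : *futures)
//                   results->push_back(copyValueRef(f.get<fdb::future_var::ValueRef>()));
//           });
// verify:   compare *results against the in-memory KeyValueStore.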
View File

@ -23,26 +23,26 @@
namespace FdbApiTester {
// Get the value associated with a key
std::optional<std::string> KeyValueStore::get(std::string_view key) const {
std::optional<fdb::Value> KeyValueStore::get(fdb::KeyRef key) const {
std::unique_lock<std::mutex> lock(mutex);
auto value = store.find(std::string(key));
auto value = store.find(fdb::Key(key));
if (value != store.end())
return value->second;
else
return std::optional<std::string>();
return std::optional<fdb::Value>();
}
// Checks if the key exists
bool KeyValueStore::exists(std::string_view key) {
bool KeyValueStore::exists(fdb::KeyRef key) {
std::unique_lock<std::mutex> lock(mutex);
return (store.find(std::string(key)) != store.end());
return (store.find(fdb::Key(key)) != store.end());
}
// Returns the key designated by a key selector
std::string KeyValueStore::getKey(std::string_view keyName, bool orEqual, int offset) const {
fdb::Key KeyValueStore::getKey(fdb::KeyRef keyName, bool orEqual, int offset) const {
std::unique_lock<std::mutex> lock(mutex);
// Begin by getting the start key referenced by the key selector
std::map<std::string, std::string>::const_iterator mapItr = store.lower_bound(keyName);
std::map<fdb::Key, fdb::Value>::const_iterator mapItr = store.lower_bound(keyName);
// Update the iterator position if necessary based on the value of orEqual
int count = 0;
@ -88,28 +88,25 @@ std::string KeyValueStore::getKey(std::string_view keyName, bool orEqual, int of
}
// Gets a range of key-value pairs, returning a maximum of <limit> results
std::vector<KeyValue> KeyValueStore::getRange(std::string_view begin,
std::string_view end,
int limit,
bool reverse) const {
std::vector<fdb::KeyValue> KeyValueStore::getRange(fdb::KeyRef begin, fdb::KeyRef end, int limit, bool reverse) const {
std::unique_lock<std::mutex> lock(mutex);
std::vector<KeyValue> results;
std::vector<fdb::KeyValue> results;
if (!reverse) {
std::map<std::string, std::string>::const_iterator mapItr = store.lower_bound(begin);
std::map<fdb::Key, fdb::Value>::const_iterator mapItr = store.lower_bound(begin);
for (; mapItr != store.end() && mapItr->first < end && results.size() < limit; mapItr++)
results.push_back(KeyValue{ mapItr->first, mapItr->second });
results.push_back(fdb::KeyValue{ mapItr->first, mapItr->second });
}
// Reverse getRange queries are supported, but not tested at this time. This is because reverse range
// queries have been disallowed by the database at the API level
else {
std::map<std::string, std::string>::const_iterator mapItr = store.lower_bound(end);
std::map<fdb::Key, fdb::Value>::const_iterator mapItr = store.lower_bound(end);
if (mapItr == store.begin())
return results;
for (--mapItr; mapItr->first >= begin && results.size() < abs(limit); mapItr--) {
results.push_back(KeyValue{ mapItr->first, mapItr->second });
results.push_back(fdb::KeyValue{ mapItr->first, mapItr->second });
if (mapItr == store.begin())
break;
}
@ -119,13 +116,13 @@ std::vector<KeyValue> KeyValueStore::getRange(std::string_view begin,
}
// Stores a key-value pair in the database
void KeyValueStore::set(std::string_view key, std::string_view value) {
void KeyValueStore::set(fdb::KeyRef key, fdb::ValueRef value) {
std::unique_lock<std::mutex> lock(mutex);
store[std::string(key)] = value;
store[fdb::Key(key)] = value;
}
// Removes a key from the database
void KeyValueStore::clear(std::string_view key) {
void KeyValueStore::clear(fdb::KeyRef key) {
std::unique_lock<std::mutex> lock(mutex);
auto iter = store.find(key);
if (iter != store.end()) {
@ -134,7 +131,7 @@ void KeyValueStore::clear(std::string_view key) {
}
// Removes a range of keys from the database
void KeyValueStore::clear(std::string_view begin, std::string_view end) {
void KeyValueStore::clear(fdb::KeyRef begin, fdb::KeyRef end) {
std::unique_lock<std::mutex> lock(mutex);
store.erase(store.lower_bound(begin), store.lower_bound(end));
}
@ -146,20 +143,20 @@ uint64_t KeyValueStore::size() const {
}
// The first key in the database; returned by key selectors that choose a key off the front
std::string KeyValueStore::startKey() const {
return "";
fdb::Key KeyValueStore::startKey() const {
return fdb::Key();
}
// The last key in the database; returned by key selectors that choose a key off the back
std::string KeyValueStore::endKey() const {
return "\xff";
fdb::Key KeyValueStore::endKey() const {
return fdb::Key(1, '\xff');
}
// Debugging function that prints all key-value pairs
void KeyValueStore::printContents() const {
std::unique_lock<std::mutex> lock(mutex);
printf("Contents:\n");
std::map<std::string, std::string>::const_iterator mapItr;
std::map<fdb::Key, fdb::Value>::const_iterator mapItr;
for (mapItr = store.begin(); mapItr != store.end(); mapItr++)
printf("%s\n", mapItr->first.c_str());
}

View File

@ -37,41 +37,41 @@ namespace FdbApiTester {
class KeyValueStore {
public:
// Get the value associated with a key
std::optional<std::string> get(std::string_view key) const;
std::optional<fdb::Value> get(fdb::KeyRef key) const;
// Checks if the key exists
bool exists(std::string_view key);
bool exists(fdb::KeyRef key);
// Returns the key designated by a key selector
std::string getKey(std::string_view keyName, bool orEqual, int offset) const;
fdb::Key getKey(fdb::KeyRef keyName, bool orEqual, int offset) const;
// Gets a range of key-value pairs, returning a maximum of <limit> results
std::vector<KeyValue> getRange(std::string_view begin, std::string_view end, int limit, bool reverse) const;
std::vector<fdb::KeyValue> getRange(fdb::KeyRef begin, fdb::KeyRef end, int limit, bool reverse) const;
// Stores a key-value pair in the database
void set(std::string_view key, std::string_view value);
void set(fdb::KeyRef key, fdb::ValueRef value);
// Removes a key from the database
void clear(std::string_view key);
void clear(fdb::KeyRef key);
// Removes a range of keys from the database
void clear(std::string_view begin, std::string_view end);
void clear(fdb::KeyRef begin, fdb::KeyRef end);
// The number of keys in the database
uint64_t size() const;
// The first key in the database; returned by key selectors that choose a key off the front
std::string startKey() const;
fdb::Key startKey() const;
// The last key in the database; returned by key selectors that choose a key off the back
std::string endKey() const;
fdb::Key endKey() const;
// Debugging function that prints all key-value pairs
void printContents() const;
private:
// A map holding the key-value pairs
std::map<std::string, std::string, std::less<>> store;
std::map<fdb::Key, fdb::Value, std::less<>> store;
mutable std::mutex mutex;
};

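Note that the map keeps std::less<>, the transparent comparator, through the key-type change. That is what lets lookups such as store.lower_bound(begin) in KeyValueStore.cpp take an fdb::KeyRef without materializing a temporary fdb::Key. A standalone sketch of the same mechanism, with std::string / std::string_view standing in for fdb::Key / fdb::KeyRef:

#include <cassert>
#include <map>
#include <string>
#include <string_view>

int main() {
	std::map<std::string, std::string, std::less<>> store;
	store["alpha"] = "1";
	std::string_view ref = "alpha"; // stand-in for fdb::KeyRef
	assert(store.lower_bound(ref) != store.end()); // heterogeneous lookup, no temporary key
	return 0;
}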
View File

@ -22,6 +22,7 @@
#include "TesterUtil.h"
#include "foundationdb/fdb_c_types.h"
#include "test/apitester/TesterScheduler.h"
#include "test/fdb_api.hpp"
#include <memory>
#include <stdexcept>
#include <unordered_map>
@ -36,24 +37,24 @@ namespace FdbApiTester {
constexpr int LONG_WAIT_TIME_US = 2000000;
constexpr int LARGE_NUMBER_OF_RETRIES = 10;
void TransactionActorBase::complete(fdb_error_t err) {
void TransactionActorBase::complete(fdb::Error err) {
error = err;
context = {};
}
void ITransactionContext::continueAfterAll(std::vector<Future> futures, TTaskFct cont) {
void ITransactionContext::continueAfterAll(std::vector<fdb::Future> futures, TTaskFct cont) {
auto counter = std::make_shared<std::atomic<int>>(futures.size());
auto errorCode = std::make_shared<std::atomic<fdb_error_t>>(error_code_success);
auto errorCode = std::make_shared<std::atomic<fdb::Error>>(fdb::Error::success());
auto thisPtr = shared_from_this();
for (auto& f : futures) {
continueAfter(
f,
[thisPtr, f, counter, errorCode, cont]() {
if (f.getError() != error_code_success) {
(*errorCode) = f.getError();
if (f.error().code() != error_code_success) {
(*errorCode) = f.error();
}
if (--(*counter) == 0) {
if (*errorCode == error_code_success) {
if (errorCode->load().code() == error_code_success) {
// all futures successful -> continue
cont();
} else {
@ -71,7 +72,7 @@ void ITransactionContext::continueAfterAll(std::vector<Future> futures, TTaskFct
*/
class TransactionContextBase : public ITransactionContext {
public:
TransactionContextBase(FDBTransaction* tx,
TransactionContextBase(fdb::Transaction tx,
std::shared_ptr<ITransactionActor> txActor,
TTaskFct cont,
IScheduler* scheduler,
@ -84,10 +85,12 @@ public:
// IN_PROGRESS -> (ON_ERROR -> IN_PROGRESS)* [-> ON_ERROR] -> DONE
enum class TxState { IN_PROGRESS, ON_ERROR, DONE };
Transaction* tx() override { return &fdbTx; }
fdb::Transaction tx() override { return fdbTx; }
// Set a continuation to be executed when a future gets ready
void continueAfter(Future f, TTaskFct cont, bool retryOnError) override { doContinueAfter(f, cont, retryOnError); }
void continueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override {
doContinueAfter(f, cont, retryOnError);
}
// Complete the transaction with a commit
void commit() override {
@ -97,7 +100,7 @@ public:
}
commitCalled = true;
lock.unlock();
Future f = fdbTx.commit();
fdb::Future f = fdbTx.commit();
auto thisRef = shared_from_this();
doContinueAfter(
f, [thisRef]() { thisRef->done(); }, true);
@ -114,12 +117,12 @@ public:
if (retriedErrors.size() >= LARGE_NUMBER_OF_RETRIES) {
fmt::print("Transaction succeeded after {} retries on errors: {}\n",
retriedErrors.size(),
fmt::join(retriedErrors, ", "));
fmt::join(retriedErrorCodes(), ", "));
}
// cancel transaction so that any pending operations on it
// fail gracefully
fdbTx.cancel();
txActor->complete(error_code_success);
txActor->complete(fdb::Error::success());
cleanUp();
contAfterDone();
}
@ -127,7 +130,7 @@ public:
std::string getBGBasePath() override { return bgBasePath; }
protected:
virtual void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) = 0;
virtual void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) = 0;
// Clean up transaction state after completing the transaction
// Note that the object may live longer, because it is referenced
@ -139,8 +142,8 @@ protected:
}
// Complete the transaction with an (unretriable) error
void transactionFailed(fdb_error_t err) {
ASSERT(err != error_code_success);
void transactionFailed(fdb::Error err) {
ASSERT(err);
std::unique_lock<std::mutex> lock(mutex);
if (txState == TxState::DONE) {
return;
@ -155,7 +158,7 @@ protected:
// Handle the result of a transaction onError call
void handleOnErrorResult() {
ASSERT(txState == TxState::ON_ERROR);
fdb_error_t err = onErrorFuture.getError();
fdb::Error err = onErrorFuture.error();
onErrorFuture = {};
if (err) {
transactionFailed(err);
@ -169,24 +172,32 @@ protected:
}
// Checks if a transaction can be retried. Fails the transaction if the check fails
bool canRetry(fdb_error_t lastErr) {
bool canRetry(fdb::Error lastErr) {
ASSERT(txState == TxState::ON_ERROR);
retriedErrors.push_back(lastErr);
if (retryLimit == 0 || retriedErrors.size() <= retryLimit) {
if (retriedErrors.size() == LARGE_NUMBER_OF_RETRIES) {
fmt::print("Transaction already retried {} times, on errors: {}\n",
retriedErrors.size(),
fmt::join(retriedErrors, ", "));
fmt::join(retriedErrorCodes(), ", "));
}
return true;
}
fmt::print("Transaction retry limit reached. Retried on errors: {}\n", fmt::join(retriedErrors, ", "));
fmt::print("Transaction retry limit reached. Retried on errors: {}\n", fmt::join(retriedErrorCodes(), ", "));
transactionFailed(lastErr);
return false;
}
std::vector<fdb::Error::CodeType> retriedErrorCodes() {
std::vector<fdb::Error::CodeType> retriedErrorCodes;
for (auto e : retriedErrors) {
retriedErrorCodes.push_back(e.code());
}
return retriedErrorCodes;
}
// FDB transaction
Transaction fdbTx;
fdb::Transaction fdbTx;
// Actor implementing the transaction workflow
std::shared_ptr<ITransactionActor> txActor;
@ -207,10 +218,10 @@ protected:
TxState txState;
// onError future used in ON_ERROR state
Future onErrorFuture;
fdb::Future onErrorFuture;
// The error code on which onError was called
fdb_error_t onErrorArg;
fdb::Error onErrorArg;
// The time point of calling onError
TimePoint onErrorCallTimePoint;
@ -219,7 +230,7 @@ protected:
bool commitCalled;
// A history of errors on which the transaction was retried
std::vector<fdb_error_t> retriedErrors;
std::vector<fdb::Error> retriedErrors;
// blob granule base path
std::string bgBasePath;
@ -230,7 +241,7 @@ protected:
*/
class BlockingTransactionContext : public TransactionContextBase {
public:
BlockingTransactionContext(FDBTransaction* tx,
BlockingTransactionContext(fdb::Transaction tx,
std::shared_ptr<ITransactionActor> txActor,
TTaskFct cont,
IScheduler* scheduler,
@ -239,37 +250,37 @@ public:
: TransactionContextBase(tx, txActor, cont, scheduler, retryLimit, bgBasePath) {}
protected:
void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) override {
void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override {
auto thisRef = std::static_pointer_cast<BlockingTransactionContext>(shared_from_this());
scheduler->schedule(
[thisRef, f, cont, retryOnError]() mutable { thisRef->blockingContinueAfter(f, cont, retryOnError); });
}
void blockingContinueAfter(Future f, TTaskFct cont, bool retryOnError) {
void blockingContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) {
std::unique_lock<std::mutex> lock(mutex);
if (txState != TxState::IN_PROGRESS) {
return;
}
lock.unlock();
auto start = timeNow();
fdb_error_t err = fdb_future_block_until_ready(f.fdbFuture());
fdb::Error err = f.blockUntilReady();
if (err) {
transactionFailed(err);
return;
}
err = f.getError();
err = f.error();
auto waitTimeUs = timeElapsedInUs(start);
if (waitTimeUs > LONG_WAIT_TIME_US) {
fmt::print("Long waiting time on a future: {:.3f}s, return code {} ({}), commit called: {}\n",
microsecToSec(waitTimeUs),
err,
fdb_get_error(err),
err.code(),
err.what(),
commitCalled);
}
if (err == error_code_transaction_cancelled) {
if (err.code() == error_code_transaction_cancelled) {
return;
}
if (err == error_code_success || !retryOnError) {
if (err.code() == error_code_success || !retryOnError) {
scheduler->schedule([cont]() { cont(); });
return;
}
@ -277,7 +288,7 @@ protected:
onError(err);
}
virtual void onError(fdb_error_t err) override {
virtual void onError(fdb::Error err) override {
std::unique_lock<std::mutex> lock(mutex);
if (txState != TxState::IN_PROGRESS) {
// Ignore further errors if the transaction is in error-handling mode or completed
@ -295,19 +306,19 @@ protected:
onErrorArg = err;
auto start = timeNow();
fdb_error_t err2 = fdb_future_block_until_ready(onErrorFuture.fdbFuture());
fdb::Error err2 = onErrorFuture.blockUntilReady();
if (err2) {
transactionFailed(err2);
return;
}
auto waitTimeUs = timeElapsedInUs(start);
if (waitTimeUs > LONG_WAIT_TIME_US) {
fdb_error_t err3 = onErrorFuture.getError();
fdb::Error err3 = onErrorFuture.error();
fmt::print("Long waiting time on onError({}) future: {:.3f}s, return code {} ({})\n",
onErrorArg,
onErrorArg.code(),
microsecToSec(waitTimeUs),
err3,
fdb_get_error(err3));
err3.code(),
err3.what());
}
auto thisRef = std::static_pointer_cast<BlockingTransactionContext>(shared_from_this());
scheduler->schedule([thisRef]() { thisRef->handleOnErrorResult(); });
@ -319,7 +330,7 @@ protected:
*/
class AsyncTransactionContext : public TransactionContextBase {
public:
AsyncTransactionContext(FDBTransaction* tx,
AsyncTransactionContext(fdb::Transaction tx,
std::shared_ptr<ITransactionActor> txActor,
TTaskFct cont,
IScheduler* scheduler,
@ -328,23 +339,24 @@ public:
: TransactionContextBase(tx, txActor, cont, scheduler, retryLimit, bgBasePath) {}
protected:
void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) override {
void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override {
std::unique_lock<std::mutex> lock(mutex);
if (txState != TxState::IN_PROGRESS) {
return;
}
callbackMap[f.fdbFuture()] = CallbackInfo{ f, cont, shared_from_this(), retryOnError, timeNow() };
callbackMap[f] = CallbackInfo{ f, cont, shared_from_this(), retryOnError, timeNow() };
lock.unlock();
fdb_error_t err = fdb_future_set_callback(f.fdbFuture(), futureReadyCallback, this);
if (err) {
try {
f.then([this](fdb::Future f) { futureReadyCallback(f, this); });
} catch (std::runtime_error& err) {
lock.lock();
callbackMap.erase(f.fdbFuture());
callbackMap.erase(f);
lock.unlock();
transactionFailed(err);
transactionFailed(fdb::Error(error_code_operation_failed));
}
}
static void futureReadyCallback(FDBFuture* f, void* param) {
static void futureReadyCallback(fdb::Future f, void* param) {
try {
AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
txCtx->onFutureReady(f);
@ -357,7 +369,7 @@ protected:
}
}
void onFutureReady(FDBFuture* f) {
void onFutureReady(fdb::Future f) {
auto endTime = timeNow();
injectRandomSleep();
// Hold a reference to this to avoid it to be
@ -372,25 +384,25 @@ protected:
return;
}
lock.unlock();
fdb_error_t err = fdb_future_get_error(f);
fdb::Error err = f.error();
auto waitTimeUs = timeElapsedInUs(cbInfo.startTime, endTime);
if (waitTimeUs > LONG_WAIT_TIME_US) {
fmt::print("Long waiting time on a future: {:.3f}s, return code {} ({})\n",
microsecToSec(waitTimeUs),
err,
fdb_get_error(err));
err.code(),
err.what());
}
if (err == error_code_transaction_cancelled) {
if (err.code() == error_code_transaction_cancelled) {
return;
}
if (err == error_code_success || !cbInfo.retryOnError) {
if (err.code() == error_code_success || !cbInfo.retryOnError) {
scheduler->schedule(cbInfo.cont);
return;
}
onError(err);
}
virtual void onError(fdb_error_t err) override {
virtual void onError(fdb::Error err) override {
std::unique_lock<std::mutex> lock(mutex);
if (txState != TxState::IN_PROGRESS) {
// Ignore further errors if the transaction is in error-handling mode or completed
@ -405,17 +417,18 @@ protected:
ASSERT(!onErrorFuture);
onErrorArg = err;
onErrorFuture = tx()->onError(err);
onErrorFuture = tx().onError(err);
onErrorCallTimePoint = timeNow();
onErrorThisRef = std::static_pointer_cast<AsyncTransactionContext>(shared_from_this());
fdb_error_t err2 = fdb_future_set_callback(onErrorFuture.fdbFuture(), onErrorReadyCallback, this);
if (err2) {
try {
onErrorFuture.then([this](fdb::Future f) { onErrorReadyCallback(f, this); });
} catch (...) {
onErrorFuture = {};
transactionFailed(err2);
transactionFailed(fdb::Error(error_code_operation_failed));
}
}
static void onErrorReadyCallback(FDBFuture* f, void* param) {
static void onErrorReadyCallback(fdb::Future f, void* param) {
try {
AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param;
txCtx->onErrorReady(f);
@ -428,15 +441,15 @@ protected:
}
}
void onErrorReady(FDBFuture* f) {
void onErrorReady(fdb::Future f) {
auto waitTimeUs = timeElapsedInUs(onErrorCallTimePoint);
if (waitTimeUs > LONG_WAIT_TIME_US) {
fdb_error_t err = onErrorFuture.getError();
fdb::Error err = onErrorFuture.error();
fmt::print("Long waiting time on onError({}): {:.3f}s, return code {} ({})\n",
onErrorArg,
onErrorArg.code(),
microsecToSec(waitTimeUs),
err,
fdb_get_error(err));
err.code(),
err.what());
}
injectRandomSleep();
auto thisRef = onErrorThisRef;
@ -450,7 +463,7 @@ protected:
// Cancel all pending operations
// Note that the callbacks of the cancelled futures will still be called
std::unique_lock<std::mutex> lock(mutex);
std::vector<Future> futures;
std::vector<fdb::Future> futures;
for (auto& iter : callbackMap) {
futures.push_back(iter.second.future);
}
@ -469,7 +482,7 @@ protected:
// Object references for a future callback
struct CallbackInfo {
Future future;
fdb::Future future;
TTaskFct cont;
std::shared_ptr<ITransactionContext> thisRef;
bool retryOnError;
@ -477,7 +490,7 @@ protected:
};
// Map for keeping track of future waits and holding necessary object references
std::unordered_map<FDBFuture*, CallbackInfo> callbackMap;
std::unordered_map<fdb::Future, CallbackInfo> callbackMap;
// Holding reference to this for onError future C callback
std::shared_ptr<AsyncTransactionContext> onErrorThisRef;
@ -498,13 +511,9 @@ public:
protected:
// Execute the transaction on the given database instance
void executeOnDatabase(FDBDatabase* db, std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) {
FDBTransaction* tx;
fdb_error_t err = fdb_database_create_transaction(db, &tx);
if (err != error_code_success) {
txActor->complete(err);
cont();
} else {
void executeOnDatabase(fdb::Database db, std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) {
try {
fdb::Transaction tx = db.createTransaction();
std::shared_ptr<ITransactionContext> ctx;
if (options.blockOnFutures) {
ctx = std::make_shared<BlockingTransactionContext>(
@ -515,6 +524,9 @@ protected:
}
txActor->init(ctx);
txActor->start();
} catch (...) {
txActor->complete(fdb::Error(error_code_operation_failed));
cont();
}
}
@ -537,14 +549,7 @@ public:
void init(IScheduler* scheduler, const char* clusterFile, const std::string& bgBasePath) override {
TransactionExecutorBase::init(scheduler, clusterFile, bgBasePath);
for (int i = 0; i < options.numDatabases; i++) {
FDBDatabase* db;
fdb_error_t err = fdb_create_database(clusterFile, &db);
if (err != error_code_success) {
throw TesterError(fmt::format("Failed create database with the cluster file '{}'. Error: {}({})",
clusterFile,
err,
fdb_get_error(err)));
}
fdb::Database db(clusterFile);
databases.push_back(db);
}
}
@ -554,14 +559,10 @@ public:
executeOnDatabase(databases[idx], txActor, cont);
}
void release() {
for (FDBDatabase* db : databases) {
fdb_database_destroy(db);
}
}
void release() { databases.clear(); }
private:
std::vector<FDBDatabase*> databases;
std::vector<fdb::Database> databases;
};
/**
@ -572,16 +573,8 @@ public:
DBPerTransactionExecutor(const TransactionExecutorOptions& options) : TransactionExecutorBase(options) {}
void execute(std::shared_ptr<ITransactionActor> txActor, TTaskFct cont) override {
FDBDatabase* db = nullptr;
fdb_error_t err = fdb_create_database(clusterFile.c_str(), &db);
if (err != error_code_success) {
txActor->complete(err);
cont();
}
executeOnDatabase(db, txActor, [cont, db]() {
fdb_database_destroy(db);
cont();
});
fdb::Database db(clusterFile.c_str());
executeOnDatabase(db, txActor, cont);
}
};

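The executor hunks above convert two mechanisms at once: error propagation moves from fdb_error_t return codes to exceptions caught and reported as a generic fdb::Error(error_code_operation_failed), and C future callbacks (fdb_future_set_callback) move to fdb::Future::then taking a lambda. A condensed comment sketch of the new shape (that fdb_cpp throws std::runtime_error is inferred from the catch clause above):

// try {
//     fdb::Database db(clusterFile);                 // was fdb_create_database + error check
//     fdb::Transaction tx = db.createTransaction();  // was fdb_database_create_transaction
//     f.then([this](fdb::Future f) { futureReadyCallback(f, this); }); // was fdb_future_set_callback
// } catch (...) {
//     txActor->complete(fdb::Error(error_code_operation_failed));
// }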
View File

@ -23,8 +23,8 @@
#ifndef APITESTER_TRANSACTION_EXECUTOR_H
#define APITESTER_TRANSACTION_EXECUTOR_H
#include "test/fdb_api.hpp"
#include "TesterOptions.h"
#include "TesterApiWrapper.h"
#include "TesterScheduler.h"
#include <string_view>
#include <memory>
@ -39,18 +39,18 @@ public:
virtual ~ITransactionContext() {}
// Current FDB transaction
virtual Transaction* tx() = 0;
virtual fdb::Transaction tx() = 0;
// Schedule a continuation to be executed when the future gets ready
// retryOnError controls whether transaction is retried in case of an error instead
// of calling the continuation
virtual void continueAfter(Future f, TTaskFct cont, bool retryOnError = true) = 0;
virtual void continueAfter(fdb::Future f, TTaskFct cont, bool retryOnError = true) = 0;
// Complete the transaction with a commit
virtual void commit() = 0;
// retry transaction on error
virtual void onError(fdb_error_t err) = 0;
virtual void onError(fdb::Error err) = 0;
// Mark the transaction as completed without committing it (for read transactions)
virtual void done() = 0;
@ -59,7 +59,7 @@ public:
virtual std::string getBGBasePath() = 0;
// A continuation to be executed when all of the given futures get ready
virtual void continueAfterAll(std::vector<Future> futures, TTaskFct cont);
virtual void continueAfterAll(std::vector<fdb::Future> futures, TTaskFct cont);
};
/**
@ -76,10 +76,10 @@ public:
virtual void start() = 0;
// Transaction completion result (error_code_success in case of success)
virtual fdb_error_t getErrorCode() = 0;
virtual fdb::Error getError() = 0;
// Notification about the completion of the transaction
virtual void complete(fdb_error_t err) = 0;
virtual void complete(fdb::Error err) = 0;
};
/**
@ -88,15 +88,15 @@ public:
class TransactionActorBase : public ITransactionActor {
public:
void init(std::shared_ptr<ITransactionContext> ctx) override { context = ctx; }
fdb_error_t getErrorCode() override { return error; }
void complete(fdb_error_t err) override;
fdb::Error getError() override { return error; }
void complete(fdb::Error err) override;
protected:
std::shared_ptr<ITransactionContext> ctx() { return context; }
private:
std::shared_ptr<ITransactionContext> context;
fdb_error_t error = error_code_success;
fdb::Error error = fdb::Error::success();
};
// Type of the lambda functions implementing a transaction

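One signature shift above is easy to miss: tx() now returns fdb::Transaction by value rather than Transaction*. That is safe on the assumption, consistent with the old wrapper's std::shared_ptr<FDBTransaction> member, that fdb::Transaction is itself a cheap shared handle to the native transaction:

// Old: virtual Transaction* tx() = 0;      // raw pointer into the context
// New: virtual fdb::Transaction tx() = 0;  // value copy of a shared handle
// Call sites accordingly drop one indirection: ctx->tx().set(kv.key, kv.value);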
View File

@ -26,8 +26,8 @@
namespace FdbApiTester {
std::string lowerCase(const std::string& str) {
std::string res = str;
fdb::ByteString lowerCase(fdb::BytesRef str) {
fdb::ByteString res(str);
std::transform(res.begin(), res.end(), res.begin(), ::tolower);
return res;
}
@ -46,9 +46,9 @@ Random& Random::get() {
return random;
}
std::string Random::randomStringLowerCase(int minLength, int maxLength) {
fdb::ByteString Random::randomStringLowerCase(int minLength, int maxLength) {
int length = randomInt(minLength, maxLength);
std::string str;
fdb::ByteString str;
str.reserve(length);
for (int i = 0; i < length; i++) {
str += (char)randomInt('a', 'z');
@ -65,4 +65,45 @@ void print_internal_error(const char* msg, const char* file, int line) {
fflush(stderr);
}
std::optional<fdb::Value> copyValueRef(fdb::future_var::ValueRef::Type value) {
if (value) {
return std::make_optional(fdb::Value(value.value()));
} else {
return std::nullopt;
}
}
KeyValueArray copyKeyValueArray(fdb::future_var::KeyValueRefArray::Type array) {
auto& [in_kvs, in_count, in_more] = array;
KeyValueArray out;
auto& [out_kv, out_more] = out;
out_more = in_more;
out_kv.clear();
for (int i = 0; i < in_count; ++i) {
fdb::native::FDBKeyValue nativeKv = *in_kvs++;
fdb::KeyValue kv;
kv.key = fdb::Key(nativeKv.key, nativeKv.key_length);
kv.value = fdb::Value(nativeKv.value, nativeKv.value_length);
out_kv.push_back(kv);
}
return out;
};
KeyRangeArray copyKeyRangeArray(fdb::future_var::KeyRangeRefArray::Type array) {
auto& [in_ranges, in_count] = array;
KeyRangeArray out;
for (int i = 0; i < in_count; ++i) {
fdb::native::FDBKeyRange nativeKr = *in_ranges++;
fdb::KeyRange range;
range.beginKey = fdb::Key(nativeKr.begin_key, nativeKr.begin_key_length);
range.endKey = fdb::Key(nativeKr.end_key, nativeKr.end_key_length);
out.push_back(range);
}
return out;
};
} // namespace FdbApiTester

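The three new copy helpers exist, presumably, because the future getters hand back non-owning ref/array views that are only valid while the future is alive; the tester deep-copies them into owning containers before dropping the future. The first two usages below appear in earlier hunks of this diff; the third is a hypothetical call consistent with the KeyValueArray alias in TesterUtil.h:

// std::optional<fdb::Value> v = copyValueRef(f.get<fdb::future_var::ValueRef>());
// *results = copyKeyRangeArray(f.get<fdb::future_var::KeyRangeRefArray>());
// auto [kvs, more] = copyKeyValueArray(f.get<fdb::future_var::KeyValueRefArray>());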
View File

@ -29,6 +29,13 @@
#include <fmt/format.h>
#include <chrono>
#include "test/fdb_api.hpp"
#undef ERROR
#define ERROR(name, number, description) enum { error_code_##name = number };
#include "flow/error_definitions.h"
namespace fmt {
// fmt::format formatting for std::optional<T>
@ -49,12 +56,7 @@ struct formatter<std::optional<T>> : fmt::formatter<T> {
namespace FdbApiTester {
struct KeyValue {
std::string key;
std::string value;
};
std::string lowerCase(const std::string& str);
fdb::ByteString lowerCase(fdb::BytesRef str);
class Random {
public:
@ -64,7 +66,7 @@ public:
int randomInt(int min, int max);
std::string randomStringLowerCase(int minLength, int maxLength);
fdb::ByteString randomStringLowerCase(int minLength, int maxLength);
bool randomBool(double trueRatio);
@ -110,6 +112,14 @@ static inline double microsecToSec(TimeDuration timeUs) {
return timeUs / 1000000.0;
}
std::optional<fdb::Value> copyValueRef(fdb::future_var::ValueRef::Type value);
using KeyValueArray = std::pair<std::vector<fdb::KeyValue>, bool>;
KeyValueArray copyKeyValueArray(fdb::future_var::KeyValueRefArray::Type array);
using KeyRangeArray = std::vector<fdb::KeyRange>;
KeyRangeArray copyKeyRangeArray(fdb::future_var::KeyRangeRefArray::Type array);
} // namespace FdbApiTester
#endif

View File

@ -66,7 +66,7 @@ bool WorkloadConfig::getBoolOption(const std::string& name, bool defaultVal) con
if (iter == options.end()) {
return defaultVal;
} else {
std::string val = lowerCase(iter->second);
std::string val(fdb::toCharsRef(lowerCase(fdb::toBytesRef(iter->second))));
if (val == "true") {
return true;
} else if (val == "false") {
@ -111,11 +111,11 @@ void WorkloadBase::execTransaction(std::shared_ptr<ITransactionActor> tx, TTaskF
tasksScheduled++;
manager->txExecutor->execute(tx, [this, tx, cont, failOnError]() {
numTxCompleted++;
fdb_error_t err = tx->getErrorCode();
if (tx->getErrorCode() == error_code_success) {
fdb::Error err = tx->getError();
if (err.code() == error_code_success) {
cont();
} else {
std::string msg = fmt::format("Transaction failed with error: {} ({})", err, fdb_get_error(err));
std::string msg = fmt::format("Transaction failed with error: {} ({})", err.code(), err.what());
if (failOnError) {
error(msg);
failed = true;

View File

@ -25,7 +25,7 @@
#include "TesterTestSpec.h"
#include "TesterUtil.h"
#include "flow/SimpleOpt.h"
#include "bindings/c/foundationdb/fdb_c.h"
#include "test/fdb_api.hpp"
#include <memory>
#include <stdexcept>
@ -270,27 +270,26 @@ bool parseArgs(TesterOptions& options, int argc, char** argv) {
return true;
}
void fdb_check(fdb_error_t e) {
void fdb_check(fdb::Error e) {
if (e) {
fmt::print(stderr, "Unexpected FDB error: {}({})\n", e, fdb_get_error(e));
fmt::print(stderr, "Unexpected FDB error: {}({})\n", e.code(), e.what());
std::abort();
}
}
void applyNetworkOptions(TesterOptions& options) {
if (!options.tmpDir.empty()) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_TMP_DIR, options.tmpDir));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_TMP_DIR, options.tmpDir);
}
if (!options.externalClientLibrary.empty()) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT));
fdb_check(
FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY, options.externalClientLibrary));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT);
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY,
options.externalClientLibrary);
} else if (!options.externalClientDir.empty()) {
if (options.disableLocalClient) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT);
}
fdb_check(
FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_DIRECTORY, options.externalClientDir));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_DIRECTORY, options.externalClientDir);
} else {
if (options.disableLocalClient) {
throw TesterError("Invalid options: Cannot disable local client if no external library is provided");
@ -298,39 +297,38 @@ void applyNetworkOptions(TesterOptions& options) {
}
if (options.testSpec.multiThreaded) {
fdb_check(
FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, options.numFdbThreads));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, options.numFdbThreads);
}
if (options.testSpec.fdbCallbacksOnExternalThreads) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CALLBACKS_ON_EXTERNAL_THREADS));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CALLBACKS_ON_EXTERNAL_THREADS);
}
if (options.testSpec.buggify) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE);
}
if (options.trace) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_ENABLE, options.traceDir));
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_FORMAT, options.traceFormat));
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_LOG_GROUP, options.logGroup));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_ENABLE, options.traceDir);
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_FORMAT, options.traceFormat);
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_LOG_GROUP, options.logGroup);
}
for (auto knob : options.knobs) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_KNOB,
fmt::format("{}={}", knob.first.c_str(), knob.second.c_str())));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_KNOB,
fmt::format("{}={}", knob.first.c_str(), knob.second.c_str()));
}
if (!options.tlsCertFile.empty()) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CERT_PATH, options.tlsCertFile));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CERT_PATH, options.tlsCertFile);
}
if (!options.tlsKeyFile.empty()) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_KEY_PATH, options.tlsKeyFile));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_KEY_PATH, options.tlsKeyFile);
}
if (!options.tlsCaFile.empty()) {
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CA_PATH, options.tlsCaFile));
fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CA_PATH, options.tlsCaFile);
}
}
@ -400,17 +398,17 @@ int main(int argc, char** argv) {
}
randomizeOptions(options);
fdb_check(fdb_select_api_version(options.apiVersion));
fdb::selectApiVersion(options.apiVersion);
applyNetworkOptions(options);
fdb_check(fdb_setup_network());
fdb::network::setup();
std::thread network_thread{ &fdb_run_network };
std::thread network_thread{ &fdb::network::run };
if (!runWorkloads(options)) {
retCode = 1;
}
fdb_check(fdb_stop_network());
fdb_check(fdb::network::stop());
network_thread.join();
} catch (const std::runtime_error& err) {
fmt::print(stderr, "ERROR: {}\n", err.what());

View File

@ -29,9 +29,11 @@
#include <cassert>
#include <cstdint>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
#include <fmt/format.h>
// introduce the option enums
@ -47,9 +49,20 @@ namespace native {
using ByteString = std::basic_string<uint8_t>;
using BytesRef = std::basic_string_view<uint8_t>;
using CharsRef = std::string_view;
using Key = ByteString;
using KeyRef = BytesRef;
using Value = ByteString;
using ValueRef = BytesRef;
struct KeyValue {
Key key;
Value value;
};
struct KeyRange {
Key beginKey;
Key endKey;
};
inline uint8_t const* toBytePtr(char const* ptr) noexcept {
return reinterpret_cast<uint8_t const*>(ptr);
}
@ -96,6 +109,8 @@ public:
bool retryable() const noexcept { return native::fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err) != 0; }
static Error success() { return Error(); }
private:
CodeType err;
};
@ -113,20 +128,24 @@ struct Int64 {
return Error(native::fdb_future_get_int64(f, &out));
}
};
struct Key {
using Type = std::pair<uint8_t const*, int>;
struct KeyRef {
using Type = fdb::KeyRef;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_key, out_key_length] = out;
return Error(native::fdb_future_get_key(f, &out_key, &out_key_length));
uint8_t const* out_key = nullptr;
int out_key_length = 0;
auto err = Error(native::fdb_future_get_key(f, &out_key, &out_key_length));
out = fdb::KeyRef(out_key, out_key_length);
return Error(err);
}
};
struct Value {
using Type = std::tuple<bool, uint8_t const*, int>;
struct ValueRef {
using Type = std::optional<fdb::ValueRef>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_present, out_value, out_value_length] = out;
auto out_present_native = native::fdb_bool_t{};
auto err = native::fdb_future_get_value(f, &out_present_native, &out_value, &out_value_length);
out_present = (out_present_native != 0);
auto out_present = native::fdb_bool_t{};
uint8_t const* out_value = nullptr;
int out_value_length = 0;
auto err = native::fdb_future_get_value(f, &out_present, &out_value, &out_value_length);
out = out_present != 0 ? std::make_optional(fdb::ValueRef(out_value, out_value_length)) : std::nullopt;
return Error(err);
}
};
@ -137,16 +156,35 @@ struct StringArray {
return Error(native::fdb_future_get_string_array(f, &out_strings, &out_count));
}
};
struct KeyValueArray {
using Type = std::tuple<native::FDBKeyValue const*, int, bool>;
struct KeyValueRef : native::FDBKeyValue {
fdb::KeyRef key() const noexcept { return fdb::KeyRef(native::FDBKeyValue::key, key_length); }
fdb::ValueRef value() const noexcept { return fdb::ValueRef(native::FDBKeyValue::value, value_length); }
};
struct KeyValueRefArray {
using Type = std::tuple<KeyValueRef const*, int, bool>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_kv, out_count, out_more] = out;
auto out_more_native = native::fdb_bool_t{};
auto err = native::fdb_future_get_keyvalue_array(f, &out_kv, &out_count, &out_more_native);
auto err = native::fdb_future_get_keyvalue_array(
f, reinterpret_cast<const native::FDBKeyValue**>(&out_kv), &out_count, &out_more_native);
out_more = (out_more_native != 0);
return Error(err);
}
};
struct KeyRangeRef : native::FDBKeyRange {
fdb::KeyRef beginKey() const noexcept { return fdb::KeyRef(native::FDBKeyRange::begin_key, begin_key_length); }
fdb::KeyRef endKey() const noexcept { return fdb::KeyRef(native::FDBKeyRange::end_key, end_key_length); }
};
struct KeyRangeRefArray {
using Type = std::tuple<KeyRangeRef const*, int>;
static Error extract(native::FDBFuture* f, Type& out) noexcept {
auto& [out_ranges, out_count] = out;
auto err = native::fdb_future_get_keyrange_array(
f, reinterpret_cast<const native::FDBKeyRange**>(&out_ranges), &out_count);
return Error(err);
}
};
} // namespace future_var
[[noreturn]] inline void throwError(std::string_view preamble, Error err) {
@ -175,11 +213,19 @@ inline Error setOptionNothrow(FDBNetworkOption option, BytesRef str) noexcept {
return Error(native::fdb_network_set_option(option, str.data(), intSize(str)));
}
inline Error setOptionNothrow(FDBNetworkOption option, CharsRef str) noexcept {
return setOptionNothrow(option, toBytesRef(str));
}
inline Error setOptionNothrow(FDBNetworkOption option, int64_t value) noexcept {
return Error(native::fdb_network_set_option(
option, reinterpret_cast<const uint8_t*>(&value), static_cast<int>(sizeof(value))));
}
inline Error setOptionNothrow(FDBNetworkOption option) noexcept {
return setOptionNothrow(option, "");
}
inline void setOption(FDBNetworkOption option, BytesRef str) {
if (auto err = setOptionNothrow(option, str)) {
throwError(fmt::format("ERROR: fdb_network_set_option({}): ",
@ -188,6 +234,10 @@ inline void setOption(FDBNetworkOption option, BytesRef str) {
}
}
inline void setOption(FDBNetworkOption option, CharsRef str) {
setOption(option, toBytesRef(str));
}
inline void setOption(FDBNetworkOption option, int64_t value) {
if (auto err = setOptionNothrow(option, value)) {
throwError(fmt::format("ERROR: fdb_network_set_option({}, {}): ",
@ -197,6 +247,10 @@ inline void setOption(FDBNetworkOption option, int64_t value) {
}
}
inline void setOption(FDBNetworkOption option) {
setOption(option, "");
}
inline Error setupNothrow() noexcept {
return Error(native::fdb_setup_network());
}
@ -229,18 +283,19 @@ class Result {
}
public:
using KeyValueArray = future_var::KeyValueArray::Type;
using KeyValueRefArray = future_var::KeyValueRefArray::Type;
Error getKeyValueArrayNothrow(KeyValueArray& out) const noexcept {
Error getKeyValueArrayNothrow(KeyValueRefArray& out) const noexcept {
auto out_more_native = native::fdb_bool_t{};
auto& [out_kv, out_count, out_more] = out;
auto err_raw = native::fdb_result_get_keyvalue_array(r.get(), &out_kv, &out_count, &out_more_native);
auto err_raw = native::fdb_result_get_keyvalue_array(
r.get(), reinterpret_cast<const native::FDBKeyValue**>(&out_kv), &out_count, &out_more_native);
out_more = out_more_native != 0;
return Error(err_raw);
}
KeyValueArray getKeyValueArray() const {
auto ret = KeyValueArray{};
KeyValueRefArray getKeyValueArray() const {
auto ret = KeyValueRefArray{};
if (auto err = getKeyValueArrayNothrow(ret))
throwError("ERROR: result_get_keyvalue_array(): ", err);
return ret;
@ -250,6 +305,7 @@ public:
class Future {
protected:
friend class Transaction;
friend std::hash<Future>;
std::shared_ptr<native::FDBFuture> f;
Future(native::FDBFuture* future) {
@ -257,6 +313,8 @@ protected:
f = std::shared_ptr<native::FDBFuture>(future, &native::fdb_future_destroy);
}
native::FDBFuture* nativeHandle() const noexcept { return f.get(); }
// Wraps any capturing lambda as a callback passable to fdb_future_set_callback();
// the lambda is destroyed after invocation.
template <class Fn>
@ -330,6 +388,9 @@ public:
void then(UserFunc&& fn) {
then<Future>(std::forward<UserFunc>(fn));
}
bool operator==(const Future& other) const { return nativeHandle() == other.nativeHandle(); }
bool operator!=(const Future& other) const { return !(*this == other); }
};
template <typename VarTraits>
@ -388,6 +449,7 @@ inline KeySelector lastLessOrEqual(KeyRef key, int offset = 0) {
class Transaction {
friend class Database;
friend class Tenant;
std::shared_ptr<native::FDBTransaction> tr;
explicit Transaction(native::FDBTransaction* tr_raw) {
@ -413,6 +475,12 @@ public:
return Error(native::fdb_transaction_set_option(tr.get(), option, str.data(), intSize(str)));
}
Error setOptionNothrow(FDBTransactionOption option, CharsRef str) noexcept {
return setOptionNothrow(option, toBytesRef(str));
}
Error setOptionNothrow(FDBTransactionOption option) noexcept { return setOptionNothrow(option, ""); }
void setOption(FDBTransactionOption option, int64_t value) {
if (auto err = setOptionNothrow(option, value)) {
throwError(fmt::format("transaction_set_option({}, {}) returned error: ",
@ -430,6 +498,10 @@ public:
}
}
void setOption(FDBTransactionOption option, CharsRef str) { setOption(option, toBytesRef(str)); }
void setOption(FDBTransactionOption option) { setOption(option, ""); }
TypedFuture<future_var::Int64> getReadVersion() { return native::fdb_transaction_get_read_version(tr.get()); }
Error getCommittedVersionNothrow(int64_t& out) {
@ -444,24 +516,24 @@ public:
return out;
}
TypedFuture<future_var::Key> getKey(KeySelector sel, bool snapshot) {
TypedFuture<future_var::KeyRef> getKey(KeySelector sel, bool snapshot) {
return native::fdb_transaction_get_key(tr.get(), sel.key, sel.keyLength, sel.orEqual, sel.offset, snapshot);
}
TypedFuture<future_var::Value> get(KeyRef key, bool snapshot) {
TypedFuture<future_var::ValueRef> get(KeyRef key, bool snapshot) {
return native::fdb_transaction_get(tr.get(), key.data(), intSize(key), snapshot);
}
// Usage: tx.getRange(key_select::firstGreaterOrEqual(firstKey), key_select::lastLessThan(lastKey), ...)
// gets key-value pairs in key range [begin, end)
TypedFuture<future_var::KeyValueArray> getRange(KeySelector first,
KeySelector last,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
bool snapshot,
bool reverse) {
TypedFuture<future_var::KeyValueRefArray> getRange(KeySelector first,
KeySelector last,
int limit,
int target_bytes,
FDBStreamingMode mode,
int iteration,
bool snapshot,
bool reverse) {
return native::fdb_transaction_get_range(tr.get(),
first.key,
first.keyLength,
@ -479,6 +551,11 @@ public:
reverse);
}
TypedFuture<future_var::KeyRangeRefArray> getBlobGranuleRanges(KeyRef begin, KeyRef end) {
return native::fdb_transaction_get_blob_granule_ranges(
tr.get(), begin.data(), intSize(begin), end.data(), intSize(end));
}
Result readBlobGranules(KeyRef begin,
KeyRef end,
int64_t begin_version,
@ -494,6 +571,8 @@ public:
void reset() { return native::fdb_transaction_reset(tr.get()); }
void cancel() { return native::fdb_transaction_cancel(tr.get()); }
void set(KeyRef key, ValueRef value) {
native::fdb_transaction_set(tr.get(), key.data(), intSize(key), value.data(), intSize(value));
}
@ -505,7 +584,46 @@ public:
}
};
class Tenant final {
friend class Database;
std::shared_ptr<native::FDBTenant> tenant;
static constexpr CharsRef tenantManagementMapPrefix = "\xff\xff/management/tenant_map/";
explicit Tenant(native::FDBTenant* tenant_raw) {
if (tenant_raw)
tenant = std::shared_ptr<native::FDBTenant>(tenant_raw, &native::fdb_tenant_destroy);
}
public:
Tenant(const Tenant&) noexcept = default;
Tenant& operator=(const Tenant&) noexcept = default;
Tenant() noexcept : tenant(nullptr) {}
static void createTenant(Transaction tr, BytesRef name) {
tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef());
tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef());
tr.set(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), BytesRef());
}
static void deleteTenant(Transaction tr, BytesRef name) {
tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef());
tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS, BytesRef());
tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef());
tr.clear(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))));
}
Transaction createTransaction() {
auto tx_native = static_cast<native::FDBTransaction*>(nullptr);
auto err = Error(native::fdb_tenant_create_transaction(tenant.get(), &tx_native));
if (err)
throwError("Failed to create transaction: ", err);
return Transaction(tx_native);
}
};
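A hedged usage sketch of the tenant helpers (the database handle, the tenant name, and the elided commit/retry step are illustrative assumptions; committing goes through the transaction commit API, which this hunk does not show):

void tenantExample(fdb::Database db) {
    auto name = fdb::toBytesRef(std::string_view("exampleTenant")); // hypothetical name
    auto tr = db.createTransaction();
    fdb::Tenant::createTenant(tr, name);
    // ... commit tr (with the usual retry loop) so the tenant becomes visible ...
    fdb::Tenant tenant = db.openTenant(name);
    auto tenantTr = tenant.createTransaction(); // scoped to the tenant's keyspace
}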
class Database {
friend class Tenant;
std::shared_ptr<native::FDBDatabase> db;
public:
@ -545,6 +663,16 @@ public:
}
}
Tenant openTenant(BytesRef name) {
if (!db)
throw std::runtime_error("openTenant from null database");
auto tenant_native = static_cast<native::FDBTenant*>(nullptr);
if (auto err = Error(native::fdb_database_open_tenant(db.get(), name.data(), name.size(), &tenant_native))) {
throwError(fmt::format("Failed to open tenant with name '{}': ", toCharsRef(name)), err);
}
return Tenant(tenant_native);
}
Transaction createTransaction() {
if (!db)
throw std::runtime_error("create_transaction from null database");
@ -558,4 +686,9 @@ public:
} // namespace fdb
template <>
struct std::hash<fdb::Future> {
size_t operator()(const fdb::Future& f) const { return std::hash<fdb::native::FDBFuture*>{}(f.nativeHandle()); }
};
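With the equality operators and this hash specialization, futures can key unordered containers; a small hedged sketch (tracking in-flight operations is just an illustrative use):

#include <unordered_map>

void trackFutures(fdb::Future a, fdb::Future b) {
    std::unordered_map<fdb::Future, int> inflight; // int: illustrative operation id
    inflight.emplace(a, 1);
    inflight.emplace(b, 2);
    inflight.erase(a); // e.g. when a completes
}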
#endif /*FDB_API_HPP*/

View File

@ -70,8 +70,6 @@ void ResumableStateForPopulate::runOneTick() {
const auto tx_duration = watch_tx.diff();
stats.addLatency(OP_COMMIT, commit_latency);
stats.addLatency(OP_TRANSACTION, tx_duration);
sample_bins[OP_COMMIT].put(commit_latency);
sample_bins[OP_TRANSACTION].put(tx_duration);
}
stats.incrOpCount(OP_COMMIT);
stats.incrOpCount(OP_TRANSACTION);
@ -190,7 +188,6 @@ void ResumableStateForRunWorkload::updateStepStats() {
const auto step_latency = watch_step.diff();
if (do_sample) {
stats.addLatency(OP_COMMIT, step_latency);
sample_bins[OP_COMMIT].put(step_latency);
}
tx.reset();
stats.incrOpCount(OP_COMMIT);
@ -204,7 +201,6 @@ void ResumableStateForRunWorkload::updateStepStats() {
if (do_sample) {
const auto op_latency = watch_op.diff();
stats.addLatency(iter.op, op_latency);
sample_bins[iter.op].put(op_latency);
}
stats.incrOpCount(iter.op);
}
@ -248,8 +244,6 @@ void ResumableStateForRunWorkload::onTransactionSuccess() {
const auto tx_duration = watch_tx.diff();
stats.addLatency(OP_COMMIT, commit_latency);
stats.addLatency(OP_TRANSACTION, tx_duration);
sample_bins[OP_COMMIT].put(commit_latency);
sample_bins[OP_TRANSACTION].put(tx_duration);
}
stats.incrOpCount(OP_COMMIT);
stats.incrOpCount(OP_TRANSACTION);
@ -270,7 +264,6 @@ void ResumableStateForRunWorkload::onTransactionSuccess() {
if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) {
const auto tx_duration = watch_tx.diff();
stats.addLatency(OP_TRANSACTION, tx_duration);
sample_bins[OP_TRANSACTION].put(tx_duration);
}
stats.incrOpCount(OP_TRANSACTION);
watch_tx.startFromStop();

View File

@ -42,7 +42,6 @@ struct ResumableStateForPopulate : std::enable_shared_from_this<ResumableStateFo
Arguments const& args;
ThreadStatistics& stats;
std::atomic<int>& stopcount;
LatencySampleBinArray sample_bins;
int key_begin;
int key_end;
int key_checkpoint;
@ -84,7 +83,6 @@ struct ResumableStateForRunWorkload : std::enable_shared_from_this<ResumableStat
std::atomic<int> const& signal;
int max_iters;
OpIterator iter;
LatencySampleBinArray sample_bins;
fdb::ByteString key1;
fdb::ByteString key2;
fdb::ByteString val;

View File

@ -0,0 +1,275 @@
/*
* DDSketch.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DDSKETCH_H
#define DDSKETCH_H
#include <iterator>
#include <limits>
#include <type_traits>
#pragma once
#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>
// A namespace for fast log() computation.
namespace fastLogger {
// Basically, the goal is to compute log(x)/log(r).
// For double, it is represented as 2^e*(1+s) (0<=s<1), so our goal becomes
// (e*log(2) + log(1+s))/log(r), and we approximate log(1+s) with a cubic function.
// See more details in Datadog's paper, or in CubicallyInterpolatedMapping.java at
// https://github.com/DataDog/sketches-java/
inline const double correctingFactor = 1.00988652862227438516; // = 7 / (10 * log(2));
constexpr inline const double A = 6.0 / 35.0, B = -3.0 / 5.0, C = 10.0 / 7.0;
inline double fastlog(double value) {
int e;
double s = frexp(value, &e);
s = s * 2 - 1;
return ((A * s + B) * s + C) * s + e - 1;
}
inline double reverseLog(double index) {
long exponent = floor(index);
// Derived from Cardano's formula
double d0 = B * B - 3 * A * C;
double d1 = 2 * B * B * B - 9 * A * B * C - 27 * A * A * (index - exponent);
double p = cbrt((d1 - sqrt(d1 * d1 - 4 * d0 * d0 * d0)) / 2);
double significandPlusOne = -(B + p + d0 / p) / (3 * A) + 1;
return ldexp(significandPlusOne / 2, exponent + 1);
}
} // namespace fastLogger
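A quick sanity sketch (assuming this header is included; the inputs and the 1% tolerance are illustrative) showing that reverseLog approximately inverts fastlog:

#include <cassert>
#include <cmath>

void fastLoggerRoundTrip() {
    for (double x : { 0.5, 1.0, 3.14159, 1e6 }) {
        double back = fastLogger::reverseLog(fastLogger::fastlog(x));
        assert(std::fabs(back - x) / x < 0.01); // the cubic fit keeps relative error small
    }
}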
// DDSketch for non-negative numbers (those < EPS = 10^-18 are
// treated as 0, and huge numbers (>1/EPS) fail ASSERT). This is the base
// class without a concrete log() implementation.
template <class Impl, class T>
class DDSketchBase {
static constexpr T defaultMin() { return std::numeric_limits<T>::max(); }
static constexpr T defaultMax() {
if constexpr (std::is_floating_point_v<T>) {
return -std::numeric_limits<T>::max();
} else {
return std::numeric_limits<T>::min();
}
}
public:
explicit DDSketchBase(double errorGuarantee)
: errorGuarantee(errorGuarantee), populationSize(0), zeroPopulationSize(0), minValue(defaultMin()),
maxValue(defaultMax()), sum(T()) {}
DDSketchBase<Impl, T>& addSample(T sample) {
// Keep the name addSample for now, even though it is no longer a sample
if (!populationSize)
minValue = maxValue = sample;
if (sample <= EPS) {
zeroPopulationSize++;
} else {
int index = static_cast<Impl*>(this)->getIndex(sample);
assert(index >= 0 && index < int(buckets.size()));
buckets[index]++;
}
populationSize++;
sum += sample;
maxValue = std::max(maxValue, sample);
minValue = std::min(minValue, sample);
return *this;
}
double mean() const {
if (populationSize == 0)
return 0;
return (double)sum / populationSize;
}
T median() { return percentile(0.5); }
T percentile(double percentile) {
assert(percentile >= 0 && percentile <= 1);
if (populationSize == 0)
return T();
uint64_t targetPercentilePopulation = percentile * (populationSize - 1);
// Now find the tPP-th (0-indexed) element
if (targetPercentilePopulation < zeroPopulationSize)
return T(0);
int index = -1;
bool found = false;
if (percentile <= 0.5) { // count up
uint64_t count = zeroPopulationSize;
for (size_t i = 0; i < buckets.size(); i++) {
if (targetPercentilePopulation < count + buckets[i]) {
// count + buckets[i] = # of numbers seen so far (the zero samples plus
// buckets up to and including this one), so the first bucket where
// tPP < count + buckets[i] holds is the one containing the target
found = true;
index = i;
break;
}
count += buckets[i];
}
} else { // and count down
uint64_t count = 0;
for (auto rit = buckets.rbegin(); rit != buckets.rend(); rit++) {
if (targetPercentilePopulation + count + *rit >= populationSize) {
// count + *rit is the # of numbers in this bucket and to its right.
// If the target were strictly to the left of this bucket, we would
// have tPP + count + *rit < populationSize (tPP is 0-indexed), so the
// first bucket where tPP + count + *rit >= populationSize holds is
// the one containing the target.
found = true;
index = std::distance(rit, buckets.rend()) - 1;
break;
}
count += *rit;
}
}
assert(found);
return static_cast<Impl*>(this)->getValue(index);
}
T min() const { return minValue; }
T max() const { return maxValue; }
void clear() {
std::fill(buckets.begin(), buckets.end(), 0);
populationSize = zeroPopulationSize = 0;
sum = 0;
minValue = defaultMin();
maxValue = defaultMax();
}
uint64_t getPopulationSize() const { return populationSize; }
double getErrorGuarantee() const { return errorGuarantee; }
DDSketchBase<Impl, T>& mergeWith(const DDSketchBase<Impl, T>& anotherSketch) {
// Must have the same guarantee
assert(fabs(errorGuarantee - anotherSketch.errorGuarantee) < EPS &&
anotherSketch.buckets.size() == buckets.size());
for (size_t i = 0; i < anotherSketch.buckets.size(); i++) {
buckets[i] += anotherSketch.buckets[i];
}
populationSize += anotherSketch.populationSize;
zeroPopulationSize += anotherSketch.zeroPopulationSize;
minValue = std::min(minValue, anotherSketch.minValue);
maxValue = std::max(maxValue, anotherSketch.maxValue);
sum += anotherSketch.sum;
return *this;
}
constexpr static double EPS = 1e-18; // smaller numbers are considered as 0
protected:
double errorGuarantee; // As defined in the paper
uint64_t populationSize, zeroPopulationSize; // we need to separately count 0s
std::vector<uint64_t> buckets;
T minValue, maxValue, sum;
void setBucketSize(int capacity) { buckets.resize(capacity, 0); }
};
// DDSketch with fast log implementation for float numbers
template <class T>
class DDSketch : public DDSketchBase<DDSketch<T>, T> {
public:
explicit DDSketch(double errorGuarantee = 0.005)
: DDSketchBase<DDSketch<T>, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)),
multiplier(fastLogger::correctingFactor * log(2) / log(gamma)) {
offset = getIndex(1.0 / DDSketchBase<DDSketch<T>, T>::EPS);
this->setBucketSize(2 * offset);
}
int getIndex(T sample) {
static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems");
return ceil(fastLogger::fastlog(sample) * multiplier) + offset;
}
T getValue(int index) { return fastLogger::reverseLog((index - offset) / multiplier) * 2.0 / (1 + gamma); }
private:
double gamma, multiplier;
int offset = 0;
};
// DDSketch using <cmath> log(). Slow; only use this when the others don't work.
template <class T>
class DDSketchSlow : public DDSketchBase<DDSketchSlow<T>, T> {
public:
DDSketchSlow(double errorGuarantee = 0.1)
: DDSketchBase<DDSketchSlow<T>, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)),
logGamma(log(gamma)) {
offset = getIndex(1.0 / DDSketchBase<DDSketchSlow<T>, T>::EPS) + 5;
this->setBucketSize(2 * offset);
}
int getIndex(T sample) { return ceil(log(sample) / logGamma) + offset; }
T getValue(int index) { return (T)(2.0 * pow(gamma, (index - offset)) / (1 + gamma)); }
private:
double gamma, logGamma;
int offset = 0;
};
// DDSketch for unsigned int. Faster than the float version. Fixed accuracy.
class DDSketchFastUnsigned : public DDSketchBase<DDSketchFastUnsigned, unsigned> {
public:
DDSketchFastUnsigned() : DDSketchBase<DDSketchFastUnsigned, unsigned>(errorGuarantee) { this->setBucketSize(129); }
int getIndex(unsigned sample) {
__uint128_t v = sample;
v *= v;
v *= v; // sample^4
uint64_t low = (uint64_t)v, high = (uint64_t)(v >> 64);
return 128 - (high == 0 ? ((low == 0 ? 64 : __builtin_clzll(low)) + 64) : __builtin_clzll(high));
}
unsigned getValue(int index) {
double r = 1, g = gamma;
while (index) { // quick power method for power(gamma, index)
if (index & 1)
r *= g;
g *= g;
index >>= 1;
}
// 2.0 * pow(gamma, index) / (1 + gamma) is what we need
return (unsigned)(2.0 * r / (1 + gamma) + 0.5); // round to nearest int
}
private:
constexpr static double errorGuarantee = 0.08642723372;
// getIndex computes floor(log_2(x^4)) + 1, which differs from
// ceil(log_2(x^4)) only when x is a power of 2, and that does not
// change the error bound. The original sketch asks for ceil(log_r(x)),
// so we know r = pow(2, 1/4) = 1.189207115. And r = (1 + eG) / (1 - eG),
// so eG = 0.08642723372.
constexpr static double gamma = 1.189207115;
};
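Restated as a formula, the relation used in that comment is the standard DDSketch error bound (included here as a check, not taken from this file):

r = \frac{1+\varepsilon}{1-\varepsilon} \iff \varepsilon = \frac{r-1}{r+1}, \qquad r = 2^{1/4} \approx 1.189207115 \implies \varepsilon \approx 0.08642723372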
#endif
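A minimal usage sketch of the DDSketch API above (the sample values and the 0.5% guarantee are illustrative):

#include <cstdio>

int main() {
    DDSketch<double> sketch(0.005); // 0.5% relative error guarantee
    for (int i = 1; i <= 100000; i++) {
        sketch.addSample(i * 0.001); // illustrative latency samples
    }
    // percentile queries carry the configured relative-error guarantee
    std::printf("p50=%g p99=%g max=%g mean=%g\n",
                sketch.percentile(0.5), sketch.percentile(0.99), sketch.max(), sketch.mean());
}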

File diff suppressed because it is too large

View File

@ -44,11 +44,13 @@ constexpr const int MODE_INVALID = -1;
constexpr const int MODE_CLEAN = 0;
constexpr const int MODE_BUILD = 1;
constexpr const int MODE_RUN = 2;
constexpr const int MODE_REPORT = 3;
/* for long arguments */
enum ArgKind {
ARG_KEYLEN,
ARG_VALLEN,
ARG_TENANTS,
ARG_TPS,
ARG_ASYNC,
ARG_COMMITGET,
@ -73,6 +75,7 @@ enum ArgKind {
ARG_CLIENT_THREADS_PER_VERSION,
ARG_JSON_REPORT,
ARG_BG_FILE_PATH, // if blob granule files are stored locally, mako will read and materialize them if this is set
ARG_EXPORT_PATH,
ARG_DISTRIBUTED_TRACER_CLIENT
};
@ -103,6 +106,8 @@ enum OpKind {
enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE };
enum DistributedTracerClient { DISABLED, NETWORK_LOSSY, LOG_FILE };
/* WorkloadSpec and Arguments are set only once, in the master process,
* and are not touched by child processes.
*/
@ -119,6 +124,7 @@ constexpr const int NUM_CLUSTERS_MAX = 3;
constexpr const int NUM_DATABASES_MAX = 10;
constexpr const std::string_view KEY_PREFIX{ "mako" };
constexpr const std::string_view TEMP_DATA_STORE{ "/tmp/makoTemp" };
constexpr const int MAX_REPORT_FILES = 200;
/* benchmark parameters */
struct Arguments {
@ -139,6 +145,7 @@ struct Arguments {
int sampling;
int key_length;
int value_length;
int tenants;
int zipf;
int commit_get;
int verbose;
@ -162,6 +169,9 @@ struct Arguments {
char json_output_path[PATH_MAX];
bool bg_materialize_files;
char bg_file_path[PATH_MAX];
char stats_export_path[PATH_MAX];
char report_files[MAX_REPORT_FILES][PATH_MAX];
int num_report_files;
int distributed_tracer_client;
};

View File

@ -51,7 +51,7 @@ const std::array<Operation, MAX_OP> opTable{
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::Value>();
f.get<future_var::ValueRef>();
}
} } },
1,
@ -72,7 +72,7 @@ const std::array<Operation, MAX_OP> opTable{
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::KeyValueArray>();
f.get<future_var::KeyValueRefArray>();
}
} } },
1,
@ -84,7 +84,7 @@ const std::array<Operation, MAX_OP> opTable{
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::Value>();
f.get<future_var::ValueRef>();
}
} } },
1,
@ -107,7 +107,7 @@ const std::array<Operation, MAX_OP> opTable{
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::KeyValueArray>();
f.get<future_var::KeyValueRefArray>();
}
} } },
1,
@ -119,7 +119,7 @@ const std::array<Operation, MAX_OP> opTable{
},
[](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) {
if (f && !f.error()) {
f.get<future_var::Value>();
f.get<future_var::ValueRef>();
}
} },
{ StepKind::IMM,
@ -257,7 +257,7 @@ const std::array<Operation, MAX_OP> opTable{
user_context.clear();
auto out = Result::KeyValueArray{};
auto out = Result::KeyValueRefArray{};
err = r.getKeyValueArrayNothrow(out);
if (!err || err.is(2037 /*blob_granule_not_materialized*/))
return Future();

View File

@ -24,76 +24,66 @@
#include <array>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <istream>
#include <limits>
#include <list>
#include <new>
#include <ostream>
#include <utility>
#include "mako/mako.hpp"
#include "operations.hpp"
#include "time.hpp"
#include "ddsketch.hpp"
#include "contrib/rapidjson/rapidjson/document.h"
#include "contrib/rapidjson/rapidjson/rapidjson.h"
#include "contrib/rapidjson/rapidjson/stringbuffer.h"
#include "contrib/rapidjson/rapidjson/writer.h"
#include <iostream>
#include <sstream>
#include <vector>
namespace mako {
/* rough cap on the number of samples to avoid OOM hindering benchmark */
constexpr const size_t SAMPLE_CAP = 2000000;
/* size of each block to get detailed latency for each operation */
constexpr const size_t LAT_BLOCK_SIZE = 4093;
/* hard cap on the number of sample blocks = 488 */
constexpr const size_t MAX_LAT_BLOCKS = SAMPLE_CAP / LAT_BLOCK_SIZE;
/* memory block allocated to each operation when collecting detailed latency */
class LatencySampleBlock {
uint64_t samples[LAT_BLOCK_SIZE]{
0,
};
uint64_t index{ 0 };
class DDSketchMako : public DDSketch<uint64_t> {
public:
LatencySampleBlock() noexcept = default;
bool full() const noexcept { return index >= LAT_BLOCK_SIZE; }
void put(timediff_t td) {
assert(!full());
samples[index++] = toIntegerMicroseconds(td);
}
// return {data block, number of samples}
std::pair<uint64_t const*, size_t> data() const noexcept { return { samples, index }; }
};
void serialize(rapidjson::Writer<rapidjson::StringBuffer>& writer) const {
writer.StartObject();
writer.String("errorGuarantee");
writer.Double(errorGuarantee);
writer.String("minValue");
writer.Uint64(minValue);
writer.String("maxValue");
writer.Uint64(maxValue);
writer.String("populationSize");
writer.Uint64(populationSize);
writer.String("zeroPopulationSize");
writer.Uint64(zeroPopulationSize);
writer.String("sum");
writer.Uint64(sum);
/* collect sampled latencies until OOM is hit */
class LatencySampleBin {
std::list<LatencySampleBlock> blocks;
bool noMoreAlloc{ false };
bool tryAlloc() {
try {
blocks.emplace_back();
} catch (const std::bad_alloc&) {
noMoreAlloc = true;
return false;
writer.String("buckets");
writer.StartArray();
for (auto b : buckets) {
writer.Uint64(b);
}
return true;
}
writer.EndArray();
public:
void reserveOneBlock() {
if (blocks.empty())
tryAlloc();
writer.EndObject();
}
void deserialize(const rapidjson::Value& obj) {
errorGuarantee = obj["errorGuarantee"].GetDouble();
minValue = obj["minValue"].GetUint64();
maxValue = obj["maxValue"].GetUint64();
populationSize = obj["populationSize"].GetUint64();
zeroPopulationSize = obj["zeroPopulationSize"].GetUint64();
sum = obj["sum"].GetUint64();
void put(timediff_t td) {
if (blocks.empty() || blocks.back().full()) {
if (blocks.size() >= MAX_LAT_BLOCKS || noMoreAlloc || !tryAlloc())
return;
}
blocks.back().put(td);
}
// iterate & apply for each block user function void(uint64_t const*, size_t)
template <typename Func>
void forEachBlock(Func&& fn) const {
for (const auto& block : blocks) {
auto [ptr, cnt] = block.data();
fn(ptr, cnt);
auto jsonBuckets = obj["buckets"].GetArray();
uint64_t idx = 0;
for (auto it = jsonBuckets.Begin(); it != jsonBuckets.End(); it++) {
buckets[idx] = it->GetUint64();
idx++;
}
}
};
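A hedged sketch of the serialize path (it mirrors ThreadStatistics::writeToFile below; the sample values are illustrative):

#include <iostream>

void dumpSketch() {
    DDSketchMako sketch;
    sketch.addSample(120); // illustrative microsecond latencies
    sketch.addSample(450);
    rapidjson::StringBuffer ss;
    rapidjson::Writer<rapidjson::StringBuffer> writer(ss);
    sketch.serialize(writer); // writes errorGuarantee, min/max, population counts, sum, buckets
    std::cout << ss.GetString() << "\n";
}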
@ -101,21 +91,20 @@ public:
class alignas(64) ThreadStatistics {
uint64_t conflicts;
uint64_t total_errors;
uint64_t ops[MAX_OP];
uint64_t errors[MAX_OP];
uint64_t latency_samples[MAX_OP];
uint64_t latency_us_total[MAX_OP];
uint64_t latency_us_min[MAX_OP];
uint64_t latency_us_max[MAX_OP];
std::array<uint64_t, MAX_OP> ops;
std::array<uint64_t, MAX_OP> errors;
std::array<uint64_t, MAX_OP> latency_samples;
std::array<uint64_t, MAX_OP> latency_us_total;
std::vector<DDSketchMako> sketches;
public:
ThreadStatistics() noexcept {
memset(this, 0, sizeof(ThreadStatistics));
memset(latency_us_min, 0xff, sizeof(latency_us_min));
sketches.resize(MAX_OP);
}
ThreadStatistics(const ThreadStatistics& other) noexcept = default;
ThreadStatistics& operator=(const ThreadStatistics& other) noexcept = default;
ThreadStatistics(const ThreadStatistics& other) = default;
ThreadStatistics& operator=(const ThreadStatistics& other) = default;
uint64_t getConflictCount() const noexcept { return conflicts; }
@ -129,23 +118,24 @@ public:
uint64_t getLatencyUsTotal(int op) const noexcept { return latency_us_total[op]; }
uint64_t getLatencyUsMin(int op) const noexcept { return latency_us_min[op]; }
uint64_t getLatencyUsMin(int op) const noexcept { return sketches[op].min(); }
uint64_t getLatencyUsMax(int op) const noexcept { return latency_us_max[op]; }
uint64_t getLatencyUsMax(int op) const noexcept { return sketches[op].max(); }
uint64_t percentile(int op, double quantile) { return sketches[op].percentile(quantile); }
uint64_t mean(int op) const noexcept { return sketches[op].mean(); }
// with 'this' as final aggregation, factor in 'other'
void combine(const ThreadStatistics& other) {
conflicts += other.conflicts;
for (auto op = 0; op < MAX_OP; op++) {
sketches[op].mergeWith(other.sketches[op]);
ops[op] += other.ops[op];
errors[op] += other.errors[op];
total_errors += other.errors[op];
latency_samples[op] += other.latency_samples[op];
latency_us_total[op] += other.latency_us_total[op];
if (latency_us_min[op] > other.latency_us_min[op])
latency_us_min[op] = other.latency_us_min[op];
if (latency_us_max[op] < other.latency_us_max[op])
latency_us_max[op] = other.latency_us_max[op];
}
}
@ -162,15 +152,106 @@ public:
void addLatency(int op, timediff_t diff) noexcept {
const auto latency_us = toIntegerMicroseconds(diff);
latency_samples[op]++;
sketches[op].addSample(latency_us);
latency_us_total[op] += latency_us;
if (latency_us_min[op] > latency_us)
latency_us_min[op] = latency_us;
if (latency_us_max[op] < latency_us)
latency_us_max[op] = latency_us;
}
void writeToFile(const std::string& filename, int op) const {
rapidjson::StringBuffer ss;
rapidjson::Writer<rapidjson::StringBuffer> writer(ss);
sketches[op].serialize(writer);
std::ofstream f(filename);
f << ss.GetString();
}
void updateLatencies(const std::vector<DDSketchMako>& other_sketches) { sketches = other_sketches; }
friend std::ofstream& operator<<(std::ofstream& os, ThreadStatistics& stats);
friend std::ifstream& operator>>(std::ifstream& is, ThreadStatistics& stats);
};
using LatencySampleBinArray = std::array<LatencySampleBin, MAX_OP>;
inline std::ofstream& operator<<(std::ofstream& os, ThreadStatistics& stats) {
rapidjson::StringBuffer ss;
rapidjson::Writer<rapidjson::StringBuffer> writer(ss);
writer.StartObject();
writer.String("conflicts");
writer.Uint64(stats.conflicts);
writer.String("total_errors");
writer.Uint64(stats.total_errors);
writer.String("ops");
writer.StartArray();
for (auto op = 0; op < MAX_OP; op++) {
writer.Uint64(stats.ops[op]);
}
writer.EndArray();
writer.String("errors");
writer.StartArray();
for (auto op = 0; op < MAX_OP; op++) {
writer.Uint64(stats.errors[op]);
}
writer.EndArray();
writer.String("latency_samples");
writer.StartArray();
for (auto op = 0; op < MAX_OP; op++) {
writer.Uint64(stats.latency_samples[op]);
}
writer.EndArray();
writer.String("latency_us_total");
writer.StartArray();
for (auto op = 0; op < MAX_OP; op++) {
writer.Uint64(stats.latency_us_total[op]);
}
writer.EndArray();
for (auto op = 0; op < MAX_OP; op++) {
if (stats.sketches[op].getPopulationSize() > 0) {
std::string op_name = getOpName(op);
writer.String(op_name.c_str());
stats.sketches[op].serialize(writer);
}
}
writer.EndObject();
os << ss.GetString();
return os;
}
inline void populateArray(std::array<uint64_t, MAX_OP>& arr,
rapidjson::GenericArray<false, rapidjson::GenericValue<rapidjson::UTF8<>>>& json) {
uint64_t idx = 0;
for (auto it = json.Begin(); it != json.End(); it++) {
arr[idx] = it->GetUint64();
idx++;
}
}
inline std::ifstream& operator>>(std::ifstream& is, ThreadStatistics& stats) {
std::stringstream buffer;
buffer << is.rdbuf();
rapidjson::Document doc;
doc.Parse(buffer.str().c_str());
stats.conflicts = doc["conflicts"].GetUint64();
stats.total_errors = doc["total_errors"].GetUint64();
auto jsonOps = doc["ops"].GetArray();
auto jsonErrors = doc["errors"].GetArray();
auto jsonLatencySamples = doc["latency_samples"].GetArray();
auto jsonLatencyUsTotal = doc["latency_us_total"].GetArray();
populateArray(stats.ops, jsonOps);
populateArray(stats.errors, jsonErrors);
populateArray(stats.latency_samples, jsonLatencySamples);
populateArray(stats.latency_us_total, jsonLatencyUsTotal);
for (int op = 0; op < MAX_OP; op++) {
const std::string op_name = getOpName(op);
// ops with zero samples are skipped during serialization, so guard the lookup
if (doc.HasMember(op_name.c_str())) {
stats.sketches[op].deserialize(doc[op_name.c_str()]);
}
}
return is;
}
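Putting the two stream operators together, a hedged round-trip sketch (the file path is illustrative; the HasMember guard above makes re-reading safe even for ops that never ran):

void roundTripStats(ThreadStatistics& stats) {
    {
        std::ofstream out("/tmp/mako_stats.json"); // illustrative path
        out << stats;
    }
    ThreadStatistics restored;
    std::ifstream in("/tmp/mako_stats.json");
    in >> restored;
}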
} // namespace mako

View File

@ -6,7 +6,7 @@ ExternalProject_Add(
doctest
PREFIX ${CMAKE_BINARY_DIR}/doctest
GIT_REPOSITORY https://github.com/onqtam/doctest.git
GIT_TAG 8424be522357e68d8c6178375546bb0cf9d5f6b3 # v2.4.1
GIT_TAG 7b9885133108ae301ddd16e2651320f54cafeba7 # v2.4.8
TIMEOUT 10
CONFIGURE_COMMAND ""
BUILD_COMMAND ""

View File

@ -177,13 +177,24 @@ struct GetRangeResult {
};
struct GetMappedRangeResult {
std::vector<std::tuple<std::string, // key
std::string, // value
std::string, // begin
std::string, // end
std::vector<std::pair<std::string, std::string>>, // range results
fdb_bool_t>>
mkvs;
struct MappedKV {
MappedKV(const std::string& key,
const std::string& value,
const std::string& begin,
const std::string& end,
const std::vector<std::pair<std::string, std::string>>& range_results,
fdb_bool_t boundaryAndExist)
: key(key), value(value), begin(begin), end(end), range_results(range_results),
boundaryAndExist(boundaryAndExist) {}
std::string key;
std::string value;
std::string begin;
std::string end;
std::vector<std::pair<std::string, std::string>> range_results;
fdb_bool_t boundaryAndExist;
};
std::vector<MappedKV> mkvs;
// True if values remain in the key range requested.
bool more;
// Set to a non-zero value if an error occurred during the transaction.
@ -1093,24 +1104,24 @@ TEST_CASE("fdb_transaction_get_mapped_range") {
bool boundary;
for (int i = 0; i < expectSize; i++, id++) {
boundary = i == 0 || i == expectSize - 1;
const auto& [key, value, begin, end, range_results, boundaryAndExist] = result.mkvs[i];
const auto& mkv = result.mkvs[i];
if (matchIndex == MATCH_INDEX_ALL || i == 0 || i == expectSize - 1) {
CHECK(indexEntryKey(id).compare(key) == 0);
CHECK(indexEntryKey(id).compare(mkv.key) == 0);
} else if (matchIndex == MATCH_INDEX_MATCHED_ONLY) {
CHECK(indexEntryKey(id).compare(key) == 0);
CHECK(indexEntryKey(id).compare(mkv.key) == 0);
} else if (matchIndex == MATCH_INDEX_UNMATCHED_ONLY) {
CHECK(EMPTY.compare(key) == 0);
CHECK(EMPTY.compare(mkv.key) == 0);
} else {
CHECK(EMPTY.compare(key) == 0);
CHECK(EMPTY.compare(mkv.key) == 0);
}
bool empty = range_results.empty();
CHECK(boundaryAndExist == (boundary && !empty));
CHECK(EMPTY.compare(value) == 0);
CHECK(range_results.size() == SPLIT_SIZE);
bool empty = mkv.range_results.empty();
CHECK(mkv.boundaryAndExist == (boundary && !empty));
CHECK(EMPTY.compare(mkv.value) == 0);
CHECK(mkv.range_results.size() == SPLIT_SIZE);
for (int split = 0; split < SPLIT_SIZE; split++) {
auto& [k, v] = range_results[split];
CHECK(recordKey(id, split).compare(k) == 0);
CHECK(recordValue(id, split).compare(v) == 0);
auto& kv = mkv.range_results[split];
CHECK(recordKey(id, split).compare(kv.first) == 0);
CHECK(recordValue(id, split).compare(kv.second) == 0);
}
}
break;
@ -1151,19 +1162,19 @@ TEST_CASE("fdb_transaction_get_mapped_range_missing_all_secondary") {
bool boundary;
for (int i = 0; i < expectSize; i++, id++) {
boundary = i == 0 || i == expectSize - 1;
const auto& [key, value, begin, end, range_results, boundaryAndExist] = result.mkvs[i];
const auto& mkv = result.mkvs[i];
if (matchIndex == MATCH_INDEX_ALL || i == 0 || i == expectSize - 1) {
CHECK(indexEntryKey(id).compare(key) == 0);
CHECK(indexEntryKey(id).compare(mkv.key) == 0);
} else if (matchIndex == MATCH_INDEX_MATCHED_ONLY) {
CHECK(EMPTY.compare(key) == 0);
CHECK(EMPTY.compare(mkv.key) == 0);
} else if (matchIndex == MATCH_INDEX_UNMATCHED_ONLY) {
CHECK(indexEntryKey(id).compare(key) == 0);
CHECK(indexEntryKey(id).compare(mkv.key) == 0);
} else {
CHECK(EMPTY.compare(key) == 0);
CHECK(EMPTY.compare(mkv.key) == 0);
}
bool empty = range_results.empty();
CHECK(boundaryAndExist == (boundary && !empty));
CHECK(EMPTY.compare(value) == 0);
bool empty = mkv.range_results.empty();
CHECK(mkv.boundaryAndExist == (boundary && !empty));
CHECK(EMPTY.compare(mkv.value) == 0);
}
break;
}
@ -1269,10 +1280,8 @@ TEST_CASE("fdb_transaction_get_range reverse") {
std::string data_key = it->first;
std::string data_value = it->second;
auto [key, value] = *results_it;
CHECK(data_key.compare(key) == 0);
CHECK(data[data_key].compare(value) == 0);
CHECK(data_key.compare(results_it->first /*key*/) == 0);
CHECK(data[data_key].compare(results_it->second /*value*/) == 0);
}
break;
}
@ -1306,8 +1315,8 @@ TEST_CASE("fdb_transaction_get_range limit") {
CHECK(result.more);
}
for (const auto& [key, value] : result.kvs) {
CHECK(data[key].compare(value) == 0);
for (const auto& kv : result.kvs) {
CHECK(data[kv.first].compare(kv.second) == 0);
}
break;
}
@ -1338,8 +1347,8 @@ TEST_CASE("fdb_transaction_get_range FDB_STREAMING_MODE_EXACT") {
CHECK(result.kvs.size() == 3);
CHECK(result.more);
for (const auto& [key, value] : result.kvs) {
CHECK(data[key].compare(value) == 0);
for (const auto& kv : result.kvs) {
CHECK(data[kv.first].compare(kv.second) == 0);
}
break;
}
@ -2212,7 +2221,7 @@ TEST_CASE("special-key-space custom transaction ID") {
fdb_check(f1.get(&out_present, (const uint8_t**)&val, &vallen));
REQUIRE(out_present);
UID transaction_id = UID::fromString(val);
UID transaction_id = UID::fromString(std::string(val, vallen));
CHECK(transaction_id == randomTransactionID);
break;
}

View File

@ -26,7 +26,7 @@ import java.util.List;
public class KeyArrayResult {
final List<byte[]> keys;
KeyArrayResult(byte[] keyBytes, int[] keyLengths) {
public KeyArrayResult(byte[] keyBytes, int[] keyLengths) {
int count = keyLengths.length;
keys = new ArrayList<byte[]>(count);

View File

@ -70,6 +70,7 @@ include_directories(${CMAKE_BINARY_DIR})
if(WIN32)
add_definitions(-DBOOST_USE_WINDOWS_H)
add_definitions(-DWIN32_LEAN_AND_MEAN)
add_definitions(-D_ITERATOR_DEBUG_LEVEL=0)
endif()
if (USE_CCACHE)

View File

@ -260,7 +260,8 @@
"available_bytes":0, // an estimate of the process' fair share of the memory available to fdbservers
"limit_bytes":0, // memory limit per process
"unused_allocated_memory":0,
"used_bytes":0 // virtual memory size of the process
"used_bytes":0, // virtual memory size of the process
"rss_bytes":0 // resident memory size of the process
},
"messages":[
{

View File

@ -428,7 +428,7 @@ public:
platform::createDirectory(path);
}
}
self->lfd = open(self->file.fileName.c_str(), O_WRONLY | O_CREAT | O_TRUNC);
self->lfd = open(self->file.fileName.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0600);
if (self->lfd == -1) {
TraceEvent(SevError, "OpenLocalFileFailed").detail("File", self->file.fileName);
throw platform_error();

View File

@ -253,6 +253,8 @@ public:
if (read.begin.getKey() < read.end.getKey()) {
rangeBegin = read.begin.getKey();
// If the end offset is 1 (first greater than / first greater or equal) or more, then no changes to the
// range after the returned results can change the outcome.
rangeEnd = read.end.offset > 0 && result.more ? read.begin.getKey() : read.end.getKey();
} else {
rangeBegin = read.end.getKey();
@ -289,7 +291,9 @@ public:
bool endInArena = false;
if (read.begin.getKey() < read.end.getKey()) {
rangeBegin = read.begin.offset <= 0 && result.more ? read.end.getKey() : read.begin.getKey();
// If the begin offset is 1 (first greater than / first greater or equal) or less, then no changes to the
// range prior to the returned results can change the outcome.
rangeBegin = read.begin.offset <= 1 && result.more ? read.end.getKey() : read.begin.getKey();
rangeEnd = read.end.getKey();
} else {
rangeBegin = read.end.getKey();

View File

@ -301,7 +301,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
"available_bytes":0,
"limit_bytes":0,
"unused_allocated_memory":0,
"used_bytes":0
"used_bytes":0,
"rss_bytes":0
},
"messages":[
{

View File

@ -43,6 +43,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( ENABLE_VERSION_VECTOR_TLOG_UNICAST, false );
init( MAX_VERSION_RATE_MODIFIER, 0.1 );
init( MAX_VERSION_RATE_OFFSET, VERSIONS_PER_SECOND ); // If the calculated version is more than this amount away from the expected version, it will be clamped to this value. This prevents huge version jumps.
init( ENABLE_VERSION_VECTOR_HA_OPTIMIZATION, false );
// TLogs
init( TLOG_TIMEOUT, 0.4 ); //cannot buggify because of availability
@ -734,7 +735,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( WORKER_LOGGING_INTERVAL, 5.0 );
init( HEAP_PROFILER_INTERVAL, 30.0 );
init( UNKNOWN_CC_TIMEOUT, 600.0 );
init( DEGRADED_RESET_INTERVAL, 24*60*60 ); if ( randomize && BUGGIFY ) DEGRADED_RESET_INTERVAL = 10;
init( DEGRADED_RESET_INTERVAL, 24*60*60 ); // FIXME: short interval causes false positive degraded state to flap, e.g. when everyone tries and fails to connect to a dead coordinator: if ( randomize && BUGGIFY ) DEGRADED_RESET_INTERVAL = 10;
init( DEGRADED_WARNING_LIMIT, 1 );
init( DEGRADED_WARNING_RESET_DELAY, 7*24*60*60 );
init( TRACE_LOG_FLUSH_FAILURE_CHECK_INTERVAL_SECONDS, 10 );
@ -864,16 +865,17 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( LATENCY_METRICS_LOGGING_INTERVAL, 60.0 );
// Cluster recovery
init ( CLUSTER_RECOVERY_EVENT_NAME_PREFIX, "Master");
init ( CLUSTER_RECOVERY_EVENT_NAME_PREFIX, "Master" );
// encrypt key proxy
init( ENABLE_ENCRYPTION, false );
init( ENCRYPTION_MODE, "AES-256-CTR");
init( SIM_KMS_MAX_KEYS, 4096);
init( ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH, 100000);
// encrypt key proxy
init( ENABLE_ENCRYPTION, false ); if ( randomize && BUGGIFY ) { ENABLE_ENCRYPTION = deterministicRandom()->coinflip(); }
init( ENCRYPTION_MODE, "AES-256-CTR" );
init( SIM_KMS_MAX_KEYS, 4096 );
init( ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH, 100000 );
init( ENABLE_TLOG_ENCRYPTION, ENABLE_ENCRYPTION ); if ( randomize && BUGGIFY) { ENABLE_TLOG_ENCRYPTION = (ENABLE_ENCRYPTION && !PROXY_USE_RESOLVER_PRIVATE_MUTATIONS && deterministicRandom()->coinflip()); }
// KMS connector type
init( KMS_CONNECTOR_TYPE, "RESTKmsConnector");
init( KMS_CONNECTOR_TYPE, "RESTKmsConnector" );
// Blob granules
init( BG_URL, isSimulated ? "file://fdbblob/" : "" ); // TODO: store in system key space or something, eventually

View File

@ -44,6 +44,7 @@ public:
// often, so that versions always advance smoothly
double MAX_VERSION_RATE_MODIFIER;
int64_t MAX_VERSION_RATE_OFFSET;
bool ENABLE_VERSION_VECTOR_HA_OPTIMIZATION;
// TLogs
bool PEEK_USING_STREAMING;
@ -837,6 +838,7 @@ public:
std::string ENCRYPTION_MODE;
int SIM_KMS_MAX_KEYS;
int ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH;
bool ENABLE_TLOG_ENCRYPTION;
// Key Management Service (KMS) Connector
std::string KMS_CONNECTOR_TYPE;

View File

@ -79,12 +79,14 @@ public:
invalidateCachedEncodedSize();
}
void setVersion(const std::set<Tag>& tags, Version version) {
void setVersion(const std::set<Tag>& tags, Version version, int8_t localityFilter = tagLocalityInvalid) {
ASSERT(version > maxVersion);
for (auto& tag : tags) {
ASSERT(tag != invalidTag);
ASSERT(tag.locality > tagLocalityInvalid);
versions[tag] = version;
if (localityFilter == tagLocalityInvalid || tag.locality == localityFilter) {
versions[tag] = version;
}
}
maxVersion = version;
invalidateCachedEncodedSize();

View File

@ -1681,6 +1681,25 @@ public:
}
return result;
}
bool killDataHall(Optional<Standalone<StringRef>> dataHallId,
KillType kt,
bool forceKill,
KillType* ktFinal) override {
auto processes = getAllProcesses();
std::set<Optional<Standalone<StringRef>>> dataHallMachines;
for (auto& process : processes) {
if (process->locality.dataHallId() == dataHallId) {
dataHallMachines.insert(process->locality.machineId());
}
}
bool result = false;
for (auto& machineId : dataHallMachines) {
if (killMachine(machineId, kt, forceKill, ktFinal)) {
result = true;
}
}
return result;
}
bool killMachine(Optional<Standalone<StringRef>> machineId,
KillType kt,
bool forceKill,

View File

@ -266,6 +266,10 @@ public:
KillType kt,
bool forceKill = false,
KillType* ktFinal = nullptr) = 0;
virtual bool killDataHall(Optional<Standalone<StringRef>> dataHallId,
KillType kt,
bool forceKill = false,
KillType* ktFinal = nullptr) = 0;
// virtual KillType getMachineKillState( UID zoneID ) = 0;
virtual bool canKillProcesses(std::vector<ProcessInfo*> const& availableProcesses,
std::vector<ProcessInfo*> const& deadProcesses,

View File

@ -2298,6 +2298,7 @@ ACTOR Future<Reference<BlobConnectionProvider>> getBStoreForGranule(Reference<Bl
state Reference<GranuleTenantData> data = self->tenantData.getDataForGranule(granuleRange);
if (data.isValid()) {
wait(data->bstoreLoaded.getFuture());
wait(delay(0));
return data->bstore;
} else {
// race on startup between loading tenant ranges and bgcc/purging. just wait

View File

@ -2748,6 +2748,7 @@ ACTOR Future<Reference<BlobConnectionProvider>> loadBStoreForTenant(Reference<Bl
state Reference<GranuleTenantData> data = bwData->tenantData.getDataForGranule(keyRange);
if (data.isValid()) {
wait(data->bstoreLoaded.getFuture());
wait(delay(0));
return data->bstore;
} else {
TEST(true); // bstore for unknown tenant

View File

@ -44,6 +44,8 @@ set(FDBSERVER_SRCS
FDBExecHelper.actor.cpp
FDBExecHelper.actor.h
fdbserver.actor.cpp
GetEncryptCipherKeys.actor.cpp
GetEncryptCipherKeys.h
GrvProxyServer.actor.cpp
IConfigConsumer.cpp
IConfigConsumer.h

View File

@ -619,7 +619,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
WorkerDetails newEKPWorker;
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) {
newEKPWorker = findNewProcessForSingleton(self, ProcessClass::EncryptKeyProxy, id_used);
}
@ -633,7 +633,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
ProcessClass::Fitness bestFitnessForEKP;
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) {
bestFitnessForEKP = findBestFitnessForSingleton(self, newEKPWorker, ProcessClass::EncryptKeyProxy);
}
@ -658,7 +658,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
bool ekpHealthy = true;
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) {
ekpHealthy = isHealthySingleton<EncryptKeyProxyInterface>(
self, newEKPWorker, ekpSingleton, bestFitnessForEKP, self->recruitingEncryptKeyProxyID);
}
@ -682,7 +682,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
Optional<Standalone<StringRef>> currEKPProcessId, newEKPProcessId;
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) {
currEKPProcessId = ekpSingleton.interface.get().locality.processId();
newEKPProcessId = newEKPWorker.interf.locality.processId();
}
@ -694,7 +694,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
newPids.emplace_back(newBMProcessId);
}
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) {
currPids.emplace_back(currEKPProcessId);
newPids.emplace_back(newEKPProcessId);
}
@ -709,7 +709,7 @@ void checkBetterSingletons(ClusterControllerData* self) {
}
// if the knob is disabled, the EKP coloc counts should have no effect on the coloc counts check below
if (!SERVER_KNOBS->ENABLE_ENCRYPTION) {
if (!SERVER_KNOBS->ENABLE_ENCRYPTION && !g_network->isSimulated()) {
ASSERT(currColocMap[currEKPProcessId] == 0);
ASSERT(newColocMap[newEKPProcessId] == 0);
}
@ -1266,7 +1266,7 @@ ACTOR Future<Void> registerWorker(RegisterWorkerRequest req,
self, w, currSingleton, registeringSingleton, self->recruitingBlobManagerID);
}
if (SERVER_KNOBS->ENABLE_ENCRYPTION && req.encryptKeyProxyInterf.present()) {
if ((SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) && req.encryptKeyProxyInterf.present()) {
auto currSingleton = EncryptKeyProxySingleton(self->db.serverInfo->get().encryptKeyProxy);
auto registeringSingleton = EncryptKeyProxySingleton(req.encryptKeyProxyInterf);
haltRegisteringOrCurrentSingleton<EncryptKeyProxyInterface>(
@ -2519,7 +2519,7 @@ ACTOR Future<Void> clusterControllerCore(ClusterControllerFullInterface interf,
state Future<ErrorOr<Void>> error = errorOr(actorCollection(self.addActor.getFuture()));
// EncryptKeyProxy is necessary for TLog recovery, recruit it as the first process
if (SERVER_KNOBS->ENABLE_ENCRYPTION) {
if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) {
self.addActor.send(monitorEncryptKeyProxy(&self));
}
self.addActor.send(clusterWatchDatabase(&self, &self.db, coordinators, leaderFail)); // Start the master database

View File

@ -190,8 +190,8 @@ public:
}
bool foundSrc = false;
for (int i = 0; i < req.src.size(); i++) {
if (self->server_info.count(req.src[i])) {
for (const auto& id : req.src) {
if (self->server_info.count(id)) {
foundSrc = true;
break;
}
@ -516,7 +516,7 @@ public:
}
}
for (auto& [serverID, server] : self->server_info) {
for (const auto& [serverID, server] : self->server_info) {
if (!self->server_status.get(serverID).isUnhealthy()) {
++serverCount;
LocalityData const& serverLocation = server->getLastKnownInterface().locality;
@ -3946,14 +3946,14 @@ void DDTeamCollection::traceMachineInfo() const {
int i = 0;
TraceEvent("MachineInfo").detail("Size", machine_info.size());
for (auto& machine : machine_info) {
for (auto& [machineName, machineInfo] : machine_info) {
TraceEvent("MachineInfo", distributorId)
.detail("MachineInfoIndex", i++)
.detail("Healthy", isMachineHealthy(machine.second))
.detail("MachineID", machine.first.contents().toString())
.detail("MachineTeamOwned", machine.second->machineTeams.size())
.detail("ServerNumOnMachine", machine.second->serversOnMachine.size())
.detail("ServersID", machine.second->getServersIDStr());
.detail("Healthy", isMachineHealthy(machineInfo))
.detail("MachineID", machineName.contents().toString())
.detail("MachineTeamOwned", machineInfo->machineTeams.size())
.detail("ServerNumOnMachine", machineInfo->serversOnMachine.size())
.detail("ServersID", machineInfo->getServersIDStr());
}
}
@ -4196,20 +4196,20 @@ int DDTeamCollection::addBestMachineTeams(int machineTeamsToBuild) {
Reference<TCServerInfo> DDTeamCollection::findOneLeastUsedServer() const {
std::vector<Reference<TCServerInfo>> leastUsedServers;
int minTeams = std::numeric_limits<int>::max();
for (auto& server : server_info) {
for (auto& [serverID, server] : server_info) {
// Only pick healthy server, which is not failed or excluded.
if (server_status.get(server.first).isUnhealthy())
if (server_status.get(serverID).isUnhealthy())
continue;
if (!isValidLocality(configuration.storagePolicy, server.second->getLastKnownInterface().locality))
if (!isValidLocality(configuration.storagePolicy, server->getLastKnownInterface().locality))
continue;
int numTeams = server.second->getTeams().size();
int numTeams = server->getTeams().size();
if (numTeams < minTeams) {
minTeams = numTeams;
leastUsedServers.clear();
}
if (minTeams == numTeams) {
leastUsedServers.push_back(server.second);
leastUsedServers.push_back(server);
}
}
@ -4299,12 +4299,12 @@ int DDTeamCollection::calculateHealthyMachineCount() const {
std::pair<int64_t, int64_t> DDTeamCollection::calculateMinMaxServerTeamsOnServer() const {
int64_t minTeams = std::numeric_limits<int64_t>::max();
int64_t maxTeams = 0;
for (auto& server : server_info) {
if (server_status.get(server.first).isUnhealthy()) {
for (auto& [serverID, server] : server_info) {
if (server_status.get(serverID).isUnhealthy()) {
continue;
}
minTeams = std::min((int64_t)server.second->getTeams().size(), minTeams);
maxTeams = std::max((int64_t)server.second->getTeams().size(), maxTeams);
minTeams = std::min((int64_t)server->getTeams().size(), minTeams);
maxTeams = std::max((int64_t)server->getTeams().size(), maxTeams);
}
return std::make_pair(minTeams, maxTeams);
}
@ -4312,12 +4312,12 @@ std::pair<int64_t, int64_t> DDTeamCollection::calculateMinMaxServerTeamsOnServer
std::pair<int64_t, int64_t> DDTeamCollection::calculateMinMaxMachineTeamsOnMachine() const {
int64_t minTeams = std::numeric_limits<int64_t>::max();
int64_t maxTeams = 0;
for (auto& machine : machine_info) {
if (!isMachineHealthy(machine.second)) {
for (auto& [_, machine] : machine_info) {
if (!isMachineHealthy(machine)) {
continue;
}
minTeams = std::min<int64_t>((int64_t)machine.second->machineTeams.size(), minTeams);
maxTeams = std::max<int64_t>((int64_t)machine.second->machineTeams.size(), maxTeams);
minTeams = std::min<int64_t>((int64_t)machine->machineTeams.size(), minTeams);
maxTeams = std::max<int64_t>((int64_t)machine->machineTeams.size(), maxTeams);
}
return std::make_pair(minTeams, maxTeams);
}
@ -4581,8 +4581,8 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max
healthyMachineTeamCount = getHealthyMachineTeamCount();
std::pair<uint64_t, uint64_t> minMaxTeamsOnServer = calculateMinMaxServerTeamsOnServer();
std::pair<uint64_t, uint64_t> minMaxMachineTeamsOnMachine = calculateMinMaxMachineTeamsOnMachine();
auto [minTeamsOnServer, maxTeamsOnServer] = calculateMinMaxServerTeamsOnServer();
auto [minMachineTeamsOnMachine, maxMachineTeamsOnMachine] = calculateMinMaxMachineTeamsOnMachine();
TraceEvent("TeamCollectionInfo", distributorId)
.detail("Primary", primary)
@ -4597,10 +4597,10 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachines", totalHealthyMachineCount)
.detail("MinTeamsOnServer", minMaxTeamsOnServer.first)
.detail("MaxTeamsOnServer", minMaxTeamsOnServer.second)
.detail("MinMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.first)
.detail("MaxMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.second)
.detail("MinTeamsOnServer", minTeamsOnServer)
.detail("MaxTeamsOnServer", maxTeamsOnServer)
.detail("MinMachineTeamsOnMachine", minMachineTeamsOnMachine)
.detail("MaxMachineTeamsOnMachine", maxMachineTeamsOnMachine)
.detail("DoBuildTeams", doBuildTeams)
.trackLatest(teamCollectionInfoEventHolder->trackingKey);
@ -4617,8 +4617,8 @@ void DDTeamCollection::traceTeamCollectionInfo() const {
int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount;
int healthyMachineTeamCount = getHealthyMachineTeamCount();
std::pair<uint64_t, uint64_t> minMaxTeamsOnServer = calculateMinMaxServerTeamsOnServer();
std::pair<uint64_t, uint64_t> minMaxMachineTeamsOnMachine = calculateMinMaxMachineTeamsOnMachine();
auto [minTeamsOnServer, maxTeamsOnServer] = calculateMinMaxServerTeamsOnServer();
auto [minMachineTeamsOnMachine, maxMachineTeamsOnMachine] = calculateMinMaxMachineTeamsOnMachine();
TraceEvent("TeamCollectionInfo", distributorId)
.detail("Primary", primary)
@ -4633,10 +4633,10 @@ void DDTeamCollection::traceTeamCollectionInfo() const {
.detail("DesiredMachineTeams", desiredMachineTeams)
.detail("MaxMachineTeams", maxMachineTeams)
.detail("TotalHealthyMachines", totalHealthyMachineCount)
.detail("MinTeamsOnServer", minMaxTeamsOnServer.first)
.detail("MaxTeamsOnServer", minMaxTeamsOnServer.second)
.detail("MinMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.first)
.detail("MaxMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.second)
.detail("MinTeamsOnServer", minTeamsOnServer)
.detail("MaxTeamsOnServer", maxTeamsOnServer)
.detail("MinMachineTeamsOnMachine", minMachineTeamsOnMachine)
.detail("MaxMachineTeamsOnMachine", maxMachineTeamsOnMachine)
.detail("DoBuildTeams", doBuildTeams)
.trackLatest(teamCollectionInfoEventHolder->trackingKey);
@ -5281,8 +5281,8 @@ public:
ASSERT(result >= 8);
for (auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
auto teamCount = process->second->getTeams().size();
for (const auto& [serverID, server] : collection->server_info) {
auto teamCount = server->getTeams().size();
ASSERT(teamCount >= 1);
// ASSERT(teamCount <= targetTeamsPerServer);
}
@ -5319,8 +5319,8 @@ public:
// We need to guarantee that a server always has at least one team so that the server can participate in data
// distribution
for (auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) {
auto teamCount = process->second->getTeams().size();
for (const auto& [serverID, server] : collection->server_info) {
auto teamCount = server->getTeams().size();
ASSERT(teamCount >= 1);
}
@ -5370,11 +5370,11 @@ public:
wait(collection->getTeam(req));
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> resTeam = req.reply.getFuture().get();
const auto [resTeam, srcFound] = req.reply.getFuture().get();
std::set<UID> expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) };
ASSERT(resTeam.first.present());
auto servers = resTeam.first.get()->getServerIDs();
ASSERT(resTeam.present());
auto servers = resTeam.get()->getServerIDs();
const std::set<UID> selectedServers(servers.begin(), servers.end());
ASSERT(expectedServers == selectedServers);
@ -5422,11 +5422,11 @@ public:
wait(collection->getTeam(req));
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> resTeam = req.reply.getFuture().get();
const auto [resTeam, srcFound] = req.reply.getFuture().get();
std::set<UID> expectedServers{ UID(2, 0), UID(3, 0), UID(4, 0) };
ASSERT(resTeam.first.present());
auto servers = resTeam.first.get()->getServerIDs();
ASSERT(resTeam.present());
auto servers = resTeam.get()->getServerIDs();
const std::set<UID> selectedServers(servers.begin(), servers.end());
ASSERT(expectedServers == selectedServers);
@ -5472,11 +5472,11 @@ public:
wait(collection->getTeam(req));
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> resTeam = req.reply.getFuture().get();
const auto [resTeam, srcFound] = req.reply.getFuture().get();
std::set<UID> expectedServers{ UID(2, 0), UID(3, 0), UID(4, 0) };
ASSERT(resTeam.first.present());
auto servers = resTeam.first.get()->getServerIDs();
ASSERT(resTeam.present());
auto servers = resTeam.get()->getServerIDs();
const std::set<UID> selectedServers(servers.begin(), servers.end());
ASSERT(expectedServers == selectedServers);
@ -5521,11 +5521,11 @@ public:
wait(collection->getTeam(req));
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> resTeam = req.reply.getFuture().get();
const auto [resTeam, srcFound] = req.reply.getFuture().get();
std::set<UID> expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) };
ASSERT(resTeam.first.present());
auto servers = resTeam.first.get()->getServerIDs();
ASSERT(resTeam.present());
auto servers = resTeam.get()->getServerIDs();
const std::set<UID> selectedServers(servers.begin(), servers.end());
ASSERT(expectedServers == selectedServers);
@ -5572,9 +5572,9 @@ public:
wait(collection->getTeam(req));
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> resTeam = req.reply.getFuture().get();
const auto [resTeam, srcFound] = req.reply.getFuture().get();
ASSERT(!resTeam.first.present());
ASSERT(!resTeam.present());
return Void();
}
@ -5628,9 +5628,9 @@ public:
wait(collection->getTeam(req));
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> resTeam = req.reply.getFuture().get();
const auto& [resTeam, srcTeamFound] = req.reply.getFuture().get();
ASSERT(!resTeam.first.present());
ASSERT(!resTeam.present());
return Void();
}
@ -5746,11 +5746,11 @@ public:
wait(collection->getTeam(req));
std::pair<Optional<Reference<IDataDistributionTeam>>, bool> resTeam = req.reply.getFuture().get();
const auto [resTeam, srcFound] = req.reply.getFuture().get();
std::set<UID> expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) };
ASSERT(resTeam.first.present());
auto servers = resTeam.first.get()->getServerIDs();
ASSERT(resTeam.present());
auto servers = resTeam.get()->getServerIDs();
const std::set<UID> selectedServers(servers.begin(), servers.end());
ASSERT(expectedServers == selectedServers);

View File

@ -492,34 +492,10 @@ struct DataDistributorData : NonCopyable, ReferenceCounted<DataDistributorData>
totalDataInFlightRemoteEventHolder(makeReference<EventCacheHolder>("TotalDataInFlightRemote")) {}
};
ACTOR Future<Void> monitorBatchLimitedTime(Reference<AsyncVar<ServerDBInfo> const> db, double* lastLimited) {
loop {
wait(delay(SERVER_KNOBS->METRIC_UPDATE_RATE));
state Reference<GrvProxyInfo> grvProxies(new GrvProxyInfo(db->get().client.grvProxies));
choose {
when(wait(db->onChange())) {}
when(GetHealthMetricsReply reply =
wait(grvProxies->size() ? basicLoadBalance(grvProxies,
&GrvProxyInterface::getHealthMetrics,
GetHealthMetricsRequest(false))
: Never())) {
if (reply.healthMetrics.batchLimited) {
*lastLimited = now();
}
}
}
}
}
// Runs the data distribution algorithm for FDB, including the DD Queue, DD tracker, and DD team collection
ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
PromiseStream<GetMetricsListRequest> getShardMetricsList,
const DDEnabledState* ddEnabledState) {
state double lastLimited = 0;
self->addActor.send(monitorBatchLimitedTime(self->dbInfo, &lastLimited));
state Database cx = openDBOnServer(self->dbInfo, TaskPriority::DataDistributionLaunch, LockAware::True);
cx->locationCacheSize = SERVER_KNOBS->DD_LOCATION_CACHE_SIZE;
@ -762,7 +738,6 @@ ACTOR Future<Void> dataDistribution(Reference<DataDistributorData> self,
self->ddId,
storageTeamSize,
configuration.storageTeamSize,
&lastLimited,
ddEnabledState),
"DDQueue",
self->ddId,

View File

@ -345,7 +345,6 @@ ACTOR Future<Void> dataDistributionQueue(Database cx,
UID distributorId,
int teamSize,
int singleRegionTeamSize,
double* lastLimited,
const DDEnabledState* ddEnabledState);
// Holds the permitted size and IO bounds for a shard

View File

@ -487,7 +487,6 @@ struct DDQueueData {
PromiseStream<GetMetricsRequest> getShardMetrics;
PromiseStream<GetTopKMetricsRequest> getTopKMetrics;
double* lastLimited;
double lastInterval;
int suppressIntervals;
@ -550,18 +549,17 @@ struct DDQueueData {
PromiseStream<RelocateShard> output,
FutureStream<RelocateShard> input,
PromiseStream<GetMetricsRequest> getShardMetrics,
PromiseStream<GetTopKMetricsRequest> getTopKMetrics,
double* lastLimited)
PromiseStream<GetTopKMetricsRequest> getTopKMetrics)
: distributorId(mid), lock(lock), cx(cx), teamCollections(teamCollections), shardsAffectedByTeamFailure(sABTF),
getAverageShardBytes(getAverageShardBytes),
startMoveKeysParallelismLock(SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM),
finishMoveKeysParallelismLock(SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM),
fetchSourceLock(new FlowLock(SERVER_KNOBS->DD_FETCH_SOURCE_PARALLELISM)), activeRelocations(0),
queuedRelocations(0), bytesWritten(0), teamSize(teamSize), singleRegionTeamSize(singleRegionTeamSize),
output(output), input(input), getShardMetrics(getShardMetrics), getTopKMetrics(getTopKMetrics),
lastLimited(lastLimited), lastInterval(0), suppressIntervals(0),
rawProcessingUnhealthy(new AsyncVar<bool>(false)), rawProcessingWiggle(new AsyncVar<bool>(false)),
unhealthyRelocations(0), movedKeyServersEventHolder(makeReference<EventCacheHolder>("MovedKeyServers")) {}
output(output), input(input), getShardMetrics(getShardMetrics), getTopKMetrics(getTopKMetrics), lastInterval(0),
suppressIntervals(0), rawProcessingUnhealthy(new AsyncVar<bool>(false)),
rawProcessingWiggle(new AsyncVar<bool>(false)), unhealthyRelocations(0),
movedKeyServersEventHolder(makeReference<EventCacheHolder>("MovedKeyServers")) {}
void validate() {
if (EXPENSIVE_VALIDATION) {
@ -1819,7 +1817,6 @@ ACTOR Future<Void> BgDDLoadRebalance(DDQueueData* self, int teamCollectionIndex,
ACTOR Future<Void> BgDDMountainChopper(DDQueueData* self, int teamCollectionIndex) {
state double rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
state Transaction tr(self->cx);
state double lastRead = 0;
state bool skipCurrentLoop = false;
@ -1829,10 +1826,6 @@ ACTOR Future<Void> BgDDMountainChopper(DDQueueData* self, int teamCollectionInde
state TraceEvent traceEvent("BgDDMountainChopper_Old", self->distributorId);
traceEvent.suppressFor(5.0).detail("PollingInterval", rebalancePollingInterval).detail("Rebalance", "Disk");
if (*self->lastLimited > 0) {
traceEvent.detail("SecondsSinceLastLimited", now() - *self->lastLimited);
}
try {
state Future<Void> delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch);
if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) {
@ -1904,30 +1897,10 @@ ACTOR Future<Void> BgDDMountainChopper(DDQueueData* self, int teamCollectionInde
teamCollectionIndex == 0,
&traceEvent));
moved = _moved;
if (moved) {
resetCount = 0;
} else {
resetCount++;
}
}
}
}
if (now() - (*self->lastLimited) < SERVER_KNOBS->BG_DD_SATURATION_DELAY) {
rebalancePollingInterval = std::min(SERVER_KNOBS->BG_DD_MAX_WAIT,
rebalancePollingInterval * SERVER_KNOBS->BG_DD_INCREASE_RATE);
} else {
rebalancePollingInterval = std::max(SERVER_KNOBS->BG_DD_MIN_WAIT,
rebalancePollingInterval / SERVER_KNOBS->BG_DD_DECREASE_RATE);
}
if (resetCount >= SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT &&
rebalancePollingInterval < SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL) {
rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
}
traceEvent.detail("ResetCount", resetCount);
tr.reset();
} catch (Error& e) {
// Log actor_cancelled because it's not legal to suppress an event that's initialized
@ -1942,7 +1915,6 @@ ACTOR Future<Void> BgDDMountainChopper(DDQueueData* self, int teamCollectionInde
ACTOR Future<Void> BgDDValleyFiller(DDQueueData* self, int teamCollectionIndex) {
state double rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
state Transaction tr(self->cx);
state double lastRead = 0;
state bool skipCurrentLoop = false;
@ -1953,10 +1925,6 @@ ACTOR Future<Void> BgDDValleyFiller(DDQueueData* self, int teamCollectionIndex)
state TraceEvent traceEvent("BgDDValleyFiller_Old", self->distributorId);
traceEvent.suppressFor(5.0).detail("PollingInterval", rebalancePollingInterval).detail("Rebalance", "Disk");
if (*self->lastLimited > 0) {
traceEvent.detail("SecondsSinceLastLimited", now() - *self->lastLimited);
}
try {
state Future<Void> delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch);
if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) {
@ -2028,30 +1996,10 @@ ACTOR Future<Void> BgDDValleyFiller(DDQueueData* self, int teamCollectionIndex)
teamCollectionIndex == 0,
&traceEvent));
moved = _moved;
if (moved) {
resetCount = 0;
} else {
resetCount++;
}
}
}
}
if (now() - (*self->lastLimited) < SERVER_KNOBS->BG_DD_SATURATION_DELAY) {
rebalancePollingInterval = std::min(SERVER_KNOBS->BG_DD_MAX_WAIT,
rebalancePollingInterval * SERVER_KNOBS->BG_DD_INCREASE_RATE);
} else {
rebalancePollingInterval = std::max(SERVER_KNOBS->BG_DD_MIN_WAIT,
rebalancePollingInterval / SERVER_KNOBS->BG_DD_DECREASE_RATE);
}
if (resetCount >= SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT &&
rebalancePollingInterval < SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL) {
rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL;
resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT;
}
traceEvent.detail("ResetCount", resetCount);
tr.reset();
} catch (Error& e) {
// Log actor_cancelled because it's not legal to suppress an event that's initialized
@ -2079,7 +2027,6 @@ ACTOR Future<Void> dataDistributionQueue(Database cx,
UID distributorId,
int teamSize,
int singleRegionTeamSize,
double* lastLimited,
const DDEnabledState* ddEnabledState) {
state DDQueueData self(distributorId,
lock,
@ -2092,8 +2039,7 @@ ACTOR Future<Void> dataDistributionQueue(Database cx,
output,
input,
getShardMetrics,
getTopKMetrics,
lastLimited);
getTopKMetrics);
state std::set<UID> serversToLaunchFrom;
state KeyRange keysToLaunchFrom;
state RelocateData launchData;

View File

@ -0,0 +1,256 @@
/*
* GetEncryptCipherKeys.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbserver/GetEncryptCipherKeys.h"
#include <boost/functional/hash.hpp>
namespace {
Optional<UID> getEncryptKeyProxyId(const Reference<AsyncVar<ServerDBInfo> const>& db) {
return db->get().encryptKeyProxy.map<UID>([](EncryptKeyProxyInterface proxy) { return proxy.id(); });
}
ACTOR Future<Void> onEncryptKeyProxyChange(Reference<AsyncVar<ServerDBInfo> const> db) {
state Optional<UID> previousProxyId = getEncryptKeyProxyId(db);
state Optional<UID> currentProxyId;
loop {
wait(db->onChange());
currentProxyId = getEncryptKeyProxyId(db);
if (currentProxyId != previousProxyId) {
break;
}
}
TraceEvent("GetCipherKeys_EncryptKeyProxyChanged")
.detail("PreviousProxyId", previousProxyId.orDefault(UID()))
.detail("CurrentProxyId", currentProxyId.orDefault(UID()));
return Void();
}
ACTOR Future<EKPGetLatestBaseCipherKeysReply> getUncachedLatestEncryptCipherKeys(
Reference<AsyncVar<ServerDBInfo> const> db,
EKPGetLatestBaseCipherKeysRequest request) {
Optional<EncryptKeyProxyInterface> proxy = db->get().encryptKeyProxy;
if (!proxy.present()) {
// Wait for onEncryptKeyProxyChange.
TraceEvent("GetLatestCipherKeys_EncryptKeyProxyNotPresent");
return Never();
}
request.reply.reset();
try {
EKPGetLatestBaseCipherKeysReply reply = wait(proxy.get().getLatestBaseCipherKeys.getReply(request));
if (reply.error.present()) {
TraceEvent("GetLatestCipherKeys_RequestFailed").error(reply.error.get());
throw encrypt_keys_fetch_failed();
}
return reply;
} catch (Error& e) {
TraceEvent("GetLatestCipherKeys_CaughtError").error(e);
if (e.code() == error_code_broken_promise) {
// Wait for onEncryptKeyProxyChange.
return Never();
}
throw;
}
}
} // anonymous namespace
ACTOR Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getLatestEncryptCipherKeys(
Reference<AsyncVar<ServerDBInfo> const> db,
std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> domains) {
state Reference<BlobCipherKeyCache> cipherKeyCache = BlobCipherKeyCache::getInstance();
state std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys;
state EKPGetLatestBaseCipherKeysRequest request;
if (!db.isValid()) {
TraceEvent(SevError, "GetLatestCipherKeys_ServerDBInfoNotAvailable");
throw encrypt_ops_error();
}
// Collect cached cipher keys.
for (auto& domain : domains) {
Reference<BlobCipherKey> cachedCipherKey = cipherKeyCache->getLatestCipherKey(domain.first /*domainId*/);
if (cachedCipherKey.isValid()) {
cipherKeys[domain.first] = cachedCipherKey;
} else {
request.encryptDomainInfos.emplace_back(
domain.first /*domainId*/, domain.second /*domainName*/, request.arena);
}
}
if (request.encryptDomainInfos.empty()) {
return cipherKeys;
}
// Fetch any uncached cipher keys.
loop choose {
when(EKPGetLatestBaseCipherKeysReply reply = wait(getUncachedLatestEncryptCipherKeys(db, request))) {
// Insert base cipher keys into cache and construct result.
for (const EKPBaseCipherDetails& details : reply.baseCipherDetails) {
EncryptCipherDomainId domainId = details.encryptDomainId;
if (domains.count(domainId) > 0 && cipherKeys.count(domainId) == 0) {
Reference<BlobCipherKey> cipherKey = cipherKeyCache->insertCipherKey(
domainId, details.baseCipherId, details.baseCipherKey.begin(), details.baseCipherKey.size());
ASSERT(cipherKey.isValid());
cipherKeys[domainId] = cipherKey;
}
}
// Check for any missing cipher keys.
for (auto& domain : request.encryptDomainInfos) {
if (cipherKeys.count(domain.domainId) == 0) {
TraceEvent(SevWarn, "GetLatestCipherKeys_KeyMissing").detail("DomainId", domain.domainId);
throw encrypt_key_not_found();
}
}
break;
}
// In case encryptKeyProxy has changed, retry the request.
when(wait(onEncryptKeyProxyChange(db))) {}
}
return cipherKeys;
}
namespace {
ACTOR Future<EKPGetBaseCipherKeysByIdsReply> getUncachedEncryptCipherKeys(Reference<AsyncVar<ServerDBInfo> const> db,
EKPGetBaseCipherKeysByIdsRequest request) {
Optional<EncryptKeyProxyInterface> proxy = db->get().encryptKeyProxy;
if (!proxy.present()) {
// Wait for onEncryptKeyProxyChange.
TraceEvent("GetCipherKeys_EncryptKeyProxyNotPresent");
return Never();
}
request.reply.reset();
try {
EKPGetBaseCipherKeysByIdsReply reply = wait(proxy.get().getBaseCipherKeysByIds.getReply(request));
if (reply.error.present()) {
TraceEvent(SevWarn, "GetCipherKeys_RequestFailed").error(reply.error.get());
throw encrypt_keys_fetch_failed();
}
return reply;
} catch (Error& e) {
TraceEvent("GetCipherKeys_CaughtError").error(e);
if (e.code() == error_code_broken_promise) {
// Wait for onEncryptKeyProxyChange.
return Never();
}
throw;
}
}
using BaseCipherIndex = std::pair<EncryptCipherDomainId, EncryptCipherBaseKeyId>;
} // anonymous namespace
ACTOR Future<std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>> getEncryptCipherKeys(
Reference<AsyncVar<ServerDBInfo> const> db,
std::unordered_set<BlobCipherDetails> cipherDetails) {
state Reference<BlobCipherKeyCache> cipherKeyCache = BlobCipherKeyCache::getInstance();
state std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>> cipherKeys;
state std::unordered_set<BaseCipherIndex, boost::hash<BaseCipherIndex>> uncachedBaseCipherIds;
state EKPGetBaseCipherKeysByIdsRequest request;
if (!db.isValid()) {
TraceEvent(SevError, "GetCipherKeys_ServerDBInfoNotAvailable");
throw encrypt_ops_error();
}
// Collect cached cipher keys.
for (const BlobCipherDetails& details : cipherDetails) {
Reference<BlobCipherKey> cachedCipherKey =
cipherKeyCache->getCipherKey(details.encryptDomainId, details.baseCipherId, details.salt);
if (cachedCipherKey.isValid()) {
cipherKeys.emplace(details, cachedCipherKey);
} else {
uncachedBaseCipherIds.insert(std::make_pair(details.encryptDomainId, details.baseCipherId));
}
}
if (uncachedBaseCipherIds.empty()) {
return cipherKeys;
}
for (const BaseCipherIndex& id : uncachedBaseCipherIds) {
request.baseCipherInfos.emplace_back(
id.first /*domainId*/, id.second /*baseCipherId*/, StringRef() /*domainName*/, request.arena);
}
// Fetch any uncached cipher keys.
loop choose {
when(EKPGetBaseCipherKeysByIdsReply reply = wait(getUncachedEncryptCipherKeys(db, request))) {
std::unordered_map<BaseCipherIndex, StringRef, boost::hash<BaseCipherIndex>> baseCipherKeys;
for (const EKPBaseCipherDetails& baseDetails : reply.baseCipherDetails) {
BaseCipherIndex baseIdx = std::make_pair(baseDetails.encryptDomainId, baseDetails.baseCipherId);
baseCipherKeys[baseIdx] = baseDetails.baseCipherKey;
}
// Insert base cipher keys into cache and construct result.
for (const BlobCipherDetails& details : cipherDetails) {
if (cipherKeys.count(details) > 0) {
continue;
}
BaseCipherIndex baseIdx = std::make_pair(details.encryptDomainId, details.baseCipherId);
const auto& itr = baseCipherKeys.find(baseIdx);
if (itr == baseCipherKeys.end()) {
TraceEvent(SevError, "GetCipherKeys_KeyMissing")
.detail("DomainId", details.encryptDomainId)
.detail("BaseCipherId", details.baseCipherId);
throw encrypt_key_not_found();
}
Reference<BlobCipherKey> cipherKey = cipherKeyCache->insertCipherKey(details.encryptDomainId,
details.baseCipherId,
itr->second.begin(),
itr->second.size(),
details.salt);
ASSERT(cipherKey.isValid());
cipherKeys[details] = cipherKey;
}
break;
}
// In case encryptKeyProxy has changed, retry the request.
when(wait(onEncryptKeyProxyChange(db))) {}
}
return cipherKeys;
}
ACTOR Future<TextAndHeaderCipherKeys> getLatestSystemEncryptCipherKeys(Reference<AsyncVar<ServerDBInfo> const> db) {
static std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName> domains = {
{ SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME },
{ ENCRYPT_HEADER_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME }
};
std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>> cipherKeys =
wait(getLatestEncryptCipherKeys(db, domains));
ASSERT(cipherKeys.count(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID) > 0);
ASSERT(cipherKeys.count(ENCRYPT_HEADER_DOMAIN_ID) > 0);
TextAndHeaderCipherKeys result{ cipherKeys.at(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID),
cipherKeys.at(ENCRYPT_HEADER_DOMAIN_ID) };
ASSERT(result.cipherTextKey.isValid());
ASSERT(result.cipherHeaderKey.isValid());
return result;
}
ACTOR Future<TextAndHeaderCipherKeys> getEncryptCipherKeys(Reference<AsyncVar<ServerDBInfo> const> db,
BlobCipherEncryptHeader header) {
std::unordered_set<BlobCipherDetails> cipherDetails{ header.cipherTextDetails, header.cipherHeaderDetails };
std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>> cipherKeys =
wait(getEncryptCipherKeys(db, cipherDetails));
ASSERT(cipherKeys.count(header.cipherTextDetails) > 0);
ASSERT(cipherKeys.count(header.cipherHeaderDetails) > 0);
TextAndHeaderCipherKeys result{ cipherKeys.at(header.cipherTextDetails),
cipherKeys.at(header.cipherHeaderDetails) };
ASSERT(result.cipherTextKey.isValid());
ASSERT(result.cipherHeaderKey.isValid());
return result;
}

View File

@ -0,0 +1,58 @@
/*
* GetEncryptCipherKeys.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifndef FDBSERVER_GETCIPHERKEYS_H
#define FDBSERVER_GETCIPHERKEYS_H
#include "fdbserver/ServerDBInfo.h"
#include "flow/BlobCipher.h"
#include <unordered_map>
#include <unordered_set>
// Get the latest cipher keys for the given encryption domains. Tries the local cache first; on a cache miss,
// fetches the cipher keys from EncryptKeyProxy and puts the result in the local cache before returning.
Future<std::unordered_map<EncryptCipherDomainId, Reference<BlobCipherKey>>> getLatestEncryptCipherKeys(
const Reference<AsyncVar<ServerDBInfo> const>& db,
const std::unordered_map<EncryptCipherDomainId, EncryptCipherDomainName>& domains);
// Get the cipher keys specified by the list of cipher details. Tries the local cache first; on a cache miss,
// fetches the cipher keys from EncryptKeyProxy and puts the result in the local cache before returning.
Future<std::unordered_map<BlobCipherDetails, Reference<BlobCipherKey>>> getEncryptCipherKeys(
const Reference<AsyncVar<ServerDBInfo> const>& db,
const std::unordered_set<BlobCipherDetails>& cipherDetails);
struct TextAndHeaderCipherKeys {
Reference<BlobCipherKey> cipherTextKey;
Reference<BlobCipherKey> cipherHeaderKey;
};
// Helper method to get the latest text and header cipher keys for the system domain,
// used for encrypting system data.
Future<TextAndHeaderCipherKeys> getLatestSystemEncryptCipherKeys(const Reference<AsyncVar<ServerDBInfo> const>& db);
// Helper method to get both the text and header cipher keys for the given encryption header,
// used for decrypting data that was encrypted under that header.
Future<TextAndHeaderCipherKeys> getEncryptCipherKeys(const Reference<AsyncVar<ServerDBInfo> const>& db,
const BlobCipherEncryptHeader& header);
#endif
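Reviewer note (not part of the patch): the doc comments above describe a cache-through API. Below is a minimal caller sketch, assuming the flow actor compiler and the BlobCipher APIs touched later in this diff; the actor name and arguments are hypothetical.
#include "fdbserver/GetEncryptCipherKeys.h"
#include "flow/actorcompiler.h" // This must be the last #include.
// Hypothetical round trip: encrypt a system-keyspace record, then decrypt it
// using only the key identifiers recorded in the header.
ACTOR Future<Void> encryptSampleRecord(Reference<AsyncVar<ServerDBInfo> const> db, Standalone<StringRef> plaintext) {
// Cache-first fetch; falls back to EncryptKeyProxy on a miss.
state TextAndHeaderCipherKeys keys = wait(getLatestSystemEncryptCipherKeys(db));
state Arena arena;
state BlobCipherEncryptHeader header;
EncryptBlobCipherAes265Ctr encryptor(keys.cipherTextKey, keys.cipherHeaderKey, ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE);
state Reference<EncryptBuf> ciphertext = encryptor.encrypt(plaintext.begin(), plaintext.size(), &header, arena);
// Resolves the exact keys named by the header, again cache-first.
TextAndHeaderCipherKeys decryptKeys = wait(getEncryptCipherKeys(db, header));
DecryptBlobCipherAes256Ctr decryptor(decryptKeys.cipherTextKey, decryptKeys.cipherHeaderKey, header.iv);
Reference<EncryptBuf> decrypted = decryptor.decrypt(ciphertext->begin(), ciphertext->getLogicalSize(), header, arena);
ASSERT(decrypted->getLogicalSize() == plaintext.size());
return Void();
}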

View File

@ -286,9 +286,7 @@ rocksdb::Options getOptions() {
options.IncreaseParallelism(SERVER_KNOBS->ROCKSDB_BACKGROUND_PARALLELISM);
}
options.statistics = rocksdb::CreateDBStatistics();
options.statistics->set_stats_level(rocksdb::kExceptHistogramOrTimers);
// TODO: enable rocksdb metrics.
options.db_log_dir = SERVER_KNOBS->LOG_DIRECTORY;
return options;
}
@ -555,10 +553,31 @@ public:
TraceEvent(SevError, "ShardedRocksDB").detail("Error", "write to non-existent shard").detail("WriteKey", key);
return;
}
writeBatch->Put(it->value()->physicalShard->cf, toSlice(key), toSlice(value));
writeBatch->Put(it.value()->physicalShard->cf, toSlice(key), toSlice(value));
dirtyShards->insert(it.value()->physicalShard);
}
void clear(KeyRef key) {
auto it = dataShardMap.rangeContaining(key);
if (!it.value()) {
return;
}
writeBatch->Delete(it.value()->physicalShard->cf, toSlice(key));
dirtyShards->insert(it.value()->physicalShard);
}
void clearRange(KeyRangeRef range) {
auto rangeIterator = dataShardMap.intersectingRanges(range);
for (auto it = rangeIterator.begin(); it != rangeIterator.end(); ++it) {
if (it.value() == nullptr) {
continue;
}
writeBatch->DeleteRange(it.value()->physicalShard->cf, toSlice(range.begin), toSlice(range.end));
dirtyShards->insert(it.value()->physicalShard);
}
}
std::unique_ptr<rocksdb::WriteBatch> getWriteBatch() {
std::unique_ptr<rocksdb::WriteBatch> existingWriteBatch = std::move(writeBatch);
writeBatch = std::make_unique<rocksdb::WriteBatch>();
@ -597,12 +616,17 @@ public:
}
rocksdb::DB* getDb() { return db; }
std::unordered_map<std::string, std::shared_ptr<PhysicalShard>>* getAllShards() { return &physicalShards; }
std::unordered_map<uint32_t, rocksdb::ColumnFamilyHandle*>* getColumnFamilyMap() { return &columnFamilyMap; }
private:
std::string path;
rocksdb::DB* db = nullptr;
std::unordered_map<std::string, std::shared_ptr<PhysicalShard>> physicalShards;
// Stores mapping between cf id and cf handle, used during compaction.
std::unordered_map<uint32_t, rocksdb::ColumnFamilyHandle*> columnFamilyMap;
std::unique_ptr<rocksdb::WriteBatch> writeBatch;
std::unique_ptr<std::set<PhysicalShard*>> dirtyShards;
KeyRangeMap<DataShard*> dataShardMap;
@ -1218,11 +1242,14 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
struct Writer : IThreadPoolReceiver {
int threadIndex;
std::unordered_map<uint32_t, rocksdb::ColumnFamilyHandle*>* columnFamilyMap;
std::shared_ptr<RocksDBMetrics> rocksDBMetrics;
std::shared_ptr<rocksdb::RateLimiter> rateLimiter;
explicit Writer(int threadIndex, std::shared_ptr<RocksDBMetrics> rocksDBMetrics)
: threadIndex(threadIndex), rocksDBMetrics(rocksDBMetrics),
explicit Writer(int threadIndex,
std::unordered_map<uint32_t, rocksdb::ColumnFamilyHandle*>* columnFamilyMap,
std::shared_ptr<RocksDBMetrics> rocksDBMetrics)
: threadIndex(threadIndex), columnFamilyMap(columnFamilyMap), rocksDBMetrics(rocksDBMetrics),
rateLimiter(SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC > 0
? rocksdb::NewGenericRateLimiter(
SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC, // rate_bytes_per_sec
@ -1280,7 +1307,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
PhysicalShard* shard;
ThreadReturnPromise<Void> done;
AddShardAction(PhysicalShard* shard) : shard(shard) {}
AddShardAction(PhysicalShard* shard) : shard(shard) { ASSERT(shard); }
double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; }
};
@ -1289,6 +1316,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
if (!s.ok()) {
a.done.sendError(statusToError(s));
}
(*columnFamilyMap)[a.shard->cf->GetID()] = a.shard->cf;
a.done.send(Void());
}
@ -1321,12 +1349,59 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
}
};
rocksdb::Status doCommit(rocksdb::WriteBatch* batch, rocksdb::DB* db, bool sample) {
struct DeleteVisitor : public rocksdb::WriteBatch::Handler {
std::vector<std::pair<uint32_t, KeyRange>>* deletes;
DeleteVisitor(std::vector<std::pair<uint32_t, KeyRange>>* deletes) : deletes(deletes) { ASSERT(deletes); }
rocksdb::Status DeleteRangeCF(uint32_t column_family_id,
const rocksdb::Slice& begin,
const rocksdb::Slice& end) override {
deletes->push_back(
std::make_pair(column_family_id, KeyRange(KeyRangeRef(toStringRef(begin), toStringRef(end)))));
return rocksdb::Status::OK();
}
rocksdb::Status PutCF(uint32_t column_family_id,
const rocksdb::Slice& key,
const rocksdb::Slice& value) override {
return rocksdb::Status::OK();
}
rocksdb::Status DeleteCF(uint32_t column_family_id, const rocksdb::Slice& key) override {
return rocksdb::Status::OK();
}
rocksdb::Status SingleDeleteCF(uint32_t column_family_id, const rocksdb::Slice& key) override {
return rocksdb::Status::OK();
}
rocksdb::Status MergeCF(uint32_t column_family_id,
const rocksdb::Slice& key,
const rocksdb::Slice& value) override {
return rocksdb::Status::OK();
}
};
rocksdb::Status doCommit(rocksdb::WriteBatch* batch,
rocksdb::DB* db,
std::vector<std::pair<uint32_t, KeyRange>>* deletes,
bool sample) {
DeleteVisitor dv(deletes);
rocksdb::Status s = batch->Iterate(&dv);
if (!s.ok()) {
logRocksDBError(s, "CommitDeleteVisitor");
return s;
}
// If the batch contains any range deletes, the visitor above must have collected them.
ASSERT(!deletes->empty() || !batch->HasDeleteRange());
rocksdb::WriteOptions options;
options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC;
double writeBeginTime = sample ? timer_monotonic() : 0;
auto s = db->Write(options, batch);
s = db->Write(options, batch);
if (sample) {
rocksDBMetrics->getWriteHistogram()->sampleSeconds(timer_monotonic() - writeBeginTime);
}
@ -1335,7 +1410,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
return s;
}
// TODO: Add cf id <-> cf handle mapping and suggest compact range.
return s;
}
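Reviewer note (not part of the patch): a sketch of how the visitor is driven. rocksdb::WriteBatch::Iterate replays every record in the batch against the handler, so only DeleteRangeCF accumulates entries; the Put/Delete/SingleDelete/Merge callbacks above are deliberate no-ops. The `cf` handle and keys below are hypothetical.
rocksdb::WriteBatch batch;
batch.Put(cf, "k", "v"); // ignored by DeleteVisitor
batch.DeleteRange(cf, "a", "m"); // recorded by DeleteVisitor
std::vector<std::pair<uint32_t, KeyRange>> deletes;
DeleteVisitor dv(&deletes);
ASSERT(batch.Iterate(&dv).ok());
ASSERT(deletes.size() == 1); // one (cf id, ["a", "m")) entry, later passed to SuggestCompactRange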
@ -1348,15 +1422,25 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
commitBeginTime = timer_monotonic();
rocksDBMetrics->getCommitQueueWaitHistogram()->sampleSeconds(commitBeginTime - a.startTime);
}
auto s = doCommit(a.writeBatch.get(), a.db, a.getHistograms);
std::vector<std::pair<uint32_t, KeyRange>> deletes;
auto s = doCommit(a.writeBatch.get(), a.db, &deletes, a.getHistograms);
if (!s.ok()) {
a.done.sendError(statusToError(s));
return;
}
for (auto shard : *(a.dirtyShards)) {
shard->readIterPool->update();
}
if (!s.ok()) {
a.done.sendError(statusToError(s));
return;
a.done.send(Void());
for (const auto& [id, range] : deletes) {
auto cf = columnFamilyMap->find(id);
ASSERT(cf != columnFamilyMap->end());
auto begin = toSlice(range.begin);
auto end = toSlice(range.end);
ASSERT(a.db->SuggestCompactRange(cf->second, &begin, &end).ok());
}
if (a.getHistograms) {
@ -1368,7 +1452,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
if (a.getPerfContext) {
rocksDBMetrics->setPerfContext(threadIndex);
}
a.done.send(Void());
}
struct CloseAction : TypedAction<Writer, CloseAction> {
@ -1714,7 +1797,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
writeThread = createGenericThreadPool();
readThreads = createGenericThreadPool();
}
writeThread->addThread(new Writer(0, rocksDBMetrics), "fdb-rocksdb-wr");
writeThread->addThread(new Writer(0, shardManager.getColumnFamilyMap(), rocksDBMetrics), "fdb-rocksdb-wr");
TraceEvent("RocksDBReadThreads").detail("KnobRocksDBReadParallelism", SERVER_KNOBS->ROCKSDB_READ_PARALLELISM);
for (unsigned i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; ++i) {
readThreads->addThread(new Reader(i, rocksDBMetrics), "fdb-rocksdb-re");
@ -1775,8 +1858,11 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
void set(KeyValueRef kv, const Arena*) override { shardManager.put(kv.key, kv.value); }
void clear(KeyRangeRef range, const Arena*) override {
// TODO: clear ranges.
return;
if (range.singleKeyRange()) {
shardManager.clear(range.begin);
} else {
shardManager.clearRange(range);
}
}
Future<Void> commit(bool) override {
@ -1987,7 +2073,7 @@ TEST_CASE("noSim/ShardedRocksDB/SingleShardRead") {
return Void();
}
TEST_CASE("noSim/ShardedRocksDB/ReadRange") {
TEST_CASE("noSim/ShardedRocksDB/RangeOps") {
state std::string rocksDBTestDir = "sharded-rocksdb-kvs-test-db";
platform::eraseDirectoryRecursive(rocksDBTestDir);
@ -2080,6 +2166,33 @@ TEST_CASE("noSim/ShardedRocksDB/ReadRange") {
ASSERT(result[i] == expectedRows[40 + i]);
}
// Clear a range on a single shard.
kvStore->clear(KeyRangeRef("40"_sr, "45"_sr));
wait(kvStore->commit(false));
// Scope each read so the repeated `RangeResult result` declarations don't collide.
{
RangeResult result =
wait(kvStore->readRange(KeyRangeRef("4"_sr, "5"_sr), 20, 10000, IKeyValueStore::ReadType::NORMAL));
ASSERT_EQ(result.size(), 5);
}
// Clear a single value.
kvStore->clear(KeyRangeRef("01"_sr, keyAfter("01"_sr)));
wait(kvStore->commit(false));
Optional<Value> val = wait(kvStore->readValue("01"_sr));
ASSERT(!val.present());
// Clear a range spanning multiple shards.
kvStore->clear(KeyRangeRef("1"_sr, "8"_sr));
wait(kvStore->commit(false));
{
RangeResult result =
wait(kvStore->readRange(KeyRangeRef("1"_sr, "8"_sr), 1000, 10000, IKeyValueStore::ReadType::NORMAL));
ASSERT_EQ(result.size(), 0);
}
{
RangeResult result =
wait(kvStore->readRange(KeyRangeRef("0"_sr, ":"_sr), 1000, 10000, IKeyValueStore::ReadType::NORMAL));
ASSERT_EQ(result.size(), 19);
}
Future<Void> closed = kvStore->onClosed();
kvStore->dispose();
wait(closed);

View File

@ -573,7 +573,9 @@ Future<Void> logRouterPeekMessages(PromiseType replyPromise,
TLogPeekReply reply;
reply.maxKnownVersion = self->version.get();
reply.minKnownCommittedVersion = self->poppedVersion;
reply.messages = StringRef(reply.arena, messages.toValue());
auto messagesValue = messages.toValue();
reply.arena.dependsOn(messagesValue.arena());
reply.messages = messagesValue;
reply.popped = self->minPopped.get() >= self->startVersion ? self->minPopped.get() : 0;
reply.end = endVersion;
reply.onlySpilled = false;
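Reviewer note (not part of the patch): the old `reply.messages = StringRef(reply.arena, messages.toValue())` allocated in reply.arena and memcpy'd the serialized messages; the new form instead makes the reply arena co-own the source memory. A sketch of the assumed Arena semantics follows; the same pattern is applied to tLogPeekMessages later in this diff.
// Before (one extra copy): reply.messages = StringRef(reply.arena, messages.toValue());
// After (zero-copy):
Standalone<StringRef> messagesValue = messages.toValue(); // bytes live in messagesValue.arena()
reply.arena.dependsOn(messagesValue.arena()); // reply keeps that arena alive
reply.messages = messagesValue; // no dangling reference, no memcpy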

View File

@ -39,6 +39,7 @@
#include "fdbclient/ManagementAPI.actor.h"
#include <boost/lexical_cast.hpp>
#include "flow/actorcompiler.h" // This must be the last #include.
#include "flow/flow.h"
ACTOR Future<std::vector<WorkerDetails>> getWorkers(Reference<AsyncVar<ServerDBInfo> const> dbInfo, int flags = 0) {
loop {
@ -672,14 +673,18 @@ ACTOR Future<Void> reconfigureAfter(Database cx,
struct QuietDatabaseChecker {
double start = now();
constexpr static double maxDDRunTime = 1000.0;
double maxDDRunTime;
QuietDatabaseChecker(double maxDDRunTime) : maxDDRunTime(maxDDRunTime) {}
struct Impl {
double start;
std::string const& phase;
double maxDDRunTime;
std::vector<std::string> failReasons;
Impl(double start, const std::string& phase) : start(start), phase(phase) {}
Impl(double start, const std::string& phase, const double maxDDRunTime)
: start(start), phase(phase), maxDDRunTime(maxDDRunTime) {}
template <class T, class Comparison = std::less_equal<>>
Impl& add(BaseTraceEvent& evt,
@ -719,7 +724,7 @@ struct QuietDatabaseChecker {
};
Impl startIteration(std::string const& phase) const {
Impl res(start, phase);
Impl res(start, phase, maxDDRunTime);
return res;
}
};
@ -735,7 +740,7 @@ ACTOR Future<Void> waitForQuietDatabase(Database cx,
int64_t maxDataDistributionQueueSize = 0,
int64_t maxPoppedVersionLag = 30e6,
int64_t maxVersionOffset = 1e6) {
state QuietDatabaseChecker checker;
state QuietDatabaseChecker checker(isBuggifyEnabled(BuggifyType::General) ? 1500.0 : 1000.0);
state Future<Void> reconfig =
reconfigureAfter(cx, 100 + (deterministicRandom()->random01() * 100), dbInfo, "QuietDatabase");
state Future<int64_t> dataInFlight;

View File

@ -422,10 +422,11 @@ static JsonBuilderObject getBounceImpactInfo(int recoveryStatusCode) {
}
struct MachineMemoryInfo {
double memoryUsage;
double memoryUsage; // virtual memory usage
double rssUsage; // RSS memory usage
double aggregateLimit;
MachineMemoryInfo() : memoryUsage(0), aggregateLimit(0) {}
MachineMemoryInfo() : memoryUsage(0), rssUsage(0), aggregateLimit(0) {}
bool valid() { return memoryUsage >= 0; }
void invalidate() { memoryUsage = -1; }
@ -789,6 +790,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
if (memInfo->second.valid()) {
if (processMetrics.size() > 0 && programStart.size() > 0) {
memInfo->second.memoryUsage += processMetrics.getDouble("Memory");
memInfo->second.rssUsage += processMetrics.getDouble("ResidentMemory");
memInfo->second.aggregateLimit += programStart.getDouble("MemoryLimit");
} else
memInfo->second.invalidate();
@ -815,7 +817,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
roles.addRole("blob_manager", db->get().blobManager.get());
}
if (SERVER_KNOBS->ENABLE_ENCRYPTION && db->get().encryptKeyProxy.present()) {
if ((SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) && db->get().encryptKeyProxy.present()) {
roles.addRole("encrypt_key_proxy", db->get().encryptKeyProxy.get());
}
@ -979,6 +981,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
statusObj["network"] = networkObj;
memoryObj.setKeyRawNumber("used_bytes", processMetrics.getValue("Memory"));
memoryObj.setKeyRawNumber("rss_bytes", processMetrics.getValue("ResidentMemory"));
memoryObj.setKeyRawNumber("unused_allocated_memory", processMetrics.getValue("UnusedAllocatedMemory"));
}
@ -1011,7 +1014,7 @@ ACTOR static Future<JsonBuilderObject> processStatusFetcher(
if (machineMemInfo.valid() && memoryLimit > 0) {
ASSERT(machineMemInfo.aggregateLimit > 0);
int64_t memory =
(availableMemory + machineMemInfo.memoryUsage) * memoryLimit / machineMemInfo.aggregateLimit;
(availableMemory + machineMemInfo.rssUsage) * memoryLimit / machineMemInfo.aggregateLimit;
memoryObj["available_bytes"] = std::min<int64_t>(std::max<int64_t>(memory, 0), memoryLimit);
}
}
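Reviewer note: a worked example of the revised available_bytes computation, with hypothetical numbers. Suppose two fdbserver processes share a machine, each with memoryLimit = 8 GiB (so aggregateLimit = 16 GiB), this process reports availableMemory = 2 GiB, and the machine's summed RSS is 6 GiB. Then memory = (2 + 6) * 8 / 16 = 4 GiB, and available_bytes = min(max(4 GiB, 0), 8 GiB) = 4 GiB. Scaling by RSS rather than virtual size presumably keeps large untouched virtual mappings from skewing the estimate.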

View File

@ -2020,7 +2020,9 @@ Future<Void> tLogPeekMessages(PromiseType replyPromise,
TLogPeekReply reply;
reply.maxKnownVersion = logData->version.get();
reply.minKnownCommittedVersion = logData->minKnownCommittedVersion;
reply.messages = StringRef(reply.arena, messages.toValue());
auto messagesValue = messages.toValue();
reply.arena.dependsOn(messagesValue.arena());
reply.messages = messagesValue;
reply.end = endVersion;
reply.onlySpilled = onlySpilled;

View File

@ -1806,14 +1806,6 @@ int main(int argc, char* argv[]) {
auto opts = CLIOptions::parseArgs(argc, argv);
const auto role = opts.role;
#ifdef _WIN32
// For now, ignore all tests for Windows
if (role == ServerRole::Simulation || role == ServerRole::UnitTests || role == ServerRole::Test) {
printf("Windows tests are not supported yet\n");
flushAndExit(FDB_EXIT_SUCCESS);
}
#endif
if (role == ServerRole::Simulation)
printf("Random seed is %u...\n", opts.randomSeed);

View File

@ -66,6 +66,8 @@ struct MasterData : NonCopyable, ReferenceCounted<MasterData> {
// up-to-date in the presence of key range splits/merges.
VersionVector ssVersionVector;
int8_t locality; // sequencer locality
CounterCollection cc;
Counter getCommitVersionRequests;
Counter getLiveCommittedVersionRequests;
@ -115,6 +117,8 @@ struct MasterData : NonCopyable, ReferenceCounted<MasterData> {
forceRecovery = false;
}
balancer = resolutionBalancer.resolutionBalancing();
locality = myInterface.locality.dcId().present() ? std::stoi(myInterface.locality.dcId().get().toString())
: tagLocalityInvalid;
}
~MasterData() = default;
};
@ -241,7 +245,9 @@ void updateLiveCommittedVersion(Reference<MasterData> self, ReportRawCommittedVe
if (req.version > self->liveCommittedVersion.get()) {
if (SERVER_KNOBS->ENABLE_VERSION_VECTOR && req.writtenTags.present()) {
// TraceEvent("Received ReportRawCommittedVersionRequest").detail("Version",req.version);
self->ssVersionVector.setVersion(req.writtenTags.get(), req.version);
int8_t primaryLocality =
SERVER_KNOBS->ENABLE_VERSION_VECTOR_HA_OPTIMIZATION ? self->locality : tagLocalityInvalid;
self->ssVersionVector.setVersion(req.writtenTags.get(), req.version, primaryLocality);
self->versionVectorTagUpdates += req.writtenTags.get().size();
}
auto curTime = now();

View File

@ -2706,8 +2706,8 @@ ACTOR Future<Void> updateNewestSoftwareVersion(std::string folder,
0600));
SWVersion swVersion(latestVersion, currentVersion, minCompatibleVersion);
auto s = swVersionValue(swVersion);
ErrorOr<Void> e = wait(errorOr(newVersionFile->write(s.toString().c_str(), s.size(), 0)));
Value s = swVersionValue(swVersion);
ErrorOr<Void> e = wait(holdWhile(s, errorOr(newVersionFile->write(s.begin(), s.size(), 0))));
if (e.isError()) {
throw e.getError();
}

View File

@ -251,6 +251,7 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload {
state Reference<GranuleTenantData> data =
tenantData.getDataForGranule(self->directories[directoryIdx]->directoryRange);
wait(data->bstoreLoaded.getFuture());
wait(delay(0));
self->directories[directoryIdx]->bstore = data->bstore;
}

View File

@ -80,6 +80,7 @@ struct ConflictRangeWorkload : TestWorkload {
state int offsetA;
state int offsetB;
state int randomLimit;
state Reverse reverse = Reverse::False;
state bool randomSets = false;
state std::set<int> insertedSet;
state RangeResult originalResults;
@ -159,10 +160,13 @@ struct ConflictRangeWorkload : TestWorkload {
offsetA = deterministicRandom()->randomInt(-1 * self->maxOffset, self->maxOffset);
offsetB = deterministicRandom()->randomInt(-1 * self->maxOffset, self->maxOffset);
randomLimit = deterministicRandom()->randomInt(1, self->maxKeySpace);
reverse.set(deterministicRandom()->coinflip());
RangeResult res = wait(tr1.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA),
KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB),
randomLimit));
randomLimit,
Snapshot::False,
reverse));
if (res.size()) {
originalResults = res;
break;
@ -225,13 +229,17 @@ struct ConflictRangeWorkload : TestWorkload {
StringRef(format("%010d", clearedEnd))));
RangeResult res = wait(trRYOW.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA),
KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB),
randomLimit));
randomLimit,
Snapshot::False,
reverse));
wait(trRYOW.commit());
} else {
tr3.clear(StringRef(format("%010d", self->maxKeySpace + 1)));
RangeResult res = wait(tr3.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA),
KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB),
randomLimit));
randomLimit,
Snapshot::False,
reverse));
wait(tr3.commit());
}
} catch (Error& e) {
@ -252,7 +260,9 @@ struct ConflictRangeWorkload : TestWorkload {
RangeResult res = wait(tr4.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA),
KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB),
randomLimit));
randomLimit,
Snapshot::False,
reverse));
++self->withConflicts;
if (res.size() == originalResults.size()) {
@ -261,20 +271,27 @@ struct ConflictRangeWorkload : TestWorkload {
throw not_committed();
// Discard known cases where conflicts do not change the results
if (originalResults.size() == randomLimit && offsetB <= 0) {
// Hit limit but end offset goes backwards, so changes could effect results even though in
// this instance they did not
if (originalResults.size() == randomLimit &&
((offsetB <= 0 && !reverse) || (offsetA > 1 && reverse))) {
// Hit limit but end offset goes into the range, so changes could effect results even though
// in this instance they did not
throw not_committed();
}
if (originalResults[originalResults.size() - 1].key >= sentinelKey) {
KeyRef smallestResult = originalResults[0].key;
KeyRef largestResult = originalResults[originalResults.size() - 1].key;
if (reverse) {
std::swap(smallestResult, largestResult);
}
if (largestResult >= sentinelKey) {
// Results go into server keyspace, so if a key selector does not fully resolve offset, a
// change won't effect results
throw not_committed();
}
if ((originalResults[0].key == firstElement ||
originalResults[0].key == StringRef(format("%010d", *(insertedSet.begin())))) &&
if ((smallestResult == firstElement ||
smallestResult == StringRef(format("%010d", *(insertedSet.begin())))) &&
offsetA < 0) {
// Results return the first element, and the begin offset is negative, so if a key selector
// does not fully resolve the offset, a change won't effect results
@ -308,6 +325,7 @@ struct ConflictRangeWorkload : TestWorkload {
.detail("OffsetA", offsetA)
.detail("OffsetB", offsetB)
.detail("RandomLimit", randomLimit)
.detail("Reverse", reverse)
.detail("Size", originalResults.size())
.detail("Results", keyStr1)
.detail("Original", keyStr2);
@ -328,7 +346,9 @@ struct ConflictRangeWorkload : TestWorkload {
// If the commit is successful, check that the result matches the first execution.
RangeResult res = wait(tr4.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA),
KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB),
randomLimit));
randomLimit,
Snapshot::False,
reverse));
++self->withoutConflicts;
if (res.size() == originalResults.size()) {
@ -366,6 +386,7 @@ struct ConflictRangeWorkload : TestWorkload {
.detail("OffsetA", offsetA)
.detail("OffsetB", offsetB)
.detail("RandomLimit", randomLimit)
.detail("Reverse", reverse)
.detail("Size", originalResults.size())
.detail("Results", keyStr1)
.detail("Original", keyStr2);

View File

@ -2378,7 +2378,7 @@ struct ConsistencyCheckWorkload : TestWorkload {
}
// Check EncryptKeyProxy
if (SERVER_KNOBS->ENABLE_ENCRYPTION && db.encryptKeyProxy.present() &&
if ((SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) && db.encryptKeyProxy.present() &&
(!nonExcludedWorkerProcessMap.count(db.encryptKeyProxy.get().address()) ||
nonExcludedWorkerProcessMap[db.encryptKeyProxy.get().address()].processClass.machineClassFitness(
ProcessClass::EncryptKeyProxy) > fitnessLowerBound)) {

View File

@ -280,7 +280,7 @@ struct EncryptionOpsWorkload : TestWorkload {
ASSERT(cipherKey.isValid());
ASSERT(cipherKey->isEqual(orgCipherKey));
DecryptBlobCipherAes256Ctr decryptor(cipherKey, headerCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(cipherKey, headerCipherKey, header.iv);
const bool validateHeaderAuthToken = deterministicRandom()->randomInt(0, 100) < 65;
auto start = std::chrono::high_resolution_clock::now();

View File

@ -273,12 +273,11 @@ struct MachineAttritionWorkload : TestWorkload {
}
ACTOR static Future<Void> machineKillWorker(MachineAttritionWorkload* self, double meanDelay, Database cx) {
state int killedMachines = 0;
state double delayBeforeKill = deterministicRandom()->random01() * meanDelay;
ASSERT(g_network->isSimulated());
state double delayBeforeKill;
if (self->killDc) {
delayBeforeKill = deterministicRandom()->random01() * meanDelay;
wait(delay(delayBeforeKill));
// decide on a machine to kill
@ -303,7 +302,20 @@ struct MachineAttritionWorkload : TestWorkload {
.detail("KillType", kt);
g_simulator.killDataCenter(target, kt);
} else if (self->killDatahall) {
delayBeforeKill = deterministicRandom()->random01() * meanDelay;
wait(delay(delayBeforeKill));
// It only makes sense to kill a single data hall.
ASSERT(self->targetIds.size() == 1);
auto target = self->targetIds.front();
auto kt = ISimulator::KillInstantly;
TraceEvent("Assassination").detail("TargetDataHall", target).detail("KillType", kt);
g_simulator.killDataHall(target, kt);
} else {
state int killedMachines = 0;
while (killedMachines < self->machinesToKill && self->machines.size() > self->machinesToLeave) {
TraceEvent("WorkerKillBegin")
.detail("KilledMachines", killedMachines)
@ -312,6 +324,7 @@ struct MachineAttritionWorkload : TestWorkload {
.detail("Machines", self->machines.size());
TEST(true); // Killing a machine
delayBeforeKill = deterministicRandom()->random01() * meanDelay;
wait(delay(delayBeforeKill));
TraceEvent("WorkerKillAfterDelay").log();
@ -385,8 +398,12 @@ struct MachineAttritionWorkload : TestWorkload {
}
killedMachines++;
if (!self->replacement)
if (self->replacement) {
// Replace by reshuffling, since we always pick from the back.
deterministicRandom()->randomShuffle(self->machines);
} else {
self->machines.pop_back();
}
wait(delay(meanDelay - delayBeforeKill) && success(self->ignoreSSFailures));

View File

@ -184,10 +184,10 @@ Reference<BlobCipherKey> BlobCipherKeyIdCache::insertBaseCipherKey(const Encrypt
return cipherKey;
}
void BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt) {
Reference<BlobCipherKey> BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt) {
ASSERT_NE(baseCipherId, ENCRYPT_INVALID_CIPHER_KEY_ID);
ASSERT_NE(salt, ENCRYPT_INVALID_RANDOM_SALT);
@ -201,7 +201,7 @@ void BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& bas
.detail("BaseCipherKeyId", baseCipherId)
.detail("DomainId", domainId);
// Key is already present; nothing more to do.
return;
return itr->second;
} else {
TraceEvent("InsertBaseCipherKey_UpdateCipher")
.detail("BaseCipherKeyId", baseCipherId)
@ -213,6 +213,7 @@ void BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& bas
Reference<BlobCipherKey> cipherKey =
makeReference<BlobCipherKey>(domainId, baseCipherId, baseCipher, baseCipherLen, salt);
keyIdCache.emplace(cacheKey, cipherKey);
return cipherKey;
}
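Reviewer note (not part of the patch): returning the key turns this method into insert-or-get. A usage sketch; domainId, baseCipherId, baseCipher, baseCipherLen, and salt are assumed to be valid values in scope.
Reference<BlobCipherKeyIdCache> keyIdCache = makeReference<BlobCipherKeyIdCache>(domainId);
Reference<BlobCipherKey> k1 = keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt);
Reference<BlobCipherKey> k2 = keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt);
ASSERT(k1.isValid() && k2.isValid());
ASSERT(k2->isEqual(k1)); // second call hits the "already present" branch above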
void BlobCipherKeyIdCache::cleanup() {
@ -263,27 +264,28 @@ Reference<BlobCipherKey> BlobCipherKeyCache::insertCipherKey(const EncryptCipher
}
}
void BlobCipherKeyCache::insertCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt) {
Reference<BlobCipherKey> BlobCipherKeyCache::insertCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt) {
if (domainId == ENCRYPT_INVALID_DOMAIN_ID || baseCipherId == ENCRYPT_INVALID_CIPHER_KEY_ID ||
salt == ENCRYPT_INVALID_RANDOM_SALT) {
throw encrypt_invalid_id();
}
Reference<BlobCipherKey> cipherKey;
try {
auto domainItr = domainCacheMap.find(domainId);
if (domainItr == domainCacheMap.end()) {
// Add mapping to track new encryption domain
Reference<BlobCipherKeyIdCache> keyIdCache = makeReference<BlobCipherKeyIdCache>(domainId);
keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt);
cipherKey = keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt);
domainCacheMap.emplace(domainId, keyIdCache);
} else {
// Track new baseCipher keys
Reference<BlobCipherKeyIdCache> keyIdCache = domainItr->second;
keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt);
cipherKey = keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt);
}
TraceEvent("InsertCipherKey")
@ -297,6 +299,8 @@ void BlobCipherKeyCache::insertCipherKey(const EncryptCipherDomainId& domainId,
.detail("Salt", salt);
throw;
}
return cipherKey;
}
Reference<BlobCipherKey> BlobCipherKeyCache::getLatestCipherKey(const EncryptCipherDomainId& domainId) {
@ -376,16 +380,27 @@ EncryptBlobCipherAes265Ctr::EncryptBlobCipherAes265Ctr(Reference<BlobCipherKey>
: ctx(EVP_CIPHER_CTX_new()), textCipherKey(tCipherKey), headerCipherKey(hCipherKey), authTokenMode(mode) {
ASSERT(isEncryptHeaderAuthTokenModeValid(mode));
ASSERT_EQ(ivLen, AES_256_IV_LENGTH);
memcpy(&iv[0], cipherIV, ivLen);
init();
}
EncryptBlobCipherAes265Ctr::EncryptBlobCipherAes265Ctr(Reference<BlobCipherKey> tCipherKey,
Reference<BlobCipherKey> hCipherKey,
const EncryptAuthTokenMode mode)
: ctx(EVP_CIPHER_CTX_new()), textCipherKey(tCipherKey), headerCipherKey(hCipherKey), authTokenMode(mode) {
ASSERT(isEncryptHeaderAuthTokenModeValid(mode));
generateRandomData(iv, AES_256_IV_LENGTH);
init();
}
void EncryptBlobCipherAes265Ctr::init() {
if (ctx == nullptr) {
throw encrypt_ops_error();
}
if (EVP_EncryptInit_ex(ctx, EVP_aes_256_ctr(), nullptr, nullptr, nullptr) != 1) {
throw encrypt_ops_error();
}
if (EVP_EncryptInit_ex(ctx, nullptr, nullptr, textCipherKey.getPtr()->data(), cipherIV) != 1) {
if (EVP_EncryptInit_ex(ctx, nullptr, nullptr, textCipherKey.getPtr()->data(), iv) != 1) {
throw encrypt_ops_error();
}
}
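The init() above uses OpenSSL's two-step EVP_EncryptInit_ex pattern: the first call selects the cipher, the second supplies the key and IV. A minimal standalone sketch of that pattern carried through to the update/final calls not shown above, using placeholder key and IV bytes rather than FDB's key derivation:

    // Sketch: two-step EVP init followed by update/final for AES-256-CTR.
    // Key and IV are zero placeholders, not a real key-management scheme.
    #include <openssl/evp.h>
    #include <cstdio>

    int main() {
        unsigned char key[32] = { 0 }; // 256-bit key (placeholder)
        unsigned char iv[16] = { 0 };  // 128-bit IV (placeholder)
        const unsigned char plain[] = "hello";
        unsigned char cipher[64];
        int len = 0, total = 0;

        EVP_CIPHER_CTX* ctx = EVP_CIPHER_CTX_new();
        if (ctx == nullptr)
            return 1;
        // Step 1: select the cipher; step 2: supply key and IV.
        if (EVP_EncryptInit_ex(ctx, EVP_aes_256_ctr(), nullptr, nullptr, nullptr) != 1 ||
            EVP_EncryptInit_ex(ctx, nullptr, nullptr, key, iv) != 1)
            return 1;
        if (EVP_EncryptUpdate(ctx, cipher, &len, plain, sizeof(plain)) != 1)
            return 1;
        total = len;
        if (EVP_EncryptFinal_ex(ctx, cipher + total, &len) != 1)
            return 1;
        total += len;
        EVP_CIPHER_CTX_free(ctx);
        printf("ciphertext bytes: %d\n", total);
        return 0;
    }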
@ -439,7 +454,7 @@ Reference<EncryptBuf> EncryptBlobCipherAes265Ctr::encrypt(const uint8_t* plainte
header->cipherTextDetails.baseCipherId = textCipherKey->getBaseCipherId();
header->cipherTextDetails.encryptDomainId = textCipherKey->getDomainId();
header->cipherTextDetails.salt = textCipherKey->getSalt();
memcpy(&header->cipherTextDetails.iv[0], &iv[0], AES_256_IV_LENGTH);
memcpy(&header->iv[0], &iv[0], AES_256_IV_LENGTH);
if (authTokenMode == ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE) {
// No header 'authToken' generation needed.
@ -887,8 +902,7 @@ TEST_CASE("flow/BlobCipher") {
header.cipherTextDetails.baseCipherId,
header.cipherTextDetails.salt);
ASSERT(tCipherKeyKey->isEqual(cipherKey));
DecryptBlobCipherAes256Ctr decryptor(
tCipherKeyKey, Reference<BlobCipherKey>(), &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, Reference<BlobCipherKey>(), &header.iv[0]);
Reference<EncryptBuf> decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena);
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
@ -903,8 +917,7 @@ TEST_CASE("flow/BlobCipher") {
headerCopy.flags.headerVersion += 1;
try {
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
DecryptBlobCipherAes256Ctr decryptor(
tCipherKeyKey, Reference<BlobCipherKey>(), &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, Reference<BlobCipherKey>(), header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -920,8 +933,7 @@ TEST_CASE("flow/BlobCipher") {
headerCopy.flags.encryptMode += 1;
try {
encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena);
DecryptBlobCipherAes256Ctr decryptor(
tCipherKeyKey, Reference<BlobCipherKey>(), &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, Reference<BlobCipherKey>(), header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -937,8 +949,7 @@ TEST_CASE("flow/BlobCipher") {
memcpy(encrypted->begin(), &temp[0], bufLen);
int tIdx = deterministicRandom()->randomInt(0, bufLen - 1);
temp[tIdx] += 1;
DecryptBlobCipherAes256Ctr decryptor(
tCipherKeyKey, Reference<BlobCipherKey>(), &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, Reference<BlobCipherKey>(), header.iv);
decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena);
} catch (Error& e) {
// No authToken, hence, no corruption detection supported
@ -978,7 +989,7 @@ TEST_CASE("flow/BlobCipher") {
header.cipherHeaderDetails.baseCipherId,
header.cipherHeaderDetails.salt);
ASSERT(tCipherKeyKey->isEqual(cipherKey));
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv);
Reference<EncryptBuf> decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena);
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
@ -993,7 +1004,7 @@ TEST_CASE("flow/BlobCipher") {
sizeof(BlobCipherEncryptHeader));
headerCopy.flags.headerVersion += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -1009,7 +1020,7 @@ TEST_CASE("flow/BlobCipher") {
sizeof(BlobCipherEncryptHeader));
headerCopy.flags.encryptMode += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -1026,7 +1037,7 @@ TEST_CASE("flow/BlobCipher") {
int hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_SIZE - 1);
headerCopy.singleAuthToken.authToken[hIdx] += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -1042,7 +1053,7 @@ TEST_CASE("flow/BlobCipher") {
memcpy(encrypted->begin(), &temp[0], bufLen);
int tIdx = deterministicRandom()->randomInt(0, bufLen - 1);
temp[tIdx] += 1;
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena);
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
@ -1084,7 +1095,7 @@ TEST_CASE("flow/BlobCipher") {
header.cipherHeaderDetails.salt);
ASSERT(tCipherKey->isEqual(cipherKey));
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv);
Reference<EncryptBuf> decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena);
ASSERT_EQ(decrypted->getLogicalSize(), bufLen);
@ -1099,7 +1110,7 @@ TEST_CASE("flow/BlobCipher") {
sizeof(BlobCipherEncryptHeader));
headerCopy.flags.headerVersion += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -1115,7 +1126,7 @@ TEST_CASE("flow/BlobCipher") {
sizeof(BlobCipherEncryptHeader));
headerCopy.flags.encryptMode += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -1132,7 +1143,7 @@ TEST_CASE("flow/BlobCipher") {
int hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_SIZE - 1);
headerCopy.multiAuthTokens.cipherTextAuthToken[hIdx] += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -1149,7 +1160,7 @@ TEST_CASE("flow/BlobCipher") {
hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_SIZE - 1);
headerCopy.multiAuthTokens.headerAuthToken[hIdx] += 1;
try {
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena);
ASSERT(false); // error expected
} catch (Error& e) {
@ -1164,7 +1175,7 @@ TEST_CASE("flow/BlobCipher") {
memcpy(encrypted->begin(), &temp[0], bufLen);
int tIdx = deterministicRandom()->randomInt(0, bufLen - 1);
temp[tIdx] += 1;
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]);
DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv);
decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena);
} catch (Error& e) {
if (e.code() != error_code_encrypt_header_authtoken_mismatch) {
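The change from void to Reference<BlobCipherKey> lets a caller obtain the cached key, whether newly inserted or pre-existing, directly from the insert call instead of a follow-up lookup such as getLatestCipherKey(). A standalone analog of the pattern, illustrative only and not FDB's actual BlobCipherKeyCache:

    // Sketch: an insert API that returns the cached entry (new or
    // pre-existing), mirroring the 'return itr->second' branch above.
    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>

    struct Key {
        std::string material;
    };

    std::map<int, std::shared_ptr<Key>> cache;

    // Returns the existing entry if the id is already cached; otherwise
    // inserts a new one. Either way the caller gets the cached reference.
    std::shared_ptr<Key> insertKey(int id, const std::string& material) {
        auto itr = cache.find(id);
        if (itr != cache.end())
            return itr->second; // already present: hand back the existing key
        auto key = std::make_shared<Key>(Key{ material });
        cache.emplace(id, key);
        return key;
    }

    int main() {
        auto k1 = insertKey(7, "abc");
        auto k2 = insertKey(7, "abc"); // second insert returns the same object
        std::cout << (k1 == k2 ? "same\n" : "different\n");
        return 0;
    }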

View File

@ -20,10 +20,12 @@
#pragma once
#include "flow/network.h"
#include <cinttypes>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include <boost/functional/hash.hpp>
#include "flow/Arena.h"
#include "flow/EncryptUtils.h"
@ -67,12 +69,42 @@ public:
}
uint8_t* begin() { return buffer; }
StringRef toStringRef() { return StringRef(buffer, logicalSize); }
private:
int allocSize;
int logicalSize;
uint8_t* buffer;
};
#pragma pack(push, 1) // exact fit - no padding
struct BlobCipherDetails {
// Encryption domain boundary identifier.
EncryptCipherDomainId encryptDomainId = ENCRYPT_INVALID_DOMAIN_ID;
// BaseCipher encryption key identifier
EncryptCipherBaseKeyId baseCipherId = ENCRYPT_INVALID_CIPHER_KEY_ID;
// Random salt
EncryptCipherRandomSalt salt{};
bool operator==(const BlobCipherDetails& o) const {
return encryptDomainId == o.encryptDomainId && baseCipherId == o.baseCipherId && salt == o.salt;
}
};
#pragma pack(pop)
namespace std {
template <>
struct hash<BlobCipherDetails> {
std::size_t operator()(BlobCipherDetails const& details) const {
std::size_t seed = 0;
boost::hash_combine(seed, std::hash<EncryptCipherDomainId>{}(details.encryptDomainId));
boost::hash_combine(seed, std::hash<EncryptCipherBaseKeyId>{}(details.baseCipherId));
boost::hash_combine(seed, std::hash<EncryptCipherRandomSalt>{}(details.salt));
return seed;
}
};
} // namespace std
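The std::hash specialization above is what makes BlobCipherDetails usable as a key in unordered containers. A standalone sketch of the same boost::hash_combine pattern, with simplified stand-in types for the FDB typedefs:

    // Sketch: specializing std::hash via boost::hash_combine so a plain
    // struct can serve as an unordered_set/unordered_map key.
    #include <boost/functional/hash.hpp>
    #include <cstdint>
    #include <iostream>
    #include <unordered_set>

    struct Details {
        int64_t domainId;
        uint64_t baseCipherId;
        uint64_t salt;
        bool operator==(const Details& o) const {
            return domainId == o.domainId && baseCipherId == o.baseCipherId && salt == o.salt;
        }
    };

    namespace std {
    template <>
    struct hash<Details> {
        std::size_t operator()(Details const& d) const {
            std::size_t seed = 0;
            boost::hash_combine(seed, std::hash<int64_t>{}(d.domainId));
            boost::hash_combine(seed, std::hash<uint64_t>{}(d.baseCipherId));
            boost::hash_combine(seed, std::hash<uint64_t>{}(d.salt));
            return seed;
        }
    };
    } // namespace std

    int main() {
        std::unordered_set<Details> seen;
        seen.insert(Details{ 1, 42, 7 });
        seen.insert(Details{ 1, 42, 7 }); // duplicate: hash + equality agree
        std::cout << seen.size() << "\n"; // prints 1
        return 0;
    }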
// BlobCipher Encryption header format
// This header is persisted along with the encrypted buffer; it contains the information necessary
// to decrypt the buffer when serving read requests.
@ -95,25 +127,11 @@ typedef struct BlobCipherEncryptHeader {
};
// Cipher text encryption information
struct {
// Encryption domain boundary identifier.
EncryptCipherDomainId encryptDomainId{};
// BaseCipher encryption key identifier
EncryptCipherBaseKeyId baseCipherId{};
// Random salt
EncryptCipherRandomSalt salt{};
// Initialization vector used to encrypt the payload.
uint8_t iv[AES_256_IV_LENGTH];
} cipherTextDetails;
struct {
// Encryption domainId for the header
EncryptCipherDomainId encryptDomainId{};
// BaseCipher encryption key identifier.
EncryptCipherBaseKeyId baseCipherId{};
// Random salt
EncryptCipherRandomSalt salt{};
} cipherHeaderDetails;
BlobCipherDetails cipherTextDetails;
// Cipher header encryption information
BlobCipherDetails cipherHeaderDetails;
// Initialization vector used to encrypt the payload.
uint8_t iv[AES_256_IV_LENGTH];
// The encryption header is stored as plaintext on persistent storage to assist reconstruction of cipher key(s) for
// reads. The FIPS compliance recommendation is to leverage a cryptographic digest mechanism to generate 'authentication
@ -144,6 +162,17 @@ typedef struct BlobCipherEncryptHeader {
};
BlobCipherEncryptHeader() {}
template <class Ar>
void serialize(Ar& ar) {
ar.serializeBytes(this, headerSize);
}
std::string toString() const {
return format("domain id: %" PRId64 ", cipher id: %" PRIu64,
cipherTextDetails.encryptDomainId,
cipherTextDetails.baseCipherId);
}
} BlobCipherEncryptHeader;
#pragma pack(pop)
@ -276,10 +305,10 @@ public:
// 'baseCipherId' & 'salt'. The caller needs to fetch 'baseCipherKey' detail and re-populate KeyCache.
// Also, the invocation will NOT update the latest cipher-key details.
void insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt);
Reference<BlobCipherKey> insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt);
// API to clean up the cache by dropping all cached cipherKeys
void cleanup();
@ -328,11 +357,11 @@ public:
// 'baseCipherId' & 'salt'. The caller needs to fetch 'baseCipherKey' detail and re-populate KeyCache.
// Also, the invocation will NOT update the latest cipher-key details.
void insertCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt);
Reference<BlobCipherKey> insertCipherKey(const EncryptCipherDomainId& domainId,
const EncryptCipherBaseKeyId& baseCipherId,
const uint8_t* baseCipher,
int baseCipherLen,
const EncryptCipherRandomSalt& salt);
// API returns the last inserted cipherKey for a given encryption domain Id.
// If the domain Id is invalid, it throws an 'encrypt_invalid_id' exception,
@ -389,6 +418,9 @@ public:
const uint8_t* iv,
const int ivLen,
const EncryptAuthTokenMode mode);
EncryptBlobCipherAes265Ctr(Reference<BlobCipherKey> tCipherKey,
Reference<BlobCipherKey> hCipherKey,
const EncryptAuthTokenMode mode);
~EncryptBlobCipherAes265Ctr();
Reference<EncryptBuf> encrypt(const uint8_t* plaintext,
@ -402,6 +434,8 @@ private:
Reference<BlobCipherKey> headerCipherKey;
EncryptAuthTokenMode authTokenMode;
uint8_t iv[AES_256_IV_LENGTH];
void init();
};
// This interface enables data block decryption. An invocation to decrypt() would generate
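The new three-argument constructor above omits the IV parameter and instead generates a random IV internally via generateRandomData(). A standalone sketch of the two-overload pattern (caller-supplied IV vs. internally generated), with illustrative names; a production implementation must draw the IV from a cryptographically secure source:

    // Sketch: constructor pair where one overload accepts a caller-supplied
    // IV and the other generates one itself. Illustrative only.
    #include <array>
    #include <cstdint>
    #include <cstring>
    #include <random>

    constexpr int IV_LEN = 16; // AES-256-CTR uses a 16-byte IV

    class Encryptor {
    public:
        // Caller supplies the IV (e.g. when decryption must reuse a known IV).
        explicit Encryptor(const uint8_t* cipherIV) { memcpy(iv.data(), cipherIV, IV_LEN); }

        // No IV supplied: generate one. std::random_device stands in here for
        // a cryptographically secure generator, which real code must use.
        Encryptor() {
            std::random_device rd;
            for (auto& b : iv)
                b = static_cast<uint8_t>(rd());
        }

    private:
        std::array<uint8_t, IV_LEN> iv{};
    };

    int main() {
        uint8_t fixedIV[IV_LEN] = { 0 };
        Encryptor withIV(fixedIV); // caller-supplied IV
        Encryptor randomIV;        // internally generated IV
        (void)withIV;
        (void)randomIV;
        return 0;
    }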

View File

@ -37,7 +37,8 @@
#define SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID -1
#define ENCRYPT_HEADER_DOMAIN_ID -2
#define FDB_DEFAULT_ENCRYPT_DOMAIN_NAME "FdbDefaultEncryptDomain"
const std::string FDB_DEFAULT_ENCRYPT_DOMAIN_NAME = "FdbDefaultEncryptDomain";
using EncryptCipherDomainId = int64_t;
using EncryptCipherDomainName = StringRef;

View File

@ -80,6 +80,6 @@ std::string JsonTraceLogFormatter::formatEvent(const TraceEventFields& fields) c
escapeString(oss, iter->second);
oss << "\"";
}
oss << " }\r\n";
oss << " }\n";
return std::move(oss).str();
}

View File

@ -2238,7 +2238,9 @@ void renamedFile() {
void renameFile(std::string const& fromPath, std::string const& toPath) {
INJECT_FAULT(io_error, "renameFile"); // rename file failed
#ifdef _WIN32
if (MoveFile(fromPath.c_str(), toPath.c_str())) {
if (MoveFileExA(fromPath.c_str(),
toPath.c_str(),
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) {
// renamedFile();
return;
}
@ -2331,8 +2333,11 @@ void atomicReplace(std::string const& path, std::string const& content, bool tex
}
f = 0;
if (!ReplaceFile(path.c_str(), tempfilename.c_str(), nullptr, NULL, nullptr, nullptr))
if (!MoveFileExA(tempfilename.c_str(),
path.c_str(),
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) {
throw io_error();
}
#elif defined(__unixish__)
if (!g_network->isSimulated()) {
if (fsync(fileno(f)) != 0)
@ -2523,14 +2528,14 @@ std::string popPath(const std::string& path) {
return path.substr(0, i + 1);
}
std::string abspath(std::string const& path, bool resolveLinks, bool mustExist) {
if (path.empty()) {
std::string abspath(std::string const& path_, bool resolveLinks, bool mustExist) {
if (path_.empty()) {
Error e = platform_error();
Severity sev = e.code() == error_code_io_error ? SevError : SevWarnAlways;
TraceEvent(sev, "AbsolutePathError").error(e).detail("Path", path);
TraceEvent(sev, "AbsolutePathError").error(e).detail("Path", path_);
throw e;
}
std::string path = path_.back() == '\\' ? path_.substr(0, path_.size() - 1) : path_;
// Returns an absolute path canonicalized to use only CANONICAL_PATH_SEPARATOR
INJECT_FAULT(platform_error, "abspath"); // abspath failed
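Replacing MoveFile/ReplaceFile with MoveFileExA brings the Windows behavior closer to POSIX rename(2): MOVEFILE_REPLACE_EXISTING overwrites an existing destination (plain MoveFile fails if the target exists), MOVEFILE_COPY_ALLOWED permits cross-volume moves, and MOVEFILE_WRITE_THROUGH waits for the move to reach disk before returning. A minimal cross-platform sketch of the pattern, with illustrative paths:

    // Sketch: rename-with-overwrite on both platforms. On POSIX,
    // rename(2) already replaces the target atomically.
    #include <cstdio>
    #ifdef _WIN32
    #include <windows.h>
    #endif

    bool renameReplace(const char* from, const char* to) {
    #ifdef _WIN32
        return MoveFileExA(from, to,
                           MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) != 0;
    #else
        return std::rename(from, to) == 0;
    #endif
    }

    int main() {
        // Illustrative paths only.
        if (!renameReplace("data.tmp", "data.dat"))
            std::perror("renameReplace");
        return 0;
    }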

View File

@ -25,6 +25,7 @@
#include <chrono>
#include <random>
#include <thread>
#include "flow/actorcompiler.h" // has to be last include
#ifdef ENABLE_SAMPLING

View File

@ -86,6 +86,6 @@ std::string XmlTraceLogFormatter::formatEvent(const TraceEventFields& fields) co
oss << "\" ";
}
oss << "/>\r\n";
oss << "/>\n";
return std::move(oss).str();
}

View File

@ -206,6 +206,7 @@ if(WITH_PYTHON)
add_fdb_test(TEST_FILES rare/ConflictRangeRYOWCheck.toml)
add_fdb_test(TEST_FILES rare/CycleRollbackClogged.toml)
add_fdb_test(TEST_FILES rare/CycleWithKills.toml)
add_fdb_test(TEST_FILES rare/CycleWithDeadHall.toml)
add_fdb_test(TEST_FILES rare/FuzzTest.toml)
add_fdb_test(TEST_FILES rare/HighContentionPrefixAllocator.toml)
add_fdb_test(TEST_FILES rare/InventoryTestHeavyWrites.toml)

View File

@ -118,7 +118,7 @@ class LogParser:
with open(self.infile) as f:
line = f.readline()
while line != "":
obj = self.processLine(line, linenr)
obj = self.process_line(line, linenr)
line = f.readline()
linenr += 1
if obj is None:
@ -137,7 +137,7 @@ class LogParser:
and self.sanitize_backtrace(obj) is not None
):
obj = self.apply_address_to_line(obj)
self.writeObject(obj)
self.write_object(obj)
def log_trace_parse_error(self, linenr, e):
obj = {}
@ -164,7 +164,7 @@ class LogParser:
return_code_trace["Command"] = command
return_code_trace["ReturnCode"] = return_code
return_code_trace["testname"] = self.name
self.writeObject(return_code_trace)
self.write_object(return_code_trace)
class JSONParser(LogParser):
@ -208,7 +208,7 @@ class XMLParser(LogParser):
self.errors.append(exception)
def fatalError(self, exception):
self.fatalError.append(exception)
self.fatalErrors.append(exception)
def warning(self, exception):
self.warnings.append(exception)

View File

@ -177,9 +177,9 @@ if __name__ == "__main__":
for line in sev40s:
# When running ASAN we expect to see this message. Boost coroutine should be using the correct asan
# annotations so that it shouldn't produce any false positives.
if line.endswith(
"WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false "
"positives in some cases! "
if (
"WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false positives in some cases!"
in line
):
continue
print(">>>>>>>>>>>>>>>>>>>> Found severity 40 events - the test fails")

View File

@ -0,0 +1,58 @@
# Attempt to reproduce failures which occur in three_data_hall mode
# when one data hall is down and other machines are being rebooted.
#
# three_data_hall is supposed to tolerate the failure of one data hall
# plus one other machine.
#
# CONFIGURATION NOTES
#
# For the simulated test setup, there is currently no way to configure
# three data halls within one data center. Instead, we need to specify
# three data centers, since the simulated setup will place one data
# hall in each data center.
#
# We also need to disable 'generateFearless', since that option will
# sometimes generate configs with a satellite data center, and we have
# a policy of not placing tlogs there. It's impossible to place tlogs
# in a way that satisfies the three_data_hall constraints.
[configuration]
config = 'three_data_hall'
datacenters = 3
generateFearless = false
[[test]]
testTitle = 'Two out of Three Data Halls'
# Baseline workload during test.
[[test.workload]]
testName = 'Cycle'
transactionsPerSecond = 2500.0
testDuration = 30.0
expectedRate = 0.01
# Immediately take down a data hall.
[[test.workload]]
testName = 'Attrition'
killDatahall = true
killDc = false
machinesToKill = 1
targetIds = 1
testDuration = 0
# Continuously reboot machines.
# (waitForVersion ensures the cluster recovers between reboots.)
[[test.workload]]
testName = 'Attrition'
testDuration = 30.0
machinesToKill = 300 # causes the mean delay to be 30s/300 = 100ms.
reboot = true # reboot, don't kill.
replacement = true # yes, we can reboot the same machine again.
waitForVersion = true # wait for the cluster to reform between reboots.
# Consistency checks won't pass with one data hall missing.
# Change to fallback mode after the test as a workaround.
[[test.workload]]
testName = 'ChangeConfig'
configMode = 'three_data_hall_fallback'
minDelayBeforeChange = 30.0
maxDelayBeforeChange = 30.0