From 17c8b4b2a00f9566d941c3cd5986447ed713e3ea Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 18 May 2022 14:48:40 -0400 Subject: [PATCH 01/49] Initial mako changes introducing Tenant API and replacing transaction resets --- bindings/c/test/fdb_api.hpp | 57 ++++++++++++++++- bindings/c/test/mako/mako.cpp | 115 +++++++++++++++++++++++++++------- bindings/c/test/mako/mako.hpp | 4 +- 3 files changed, 151 insertions(+), 25 deletions(-) diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 590c4afdb7..e7f176b8a9 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -23,11 +23,12 @@ #pragma once #ifndef FDB_API_VERSION -#define FDB_API_VERSION 720 +#define FDB_API_VERSION 710 #endif #include #include +#include #include #include #include @@ -388,6 +389,7 @@ inline KeySelector lastLessOrEqual(KeyRef key, int offset = 0) { class Transaction { friend class Database; + friend class Tenant; std::shared_ptr tr; explicit Transaction(native::FDBTransaction* tr_raw) { @@ -506,6 +508,7 @@ public: }; class Database { + friend class Tenant; std::shared_ptr db; public: @@ -556,6 +559,58 @@ public: } }; +class Tenant final { + std::string tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; + std::string tenantMapPrefix = "\xff/tenantMap/"; + +public: + static TypedFuture createTenant(Transaction tr, std::string name) { + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE); + KeyRef tenantManagementKey = toBytesRef(tenantManagementMapPrefix + name); + tr.set(tenantManagementKey, toBytesRef("")); + return tr.commit(); + } + + static TypedFuture deleteTenant(Transaction tr, std::string name) { + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE); + KeyRef tenantMapKey = toBytesRef(tenantMapPrefix + name); + tr.clear(tenantMapKey); + return tr.commit(); + } + + 
Tenant(fdb::Database* db, const uint8_t* name, int name_length) { + if (native::fdb_error_t err = fdb_database_open_tenant(db->db.get(), name, name_length, &tenant)) { + std::cerr << native::fdb_get_error(err) << std::endl; + std::abort(); + } + } + + ~Tenant() { + if (tenant != nullptr) { + fdb_tenant_destroy(tenant); + } + } + + Transaction createTransaction() { + auto tx_native = static_cast(nullptr); + auto err = Error(native::fdb_tenant_create_transaction(tenant, &tx_native)); + if (err) + throwError("Failed to create transaction: ", err); + return Transaction(tx_native); + } + + Tenant(const Tenant&) = delete; + Tenant& operator=(const Tenant&) = delete; + Tenant(Tenant&&) = delete; + Tenant& operator=(Tenant&&) = delete; + +private: + friend class Transaction; + native::FDBTenant* tenant; +}; + } // namespace fdb #endif /*FDB_API_HPP*/ diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index a1ee557377..21a81c58cc 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -62,6 +62,8 @@ namespace mako { struct alignas(64) ThreadArgs { int worker_id; int thread_id; + bool tenant_main; + int tenants; pid_t parent_id; LatencySampleBinArray sample_bins; Arguments const* args; @@ -76,8 +78,20 @@ using namespace mako; thread_local Logger logr = Logger(MainProcess{}, VERBOSE_DEFAULT); +Transaction createNewTransaction(Database* db, Arguments const& args, int id = -1) { + // No tenants specified + if (args.tenants <= 0) { + return db->createTransaction(); + } + // Create Tenant Transaction + int tenant_id = (id == -1) ? urand(1, args.tenants) : id; + std::string tenant_name = "tenant" + std::to_string(tenant_id); + Tenant t(db, reinterpret_cast(tenant_name.c_str()), tenant_name.length()); + return t.createTransaction(); +} + /* cleanup database */ -int cleanup(Transaction tx, Arguments const& args) { +int cleanup(Database db, Arguments const& args) { const auto prefix_len = args.prefixpadding ? 
args.key_length - args.row_digits : intSize(KEY_PREFIX); auto genprefix = [&args](ByteString& s) { const auto padding_len = args.key_length - intSize(KEY_PREFIX) - args.row_digits; @@ -96,27 +110,52 @@ int cleanup(Transaction tx, Arguments const& args) { auto watch = Stopwatch(StartAtCtor{}); - while (true) { - tx.clearRange(beginstr, endstr); - auto future_commit = tx.commit(); - const auto rc = waitAndHandleError(tx, future_commit, "COMMIT_CLEANUP"); - if (rc == FutureRC::OK) { - break; - } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { - // tx already reset - continue; - } else { - return -1; + int num_iterations = (args.tenants > 1) ? args.tenants : 1; + for (int i = 1; i <= num_iterations; ++i) { + // If args.tenants is zero, this will use a non-tenant txn and perform a single range clear. + // If 1, it will use a tenant txn and do a single range clear instead. + // If > 1, it will perform a range clear with a different tenant txn per iteration. + Transaction tx = createNewTransaction(&db, args, i); + while (true) { + tx.clearRange(beginstr, endstr); + auto future_commit = tx.commit(); + const auto rc = waitAndHandleError(tx, future_commit, "COMMIT_CLEANUP"); + if (rc == FutureRC::OK) { + break; + } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { + // tx already reset + continue; + } else { + return -1; + } + } + + // If tenants are specified, also delete the tenant after clearing out its keyspace + if (args.tenants > 0) { + std::string tenant_name = "tenant" + std::to_string(i); + while (true) { + Transaction systemTx = db.createTransaction(); + auto future_commit = Tenant::deleteTenant(systemTx, tenant_name); + const auto rc = waitAndHandleError(systemTx, future_commit, "DELETE_TENANT"); + if (rc == FutureRC::OK) { + break; + } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { + // tx already reset + continue; + } else { + // try with new transaction object + systemTx = db.createTransaction(); + } + } } } - 
tx.reset(); logr.info("Clear range: {:6.3f} sec", toDoubleSeconds(watch.stop().diff())); return 0; } /* populate database */ -int populate(Transaction tx, +int populate(Database db, Arguments const& args, int worker_id, int thread_id, @@ -139,6 +178,7 @@ int populate(Transaction tx, auto watch_trace = Stopwatch(watch_total.getStart()); auto key_checkpoint = key_begin; // in case of commit failure, restart from this key + Transaction tx = createNewTransaction(&db, args); for (auto i = key_begin; i <= key_end; i++) { /* sequential keys */ genKey(keystr.data(), KEY_PREFIX, args, i); @@ -184,7 +224,7 @@ int populate(Transaction tx, auto tx_restarter = ExitGuard([&watch_tx]() { watch_tx.startFromStop(); }); if (rc == FutureRC::OK) { key_checkpoint = i + 1; // restart on failures from next key - tx.reset(); + tx = createNewTransaction(&db, args); } else if (rc == FutureRC::ABORT) { return -1; } else { @@ -257,7 +297,6 @@ transaction_begin: stats.incrErrorCount(op); } else { // abort - tx.reset(); return -1; } // retry from first op @@ -327,12 +366,10 @@ transaction_begin: stats.addLatency(OP_TRANSACTION, tx_duration); } stats.incrOpCount(OP_TRANSACTION); - /* make sure to reset transaction */ - tx.reset(); return 0; } -int runWorkload(Transaction tx, +int runWorkload(Database db, Arguments const& args, int const thread_tps, std::atomic const& throttle_factor, @@ -373,6 +410,7 @@ int runWorkload(Transaction tx, /* main transaction loop */ while (1) { + Transaction tx = createNewTransaction(&db, args); while ((thread_tps > 0) && (xacts >= current_tps)) { /* throttle on */ const auto time_now = steady_clock::now(); @@ -554,6 +592,8 @@ void workerThread(ThreadArgs& thread_args) { const auto parent_id = thread_args.parent_id; const auto worker_id = thread_args.worker_id; const auto thread_id = thread_args.thread_id; + const auto tenant_main = thread_args.tenant_main; + const auto tenants = thread_args.tenants; const auto dotrace = (worker_id == 0 && thread_id == 0 && 
args.txntrace) ? args.txntrace : 0; auto database = thread_args.database; const auto dotagging = args.txntagging; @@ -576,7 +616,27 @@ void workerThread(ThreadArgs& thread_args) { : computeThreadIters(args.iteration, worker_id, thread_id, args.num_processes, args.num_threads); /* create my own transaction object */ - auto tx = database.createTransaction(); + Transaction tx; + tx = database.createTransaction(); + + if (tenant_main && tenants > 0) { + for (int i = 1; i <= tenants; ++i) { + std::string tenant_name = "tenant" + std::to_string(i); + while (true) { + auto future_commit = Tenant::createTenant(tx, tenant_name); + const auto rc = waitAndHandleError(tx, future_commit, "CREATE_TENANT"); + if (rc == FutureRC::OK) { + break; + } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { + // tx already reset + continue; + } else { + // try with new transaction object + tx = database.createTransaction(); + } + } + } + } /* i'm ready */ readycount.fetch_add(1); @@ -588,18 +648,18 @@ void workerThread(ThreadArgs& thread_args) { auto& sample_bins = thread_args.sample_bins; if (args.mode == MODE_CLEAN) { - auto rc = cleanup(tx, args); + auto rc = cleanup(database, args); if (rc < 0) { logr.error("cleanup failed"); } } else if (args.mode == MODE_BUILD) { - auto rc = populate(tx, args, worker_id, thread_id, thread_tps, stats, sample_bins); + auto rc = populate(database, args, worker_id, thread_id, thread_tps, stats, sample_bins); if (rc < 0) { logr.error("populate failed"); } } else if (args.mode == MODE_RUN) { auto rc = runWorkload( - tx, args, thread_tps, throttle_factor, thread_iters, signal, stats, sample_bins, dotrace, dotagging); + database, args, thread_tps, throttle_factor, thread_iters, signal, stats, sample_bins, dotrace, dotagging); if (rc < 0) { logr.error("runWorkload failed"); } @@ -727,6 +787,9 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces this_args.worker_id = worker_id; this_args.thread_id = i; 
this_args.parent_id = pid_main; + // let a single worker thread create all the tenants before the green signal + this_args.tenant_main = i == 0 && worker_id == 0; + this_args.tenants = args.tenants; this_args.args = &args; this_args.shm = shm; this_args.database = databases[i % args.num_databases]; @@ -974,6 +1037,7 @@ void usage() { printf("%-24s %s\n", "", "This option cannot be specified with --seconds."); printf("%-24s %s\n", " --keylen=LENGTH", "Specify the key lengths"); printf("%-24s %s\n", " --vallen=LENGTH", "Specify the value lengths"); + printf("%-24s %s\n", " --tenants=TENANTS", "Specify the number of tenants to use"); printf("%-24s %s\n", "-x, --transaction=SPEC", "Transaction specification"); printf("%-24s %s\n", " --tps|--tpsmax=TPS", "Specify the target max TPS"); printf("%-24s %s\n", " --tpsmin=TPS", "Specify the target min TPS"); @@ -1024,6 +1088,7 @@ int parseArguments(int argc, char* argv[], Arguments& args) { { "iteration", required_argument, NULL, 'i' }, { "keylen", required_argument, NULL, ARG_KEYLEN }, { "vallen", required_argument, NULL, ARG_VALLEN }, + { "tenants", required_argument, NULL, ARG_TENANTS }, { "transaction", required_argument, NULL, 'x' }, { "tps", required_argument, NULL, ARG_TPS }, { "tpsmax", required_argument, NULL, ARG_TPSMAX }, @@ -1125,6 +1190,9 @@ int parseArguments(int argc, char* argv[], Arguments& args) { case ARG_VALLEN: args.value_length = atoi(optarg); break; + case ARG_TENANTS: + args.tenants = atoi(optarg); + break; case ARG_TPS: case ARG_TPSMAX: args.tpsmax = atoi(optarg); @@ -1832,6 +1900,7 @@ int statsProcessMain(Arguments const& args, fmt::fprintf(fp, "\"sampling\": %d,", args.sampling); fmt::fprintf(fp, "\"key_length\": %d,", args.key_length); fmt::fprintf(fp, "\"value_length\": %d,", args.value_length); + fmt::fprintf(fp, "\"tenants\": %d,", args.tenants); fmt::fprintf(fp, "\"commit_get\": %d,", args.commit_get); fmt::fprintf(fp, "\"verbose\": %d,", args.verbose); fmt::fprintf(fp, "\"cluster_files\": 
\"%s\",", args.cluster_files[0]); diff --git a/bindings/c/test/mako/mako.hpp b/bindings/c/test/mako/mako.hpp index a2185d41f2..e6e67a1ba8 100644 --- a/bindings/c/test/mako/mako.hpp +++ b/bindings/c/test/mako/mako.hpp @@ -22,7 +22,7 @@ #define MAKO_HPP #ifndef FDB_API_VERSION -#define FDB_API_VERSION 720 +#define FDB_API_VERSION 710 #endif #include @@ -49,6 +49,7 @@ constexpr const int MODE_RUN = 2; enum ArgKind { ARG_KEYLEN, ARG_VALLEN, + ARG_TENANTS, ARG_TPS, ARG_ASYNC, ARG_COMMITGET, @@ -138,6 +139,7 @@ struct Arguments { int sampling; int key_length; int value_length; + int tenants; int zipf; int commit_get; int verbose; From cc6620e9bba44356097f05e048bdc34d3e482825 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 18 May 2022 16:03:44 -0400 Subject: [PATCH 02/49] fix CI errors --- bindings/c/test/fdb_api.hpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index e7f176b8a9..22b4db81c9 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -560,21 +560,21 @@ public: }; class Tenant final { - std::string tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; - std::string tenantMapPrefix = "\xff/tenantMap/"; + static const std::string tenantManagementMapPrefix; + static const std::string tenantMapPrefix; public: static TypedFuture createTenant(Transaction tr, std::string name) { - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES); - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, 1); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, 1); KeyRef tenantManagementKey = toBytesRef(tenantManagementMapPrefix + name); - tr.set(tenantManagementKey, toBytesRef("")); + tr.set(tenantManagementKey, toBytesRef(std::string(""))); return tr.commit(); } static TypedFuture deleteTenant(Transaction tr, std::string name) 
{ - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS); - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS, 1); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, 1); KeyRef tenantMapKey = toBytesRef(tenantMapPrefix + name); tr.clear(tenantMapKey); return tr.commit(); @@ -611,6 +611,9 @@ private: native::FDBTenant* tenant; }; +const std::string Tenant::tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; +const std::string Tenant::tenantMapPrefix = "\xff/tenantMap/"; + } // namespace fdb #endif /*FDB_API_HPP*/ From 5a1c7f5c083c29269efca98e7a894ce21689b37b Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 18 May 2022 17:16:30 -0400 Subject: [PATCH 03/49] attempt to create fdb_api.cpp to separate static declaration --- bindings/c/CMakeLists.txt | 2 +- bindings/c/test/fdb_api.cpp | 30 ++++++++++++++++++++++++++++++ bindings/c/test/fdb_api.hpp | 3 --- 3 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 bindings/c/test/fdb_api.cpp diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt index 9258d384d9..8e0455dc46 100644 --- a/bindings/c/CMakeLists.txt +++ b/bindings/c/CMakeLists.txt @@ -112,7 +112,7 @@ if(NOT WIN32) test/unit/fdb_api.hpp) add_library(fdb_cpp INTERFACE) - target_sources(fdb_cpp INTERFACE test/fdb_api.hpp) + target_sources(fdb_cpp INTERFACE test/fdb_api.hpp test/fdb_api.cpp) target_include_directories(fdb_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/test) target_link_libraries(fdb_cpp INTERFACE fmt::fmt) diff --git a/bindings/c/test/fdb_api.cpp b/bindings/c/test/fdb_api.cpp new file mode 100644 index 0000000000..6f359ce76d --- /dev/null +++ b/bindings/c/test/fdb_api.cpp @@ -0,0 +1,30 @@ +/* + * fdb_api.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2022 Apple Inc. 
and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdb_api.hpp" + +#include + +namespace fdb { + +const std::string Tenant::tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; +const std::string Tenant::tenantMapPrefix = "\xff/tenantMap/"; + +} // namespace fdb \ No newline at end of file diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 22b4db81c9..c21433d7fa 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -611,9 +611,6 @@ private: native::FDBTenant* tenant; }; -const std::string Tenant::tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; -const std::string Tenant::tenantMapPrefix = "\xff/tenantMap/"; - } // namespace fdb #endif /*FDB_API_HPP*/ From cdca68e26a0f9159bbc3cb60c2b1d181a8c4e93a Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 18 May 2022 17:26:05 -0400 Subject: [PATCH 04/49] only use tenant management prefix and default tenants to 0 in mako --- bindings/c/test/fdb_api.cpp | 1 - bindings/c/test/fdb_api.hpp | 4 ++-- bindings/c/test/mako/mako.cpp | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bindings/c/test/fdb_api.cpp b/bindings/c/test/fdb_api.cpp index 6f359ce76d..0e80acd125 100644 --- a/bindings/c/test/fdb_api.cpp +++ b/bindings/c/test/fdb_api.cpp @@ -25,6 +25,5 @@ namespace fdb { const std::string Tenant::tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; -const std::string 
Tenant::tenantMapPrefix = "\xff/tenantMap/"; } // namespace fdb \ No newline at end of file diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index c21433d7fa..3cacb15d6e 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -575,8 +575,8 @@ public: static TypedFuture deleteTenant(Transaction tr, std::string name) { tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS, 1); tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, 1); - KeyRef tenantMapKey = toBytesRef(tenantMapPrefix + name); - tr.clear(tenantMapKey); + KeyRef tenantManagementKey = toBytesRef(tenantManagementMapPrefix + name); + tr.clear(tenantManagementKey); return tr.commit(); } diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 21a81c58cc..9f3d3ff5f8 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -865,6 +865,7 @@ int initArguments(Arguments& args) { args.sampling = 1000; args.key_length = 32; args.value_length = 16; + args.tenants = 0; args.zipf = 0; args.commit_get = 0; args.verbose = 1; From b92b3b21b0e4114c7d01af6cea908e167935ef16 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 20 May 2022 13:03:59 -0400 Subject: [PATCH 05/49] Refactor Tenant class in fdb_api header to match conventions of existing classes and move tenant creation into populate function --- bindings/c/CMakeLists.txt | 2 +- bindings/c/test/fdb_api.cpp | 29 ---------------- bindings/c/test/fdb_api.hpp | 53 +++++++++++------------------- bindings/c/test/mako/mako.cpp | 62 +++++++++++++++-------------------- bindings/c/test/mako/mako.hpp | 2 +- 5 files changed, 48 insertions(+), 100 deletions(-) delete mode 100644 bindings/c/test/fdb_api.cpp diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt index 8e0455dc46..9258d384d9 100644 --- a/bindings/c/CMakeLists.txt +++ b/bindings/c/CMakeLists.txt @@ -112,7 +112,7 @@ if(NOT WIN32) test/unit/fdb_api.hpp) add_library(fdb_cpp INTERFACE) - 
target_sources(fdb_cpp INTERFACE test/fdb_api.hpp test/fdb_api.cpp) + target_sources(fdb_cpp INTERFACE test/fdb_api.hpp) target_include_directories(fdb_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/test) target_link_libraries(fdb_cpp INTERFACE fmt::fmt) diff --git a/bindings/c/test/fdb_api.cpp b/bindings/c/test/fdb_api.cpp deleted file mode 100644 index 0e80acd125..0000000000 --- a/bindings/c/test/fdb_api.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * fdb_api.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "fdb_api.hpp" - -#include - -namespace fdb { - -const std::string Tenant::tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; - -} // namespace fdb \ No newline at end of file diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 3cacb15d6e..e9196aefb2 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -23,7 +23,7 @@ #pragma once #ifndef FDB_API_VERSION -#define FDB_API_VERSION 710 +#define FDB_API_VERSION 720 #endif #include @@ -560,55 +560,42 @@ public: }; class Tenant final { - static const std::string tenantManagementMapPrefix; - static const std::string tenantMapPrefix; + std::shared_ptr tenant; + static constexpr CharsRef tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; public: - static TypedFuture createTenant(Transaction tr, std::string name) { + Tenant(const Tenant&) noexcept = default; + Tenant& operator=(const Tenant&) noexcept = default; + Tenant(fdb::Database* db, BytesRef name, int name_length) : tenant(nullptr) { + auto tenant_raw = static_cast(nullptr); + if (auto err = Error(native::fdb_database_open_tenant(db->db.get(), name.data(), name_length, &tenant_raw))) { + throwError(fmt::format("Failed to create tenant with name '{}': ", name), err); + } + tenant = std::shared_ptr(tenant_raw, &native::fdb_tenant_destroy); + } + Tenant() noexcept : tenant(nullptr) {} + + static void createTenant(Transaction tr, BytesRef name) { tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, 1); tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, 1); - KeyRef tenantManagementKey = toBytesRef(tenantManagementMapPrefix + name); - tr.set(tenantManagementKey, toBytesRef(std::string(""))); - return tr.commit(); + tr.set(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), + toBytesRef(std::string(""))); } - static TypedFuture deleteTenant(Transaction tr, std::string name) { + static void deleteTenant(Transaction tr, 
BytesRef name) { tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS, 1); tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, 1); - KeyRef tenantManagementKey = toBytesRef(tenantManagementMapPrefix + name); - tr.clear(tenantManagementKey); - return tr.commit(); - } - - Tenant(fdb::Database* db, const uint8_t* name, int name_length) { - if (native::fdb_error_t err = fdb_database_open_tenant(db->db.get(), name, name_length, &tenant)) { - std::cerr << native::fdb_get_error(err) << std::endl; - std::abort(); - } - } - - ~Tenant() { - if (tenant != nullptr) { - fdb_tenant_destroy(tenant); - } + tr.clear(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name)))); } Transaction createTransaction() { auto tx_native = static_cast(nullptr); - auto err = Error(native::fdb_tenant_create_transaction(tenant, &tx_native)); + auto err = Error(native::fdb_tenant_create_transaction(tenant.get(), &tx_native)); if (err) throwError("Failed to create transaction: ", err); return Transaction(tx_native); } - Tenant(const Tenant&) = delete; - Tenant& operator=(const Tenant&) = delete; - Tenant(Tenant&&) = delete; - Tenant& operator=(Tenant&&) = delete; - -private: - friend class Transaction; - native::FDBTenant* tenant; }; } // namespace fdb diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 9f3d3ff5f8..5e67fa932a 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -78,15 +78,15 @@ using namespace mako; thread_local Logger logr = Logger(MainProcess{}, VERBOSE_DEFAULT); -Transaction createNewTransaction(Database* db, Arguments const& args, int id = -1) { +Transaction createNewTransaction(Database db, Arguments const& args, int id = -1) { // No tenants specified if (args.tenants <= 0) { - return db->createTransaction(); + return db.createTransaction(); } // Create Tenant Transaction int tenant_id = (id == -1) ? 
urand(1, args.tenants) : id; - std::string tenant_name = "tenant" + std::to_string(tenant_id); - Tenant t(db, reinterpret_cast(tenant_name.c_str()), tenant_name.length()); + BytesRef tenant_name = toBytesRef("tenant" + std::to_string(tenant_id)); + Tenant t(&db, tenant_name, tenant_name.length()); return t.createTransaction(); } @@ -115,7 +115,7 @@ int cleanup(Database db, Arguments const& args) { // If args.tenants is zero, this will use a non-tenant txn and perform a single range clear. // If 1, it will use a tenant txn and do a single range clear instead. // If > 1, it will perform a range clear with a different tenant txn per iteration. - Transaction tx = createNewTransaction(&db, args, i); + Transaction tx = createNewTransaction(db, args, i); while (true) { tx.clearRange(beginstr, endstr); auto future_commit = tx.commit(); @@ -132,19 +132,16 @@ int cleanup(Database db, Arguments const& args) { // If tenants are specified, also delete the tenant after clearing out its keyspace if (args.tenants > 0) { - std::string tenant_name = "tenant" + std::to_string(i); while (true) { Transaction systemTx = db.createTransaction(); - auto future_commit = Tenant::deleteTenant(systemTx, tenant_name); + Tenant::deleteTenant(systemTx, toBytesRef("tenant" + std::to_string(i))); + auto future_commit = systemTx.commit(); const auto rc = waitAndHandleError(systemTx, future_commit, "DELETE_TENANT"); if (rc == FutureRC::OK) { break; } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { // tx already reset continue; - } else { - // try with new transaction object - systemTx = db.createTransaction(); } } } @@ -178,7 +175,23 @@ int populate(Database db, auto watch_trace = Stopwatch(watch_total.getStart()); auto key_checkpoint = key_begin; // in case of commit failure, restart from this key - Transaction tx = createNewTransaction(&db, args); + Transaction systemTx = db.createTransaction(); + for (int i = 1; i <= args.tenants; ++i) { + std::string tenant_name = "tenant" + 
std::to_string(i); + Tenant::createTenant(systemTx, toBytesRef(tenant_name)); + while (i % 10 == 0 || i == args.tenants) { + // create {batchSize} # of tenants + // commit every batch + auto future_commit = systemTx.commit(); + const auto rc = waitAndHandleError(systemTx, future_commit, "CREATE_TENANT"); + if (rc == FutureRC::OK) { + systemTx.reset(); + break; + } + // look up tenant range limit 1. If found, break, else retry + } + } + Transaction tx = createNewTransaction(db, args); for (auto i = key_begin; i <= key_end; i++) { /* sequential keys */ genKey(keystr.data(), KEY_PREFIX, args, i); @@ -224,7 +237,7 @@ int populate(Database db, auto tx_restarter = ExitGuard([&watch_tx]() { watch_tx.startFromStop(); }); if (rc == FutureRC::OK) { key_checkpoint = i + 1; // restart on failures from next key - tx = createNewTransaction(&db, args); + tx = createNewTransaction(db, args); } else if (rc == FutureRC::ABORT) { return -1; } else { @@ -410,7 +423,7 @@ int runWorkload(Database db, /* main transaction loop */ while (1) { - Transaction tx = createNewTransaction(&db, args); + Transaction tx = createNewTransaction(db, args); while ((thread_tps > 0) && (xacts >= current_tps)) { /* throttle on */ const auto time_now = steady_clock::now(); @@ -615,29 +628,6 @@ void workerThread(ThreadArgs& thread_args) { ? 
-1 : computeThreadIters(args.iteration, worker_id, thread_id, args.num_processes, args.num_threads); - /* create my own transaction object */ - Transaction tx; - tx = database.createTransaction(); - - if (tenant_main && tenants > 0) { - for (int i = 1; i <= tenants; ++i) { - std::string tenant_name = "tenant" + std::to_string(i); - while (true) { - auto future_commit = Tenant::createTenant(tx, tenant_name); - const auto rc = waitAndHandleError(tx, future_commit, "CREATE_TENANT"); - if (rc == FutureRC::OK) { - break; - } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { - // tx already reset - continue; - } else { - // try with new transaction object - tx = database.createTransaction(); - } - } - } - } - /* i'm ready */ readycount.fetch_add(1); auto stopcount_guard = ExitGuard([&stopcount]() { stopcount.fetch_add(1); }); diff --git a/bindings/c/test/mako/mako.hpp b/bindings/c/test/mako/mako.hpp index e6e67a1ba8..d0bef6da2a 100644 --- a/bindings/c/test/mako/mako.hpp +++ b/bindings/c/test/mako/mako.hpp @@ -22,7 +22,7 @@ #define MAKO_HPP #ifndef FDB_API_VERSION -#define FDB_API_VERSION 710 +#define FDB_API_VERSION 720 #endif #include From 612b94efa3f6539375abd00cbbd7bacdddb8929f Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 20 May 2022 13:16:49 -0400 Subject: [PATCH 06/49] remove tenant_main arg and use CharsRef in error message format --- bindings/c/test/fdb_api.hpp | 2 +- bindings/c/test/mako/mako.cpp | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index e9196aefb2..aef4cb68ac 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -569,7 +569,7 @@ public: Tenant(fdb::Database* db, BytesRef name, int name_length) : tenant(nullptr) { auto tenant_raw = static_cast(nullptr); if (auto err = Error(native::fdb_database_open_tenant(db->db.get(), name.data(), name_length, &tenant_raw))) { - throwError(fmt::format("Failed to create tenant with name '{}': ", 
name), err); + throwError(fmt::format("Failed to create tenant with name '{}': ", toCharsRef(name)), err); } tenant = std::shared_ptr(tenant_raw, &native::fdb_tenant_destroy); } diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 5e67fa932a..d971783006 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -62,7 +62,6 @@ namespace mako { struct alignas(64) ThreadArgs { int worker_id; int thread_id; - bool tenant_main; int tenants; pid_t parent_id; LatencySampleBinArray sample_bins; @@ -142,6 +141,8 @@ int cleanup(Database db, Arguments const& args) { } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { // tx already reset continue; + } else { + return -1; } } } @@ -605,8 +606,6 @@ void workerThread(ThreadArgs& thread_args) { const auto parent_id = thread_args.parent_id; const auto worker_id = thread_args.worker_id; const auto thread_id = thread_args.thread_id; - const auto tenant_main = thread_args.tenant_main; - const auto tenants = thread_args.tenants; const auto dotrace = (worker_id == 0 && thread_id == 0 && args.txntrace) ? 
args.txntrace : 0; auto database = thread_args.database; const auto dotagging = args.txntagging; @@ -777,8 +776,6 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces this_args.worker_id = worker_id; this_args.thread_id = i; this_args.parent_id = pid_main; - // let a single worker thread create all the tenants before the green signal - this_args.tenant_main = i == 0 && worker_id == 0; this_args.tenants = args.tenants; this_args.args = &args; this_args.shm = shm; From 8ae951079fd620abd1686523042dd4913829f91b Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 20 May 2022 13:28:26 -0400 Subject: [PATCH 07/49] remove extra space for CI formatting --- bindings/c/test/fdb_api.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index aef4cb68ac..4eabe880df 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -595,7 +595,6 @@ public: throwError("Failed to create transaction: ", err); return Transaction(tx_native); } - }; } // namespace fdb From f25450ddd8fc3704d5d1993d1c77743e3802ff4a Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 25 May 2022 11:23:58 -0400 Subject: [PATCH 08/49] keep tenants in memory when running workload and introduce tenants to async run --- bindings/c/test/mako/mako.cpp | 37 +++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index d971783006..0f9efacf3a 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -77,13 +78,17 @@ using namespace mako; thread_local Logger logr = Logger(MainProcess{}, VERBOSE_DEFAULT); -Transaction createNewTransaction(Database db, Arguments const& args, int id = -1) { +Transaction createNewTransaction(Database db, Arguments const& args, int id = -1, Tenant** tenants = nullptr) { // No tenants specified if 
(args.tenants <= 0) { return db.createTransaction(); } // Create Tenant Transaction - int tenant_id = (id == -1) ? urand(1, args.tenants) : id; + int tenant_id = (id == -1) ? urand(0, args.tenants - 1) : id; + // If provided tenants array (only necessary in runWorkload), use it + if (tenants) { + return tenants[tenant_id]->createTransaction(); + } BytesRef tenant_name = toBytesRef("tenant" + std::to_string(tenant_id)); Tenant t(&db, tenant_name, tenant_name.length()); return t.createTransaction(); @@ -110,7 +115,7 @@ int cleanup(Database db, Arguments const& args) { auto watch = Stopwatch(StartAtCtor{}); int num_iterations = (args.tenants > 1) ? args.tenants : 1; - for (int i = 1; i <= num_iterations; ++i) { + for (int i = 0; i < num_iterations; ++i) { // If args.tenants is zero, this will use a non-tenant txn and perform a single range clear. // If 1, it will use a tenant txn and do a single range clear instead. // If > 1, it will perform a range clear with a different tenant txn per iteration. @@ -177,19 +182,23 @@ int populate(Database db, auto key_checkpoint = key_begin; // in case of commit failure, restart from this key Transaction systemTx = db.createTransaction(); - for (int i = 1; i <= args.tenants; ++i) { + for (int i = 0; i < args.tenants; ++i) { std::string tenant_name = "tenant" + std::to_string(i); Tenant::createTenant(systemTx, toBytesRef(tenant_name)); - while (i % 10 == 0 || i == args.tenants) { + while (i % 10 == 9 || i == args.tenants) { // create {batchSize} # of tenants // commit every batch auto future_commit = systemTx.commit(); const auto rc = waitAndHandleError(systemTx, future_commit, "CREATE_TENANT"); - if (rc == FutureRC::OK) { + if (rc == FutureRC::RETRY) { + continue; + } else { + // Keep going if commit was successful (FutureRC::OK) + // If not a retryable error, expected to be the error + // tenant_already_exists, meaning another thread finished creating it systemTx.reset(); break; } - // look up tenant range limit 1. 
If found, break, else retry } } Transaction tx = createNewTransaction(db, args); @@ -422,9 +431,17 @@ int runWorkload(Database db, auto val = ByteString{}; val.resize(args.value_length); + // mimic typical tenant usage: keep tenants in memory + // and create transactions as needed + Tenant* tenants[args.tenants]; + for (int i = 0; i < args.tenants; ++i) { + BytesRef tenant_name = toBytesRef("tenant" + std::to_string(i)); + tenants[i] = new Tenant(&db, tenant_name, tenant_name.length()); + } + /* main transaction loop */ while (1) { - Transaction tx = createNewTransaction(db, args); + Transaction tx = createNewTransaction(db, args, 0, args.tenants > 0 ? tenants : nullptr); while ((thread_tps > 0) && (xacts >= current_tps)) { /* throttle on */ const auto time_now = steady_clock::now(); @@ -547,7 +564,7 @@ void runAsyncWorkload(Arguments const& args, auto state = std::make_shared(Logger(WorkerProcess{}, args.verbose, worker_id, i), db, - db.createTransaction(), + createNewTransaction(db, args), io_context, args, shm.statsSlot(worker_id, i), @@ -577,7 +594,7 @@ void runAsyncWorkload(Arguments const& args, auto state = std::make_shared(Logger(WorkerProcess{}, args.verbose, worker_id, i), db, - db.createTransaction(), + createNewTransaction(db, args), io_context, args, shm.statsSlot(worker_id, i), From c24970915fb8abc5dbae8812bd1922cf1a38dc05 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 25 May 2022 12:16:15 -0400 Subject: [PATCH 09/49] fix off-by-one error --- bindings/c/test/mako/mako.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 0f9efacf3a..c9fa9d0098 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -185,7 +185,7 @@ int populate(Database db, for (int i = 0; i < args.tenants; ++i) { std::string tenant_name = "tenant" + std::to_string(i); Tenant::createTenant(systemTx, toBytesRef(tenant_name)); - while (i % 10 == 9 || i == args.tenants) { + 
while (i % 10 == 9 || i == args.tenants - 1) { // create {batchSize} # of tenants // commit every batch auto future_commit = systemTx.commit(); From 01fa56630ab3e4bd255f76a929454696197c52e3 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 26 May 2022 14:55:59 -0400 Subject: [PATCH 10/49] WIP commit attempting to solve some tenant open and deletion issues --- bindings/c/test/fdb_api.hpp | 9 +++++---- bindings/c/test/mako/mako.cpp | 26 +++++++++++++++++++++----- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 4eabe880df..4658c01709 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -576,15 +576,16 @@ public: Tenant() noexcept : tenant(nullptr) {} static void createTenant(Transaction tr, BytesRef name) { - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, 1); - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, 1); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef()); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef()); tr.set(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), toBytesRef(std::string(""))); } static void deleteTenant(Transaction tr, BytesRef name) { - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS, 1); - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, 1); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef()); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS, BytesRef()); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef()); tr.clear(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name)))); } diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index c9fa9d0098..69de788070 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -115,12 +115,18 @@ 
int cleanup(Database db, Arguments const& args) { auto watch = Stopwatch(StartAtCtor{}); int num_iterations = (args.tenants > 1) ? args.tenants : 1; + Tenant* tenants[args.tenants]; + for (int i = 0; i < args.tenants; ++i) { + BytesRef tenant_name = toBytesRef("tenant" + std::to_string(i)); + tenants[i] = new Tenant(&db, tenant_name, tenant_name.length()); + } for (int i = 0; i < num_iterations; ++i) { // If args.tenants is zero, this will use a non-tenant txn and perform a single range clear. // If 1, it will use a tenant txn and do a single range clear instead. // If > 1, it will perform a range clear with a different tenant txn per iteration. - Transaction tx = createNewTransaction(db, args, i); + Transaction tx = createNewTransaction(db, args, i, args.tenants > 0 ? tenants : nullptr); while (true) { + printf("clearrange: %d\n", i); tx.clearRange(beginstr, endstr); auto future_commit = tx.commit(); const auto rc = waitAndHandleError(tx, future_commit, "COMMIT_CLEANUP"); @@ -136,15 +142,18 @@ int cleanup(Database db, Arguments const& args) { // If tenants are specified, also delete the tenant after clearing out its keyspace if (args.tenants > 0) { + Transaction systemTx = db.createTransaction(); while (true) { - Transaction systemTx = db.createTransaction(); + printf("deletetenant: %d\n", i); Tenant::deleteTenant(systemTx, toBytesRef("tenant" + std::to_string(i))); auto future_commit = systemTx.commit(); const auto rc = waitAndHandleError(systemTx, future_commit, "DELETE_TENANT"); if (rc == FutureRC::OK) { + printf("branch1"); break; } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { // tx already reset + printf("branch2"); continue; } else { return -1; @@ -191,8 +200,10 @@ int populate(Database db, auto future_commit = systemTx.commit(); const auto rc = waitAndHandleError(systemTx, future_commit, "CREATE_TENANT"); if (rc == FutureRC::RETRY) { + printf("retry\n"); continue; } else { + printf("success\n"); // Keep going if commit was successful 
(FutureRC::OK) // If not a retryable error, expected to be the error // tenant_already_exists, meaning another thread finished creating it @@ -201,7 +212,11 @@ int populate(Database db, } } } - Transaction tx = createNewTransaction(db, args); + Tenant* tenants[args.tenants]; + for (int i = 0; i < args.tenants; ++i) { + BytesRef tenant_name = toBytesRef("tenant" + std::to_string(i)); + tenants[i] = new Tenant(&db, tenant_name, tenant_name.length()); + } for (auto i = key_begin; i <= key_end; i++) { /* sequential keys */ genKey(keystr.data(), KEY_PREFIX, args, i); @@ -216,6 +231,7 @@ int populate(Database db, usleep(1000); } } + Transaction tx = createNewTransaction(db, args, -1, args.tenants > 0 ? tenants : nullptr); if (num_seconds_trace_every) { if (toIntegerSeconds(watch_trace.stop().diff()) >= num_seconds_trace_every) { watch_trace.startFromStop(); @@ -247,7 +263,7 @@ int populate(Database db, auto tx_restarter = ExitGuard([&watch_tx]() { watch_tx.startFromStop(); }); if (rc == FutureRC::OK) { key_checkpoint = i + 1; // restart on failures from next key - tx = createNewTransaction(db, args); + tx = createNewTransaction(db, args, -1, args.tenants > 0 ? tenants : nullptr); } else if (rc == FutureRC::ABORT) { return -1; } else { @@ -441,7 +457,7 @@ int runWorkload(Database db, /* main transaction loop */ while (1) { - Transaction tx = createNewTransaction(db, args, 0, args.tenants > 0 ? tenants : nullptr); + Transaction tx = createNewTransaction(db, args, -1, args.tenants > 0 ? 
tenants : nullptr); while ((thread_tps > 0) && (xacts >= current_tps)) { /* throttle on */ const auto time_now = steady_clock::now(); From ddda238c01b7b357e0cb24e6124f1d7b81a4d169 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 26 May 2022 16:29:16 -0400 Subject: [PATCH 11/49] fix issues with tenant creation/deletion and temporary tenants and strings --- bindings/c/test/mako/mako.cpp | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 69de788070..17d22afa2d 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -89,7 +89,8 @@ Transaction createNewTransaction(Database db, Arguments const& args, int id = -1 if (tenants) { return tenants[tenant_id]->createTransaction(); } - BytesRef tenant_name = toBytesRef("tenant" + std::to_string(tenant_id)); + std::string tenantStr = "tenant" + std::to_string(tenant_id); + BytesRef tenant_name = toBytesRef(tenantStr); Tenant t(&db, tenant_name, tenant_name.length()); return t.createTransaction(); } @@ -115,18 +116,12 @@ int cleanup(Database db, Arguments const& args) { auto watch = Stopwatch(StartAtCtor{}); int num_iterations = (args.tenants > 1) ? args.tenants : 1; - Tenant* tenants[args.tenants]; - for (int i = 0; i < args.tenants; ++i) { - BytesRef tenant_name = toBytesRef("tenant" + std::to_string(i)); - tenants[i] = new Tenant(&db, tenant_name, tenant_name.length()); - } for (int i = 0; i < num_iterations; ++i) { // If args.tenants is zero, this will use a non-tenant txn and perform a single range clear. // If 1, it will use a tenant txn and do a single range clear instead. // If > 1, it will perform a range clear with a different tenant txn per iteration. - Transaction tx = createNewTransaction(db, args, i, args.tenants > 0 ? 
tenants : nullptr); + Transaction tx = createNewTransaction(db, args, i); while (true) { - printf("clearrange: %d\n", i); tx.clearRange(beginstr, endstr); auto future_commit = tx.commit(); const auto rc = waitAndHandleError(tx, future_commit, "COMMIT_CLEANUP"); @@ -144,16 +139,13 @@ int cleanup(Database db, Arguments const& args) { if (args.tenants > 0) { Transaction systemTx = db.createTransaction(); while (true) { - printf("deletetenant: %d\n", i); Tenant::deleteTenant(systemTx, toBytesRef("tenant" + std::to_string(i))); auto future_commit = systemTx.commit(); const auto rc = waitAndHandleError(systemTx, future_commit, "DELETE_TENANT"); if (rc == FutureRC::OK) { - printf("branch1"); break; } else if (rc == FutureRC::RETRY || rc == FutureRC::CONFLICT) { // tx already reset - printf("branch2"); continue; } else { return -1; @@ -194,9 +186,10 @@ int populate(Database db, for (int i = 0; i < args.tenants; ++i) { std::string tenant_name = "tenant" + std::to_string(i); Tenant::createTenant(systemTx, toBytesRef(tenant_name)); - while (i % 10 == 9 || i == args.tenants - 1) { - // create {batchSize} # of tenants - // commit every batch + // Until this issue https://github.com/apple/foundationdb/issues/7260 is resolved + // we have to commit each tenant creation transaction one-by-one + // while (i % 10 == 9 || i == args.tenants - 1) { + while (true) { auto future_commit = systemTx.commit(); const auto rc = waitAndHandleError(systemTx, future_commit, "CREATE_TENANT"); if (rc == FutureRC::RETRY) { @@ -212,11 +205,6 @@ int populate(Database db, } } } - Tenant* tenants[args.tenants]; - for (int i = 0; i < args.tenants; ++i) { - BytesRef tenant_name = toBytesRef("tenant" + std::to_string(i)); - tenants[i] = new Tenant(&db, tenant_name, tenant_name.length()); - } for (auto i = key_begin; i <= key_end; i++) { /* sequential keys */ genKey(keystr.data(), KEY_PREFIX, args, i); @@ -231,7 +219,7 @@ int populate(Database db, usleep(1000); } } - Transaction tx = 
createNewTransaction(db, args, -1, args.tenants > 0 ? tenants : nullptr); + Transaction tx = createNewTransaction(db, args); if (num_seconds_trace_every) { if (toIntegerSeconds(watch_trace.stop().diff()) >= num_seconds_trace_every) { watch_trace.startFromStop(); @@ -263,7 +251,7 @@ int populate(Database db, auto tx_restarter = ExitGuard([&watch_tx]() { watch_tx.startFromStop(); }); if (rc == FutureRC::OK) { key_checkpoint = i + 1; // restart on failures from next key - tx = createNewTransaction(db, args, -1, args.tenants > 0 ? tenants : nullptr); + tx = createNewTransaction(db, args); } else if (rc == FutureRC::ABORT) { return -1; } else { From 8180414b4a5ff321b0f28f0d931d791c1f3dc252 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 26 May 2022 16:51:31 -0400 Subject: [PATCH 12/49] remove printf debugging statements --- bindings/c/test/mako/mako.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 7a9675c281..8ebbb90a1f 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -193,10 +193,8 @@ int populate(Database db, auto future_commit = systemTx.commit(); const auto rc = waitAndHandleError(systemTx, future_commit, "CREATE_TENANT"); if (rc == FutureRC::RETRY) { - printf("retry\n"); continue; } else { - printf("success\n"); // Keep going if commit was successful (FutureRC::OK) // If not a retryable error, expected to be the error // tenant_already_exists, meaning another thread finished creating it From 8a7a8d0d596b5ec69268301f671d9e6535181fb9 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 26 May 2022 17:14:41 -0400 Subject: [PATCH 13/49] delete tenants in memory when workload is done running --- bindings/c/test/mako/mako.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 8ebbb90a1f..2011ef565a 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -509,6 +509,9 @@ int 
runWorkload(Database db, xacts++; total_xacts++; } + for (int i = 0; i < args.tenants; ++i) { + delete tenants[i]; + } return rc; } From 55972ca07e103430f333041983d1389fad02b336 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 27 May 2022 10:57:57 -0400 Subject: [PATCH 14/49] address code review comments --- bindings/c/test/fdb_api.hpp | 2 +- bindings/c/test/mako/mako.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 4658c01709..50d497f7cf 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -569,7 +569,7 @@ public: Tenant(fdb::Database* db, BytesRef name, int name_length) : tenant(nullptr) { auto tenant_raw = static_cast(nullptr); if (auto err = Error(native::fdb_database_open_tenant(db->db.get(), name.data(), name_length, &tenant_raw))) { - throwError(fmt::format("Failed to create tenant with name '{}': ", toCharsRef(name)), err); + throwError(fmt::format("Failed to open tenant with name '{}': ", toCharsRef(name)), err); } tenant = std::shared_ptr(tenant_raw, &native::fdb_tenant_destroy); } diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 2011ef565a..53a9c2a2ce 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -437,7 +437,8 @@ int runWorkload(Database db, // and create transactions as needed Tenant* tenants[args.tenants]; for (int i = 0; i < args.tenants; ++i) { - BytesRef tenant_name = toBytesRef("tenant" + std::to_string(i)); + std::string tenantStr = "tenant" + std::to_string(i); + BytesRef tenant_name = toBytesRef(tenantStr); tenants[i] = new Tenant(&db, tenant_name, tenant_name.length()); } From 3bf4a8d521b3502d8e0bc0eac37342700a9e782f Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Fri, 27 May 2022 15:59:57 -0400 Subject: [PATCH 15/49] move transaction creation to outside outer populate loop --- bindings/c/test/mako/mako.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 
deletions(-) diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 53a9c2a2ce..3e56bed426 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -184,12 +184,12 @@ int populate(Database db, Transaction systemTx = db.createTransaction(); for (int i = 0; i < args.tenants; ++i) { - std::string tenant_name = "tenant" + std::to_string(i); - Tenant::createTenant(systemTx, toBytesRef(tenant_name)); - // Until this issue https://github.com/apple/foundationdb/issues/7260 is resolved - // we have to commit each tenant creation transaction one-by-one - // while (i % 10 == 9 || i == args.tenants - 1) { while (true) { + // Until this issue https://github.com/apple/foundationdb/issues/7260 is resolved + // we have to commit each tenant creation transaction one-by-one + // while (i % 10 == 9 || i == args.tenants - 1) { + std::string tenant_name = "tenant" + std::to_string(i); + Tenant::createTenant(systemTx, toBytesRef(tenant_name)); auto future_commit = systemTx.commit(); const auto rc = waitAndHandleError(systemTx, future_commit, "CREATE_TENANT"); if (rc == FutureRC::RETRY) { @@ -203,6 +203,7 @@ int populate(Database db, } } } + Transaction tx = createNewTransaction(db, args); for (auto i = key_begin; i <= key_end; i++) { /* sequential keys */ genKey(keystr.data(), KEY_PREFIX, args, i); @@ -217,7 +218,6 @@ int populate(Database db, usleep(1000); } } - Transaction tx = createNewTransaction(db, args); if (num_seconds_trace_every) { if (toIntegerSeconds(watch_trace.stop().diff()) >= num_seconds_trace_every) { watch_trace.startFromStop(); From 24b2be1c4fed1613a79ae7e286165bc548f4bdb3 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 30 May 2022 21:57:34 -0700 Subject: [PATCH 16/49] Remove last-limited check from DDMountainChopper and DDValleyFiller --- fdbserver/DataDistribution.actor.cpp | 25 --------- fdbserver/DataDistribution.actor.h | 1 - fdbserver/DataDistributionQueue.actor.cpp | 66 +++-------------------- 3 files 
changed, 6 insertions(+), 86 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 50b6b68753..921b35361b 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -492,34 +492,10 @@ struct DataDistributorData : NonCopyable, ReferenceCounted totalDataInFlightRemoteEventHolder(makeReference("TotalDataInFlightRemote")) {} }; -ACTOR Future monitorBatchLimitedTime(Reference const> db, double* lastLimited) { - loop { - wait(delay(SERVER_KNOBS->METRIC_UPDATE_RATE)); - - state Reference grvProxies(new GrvProxyInfo(db->get().client.grvProxies)); - - choose { - when(wait(db->onChange())) {} - when(GetHealthMetricsReply reply = - wait(grvProxies->size() ? basicLoadBalance(grvProxies, - &GrvProxyInterface::getHealthMetrics, - GetHealthMetricsRequest(false)) - : Never())) { - if (reply.healthMetrics.batchLimited) { - *lastLimited = now(); - } - } - } - } -} - // Runs the data distribution algorithm for FDB, including the DD Queue, DD tracker, and DD team collection ACTOR Future dataDistribution(Reference self, PromiseStream getShardMetricsList, const DDEnabledState* ddEnabledState) { - state double lastLimited = 0; - self->addActor.send(monitorBatchLimitedTime(self->dbInfo, &lastLimited)); - state Database cx = openDBOnServer(self->dbInfo, TaskPriority::DataDistributionLaunch, LockAware::True); cx->locationCacheSize = SERVER_KNOBS->DD_LOCATION_CACHE_SIZE; @@ -762,7 +738,6 @@ ACTOR Future dataDistribution(Reference self, self->ddId, storageTeamSize, configuration.storageTeamSize, - &lastLimited, ddEnabledState), "DDQueue", self->ddId, diff --git a/fdbserver/DataDistribution.actor.h b/fdbserver/DataDistribution.actor.h index bba180b236..4997132f36 100644 --- a/fdbserver/DataDistribution.actor.h +++ b/fdbserver/DataDistribution.actor.h @@ -345,7 +345,6 @@ ACTOR Future dataDistributionQueue(Database cx, UID distributorId, int teamSize, int singleRegionTeamSize, - double* lastLimited, const 
DDEnabledState* ddEnabledState); // Holds the permitted size and IO Bounds for a shard diff --git a/fdbserver/DataDistributionQueue.actor.cpp b/fdbserver/DataDistributionQueue.actor.cpp index bdbd9fb1d6..10d5c90357 100644 --- a/fdbserver/DataDistributionQueue.actor.cpp +++ b/fdbserver/DataDistributionQueue.actor.cpp @@ -487,7 +487,6 @@ struct DDQueueData { PromiseStream getShardMetrics; PromiseStream getTopKMetrics; - double* lastLimited; double lastInterval; int suppressIntervals; @@ -550,18 +549,17 @@ struct DDQueueData { PromiseStream output, FutureStream input, PromiseStream getShardMetrics, - PromiseStream getTopKMetrics, - double* lastLimited) + PromiseStream getTopKMetrics) : distributorId(mid), lock(lock), cx(cx), teamCollections(teamCollections), shardsAffectedByTeamFailure(sABTF), getAverageShardBytes(getAverageShardBytes), startMoveKeysParallelismLock(SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM), finishMoveKeysParallelismLock(SERVER_KNOBS->DD_MOVE_KEYS_PARALLELISM), fetchSourceLock(new FlowLock(SERVER_KNOBS->DD_FETCH_SOURCE_PARALLELISM)), activeRelocations(0), queuedRelocations(0), bytesWritten(0), teamSize(teamSize), singleRegionTeamSize(singleRegionTeamSize), - output(output), input(input), getShardMetrics(getShardMetrics), getTopKMetrics(getTopKMetrics), - lastLimited(lastLimited), lastInterval(0), suppressIntervals(0), - rawProcessingUnhealthy(new AsyncVar(false)), rawProcessingWiggle(new AsyncVar(false)), - unhealthyRelocations(0), movedKeyServersEventHolder(makeReference("MovedKeyServers")) {} + output(output), input(input), getShardMetrics(getShardMetrics), getTopKMetrics(getTopKMetrics), lastInterval(0), + suppressIntervals(0), rawProcessingUnhealthy(new AsyncVar(false)), + rawProcessingWiggle(new AsyncVar(false)), unhealthyRelocations(0), + movedKeyServersEventHolder(makeReference("MovedKeyServers")) {} void validate() { if (EXPENSIVE_VALIDATION) { @@ -1819,7 +1817,6 @@ ACTOR Future BgDDLoadRebalance(DDQueueData* self, int teamCollectionIndex, ACTOR 
Future BgDDMountainChopper(DDQueueData* self, int teamCollectionIndex) { state double rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL; - state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT; state Transaction tr(self->cx); state double lastRead = 0; state bool skipCurrentLoop = false; @@ -1829,10 +1826,6 @@ ACTOR Future BgDDMountainChopper(DDQueueData* self, int teamCollectionInde state TraceEvent traceEvent("BgDDMountainChopper_Old", self->distributorId); traceEvent.suppressFor(5.0).detail("PollingInterval", rebalancePollingInterval).detail("Rebalance", "Disk"); - if (*self->lastLimited > 0) { - traceEvent.detail("SecondsSinceLastLimited", now() - *self->lastLimited); - } - try { state Future delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch); if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) { @@ -1904,30 +1897,10 @@ ACTOR Future BgDDMountainChopper(DDQueueData* self, int teamCollectionInde teamCollectionIndex == 0, &traceEvent)); moved = _moved; - if (moved) { - resetCount = 0; - } else { - resetCount++; - } } } } - if (now() - (*self->lastLimited) < SERVER_KNOBS->BG_DD_SATURATION_DELAY) { - rebalancePollingInterval = std::min(SERVER_KNOBS->BG_DD_MAX_WAIT, - rebalancePollingInterval * SERVER_KNOBS->BG_DD_INCREASE_RATE); - } else { - rebalancePollingInterval = std::max(SERVER_KNOBS->BG_DD_MIN_WAIT, - rebalancePollingInterval / SERVER_KNOBS->BG_DD_DECREASE_RATE); - } - - if (resetCount >= SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT && - rebalancePollingInterval < SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL) { - rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL; - resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT; - } - - traceEvent.detail("ResetCount", resetCount); tr.reset(); } catch (Error& e) { // Log actor_cancelled because it's not legal to suppress an event that's initialized @@ -1942,7 +1915,6 @@ ACTOR Future BgDDMountainChopper(DDQueueData* self, int 
teamCollectionInde ACTOR Future BgDDValleyFiller(DDQueueData* self, int teamCollectionIndex) { state double rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL; - state int resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT; state Transaction tr(self->cx); state double lastRead = 0; state bool skipCurrentLoop = false; @@ -1953,10 +1925,6 @@ ACTOR Future BgDDValleyFiller(DDQueueData* self, int teamCollectionIndex) state TraceEvent traceEvent("BgDDValleyFiller_Old", self->distributorId); traceEvent.suppressFor(5.0).detail("PollingInterval", rebalancePollingInterval).detail("Rebalance", "Disk"); - if (*self->lastLimited > 0) { - traceEvent.detail("SecondsSinceLastLimited", now() - *self->lastLimited); - } - try { state Future delayF = delay(rebalancePollingInterval, TaskPriority::DataDistributionLaunch); if ((now() - lastRead) > SERVER_KNOBS->BG_REBALANCE_SWITCH_CHECK_INTERVAL) { @@ -2028,30 +1996,10 @@ ACTOR Future BgDDValleyFiller(DDQueueData* self, int teamCollectionIndex) teamCollectionIndex == 0, &traceEvent)); moved = _moved; - if (moved) { - resetCount = 0; - } else { - resetCount++; - } } } } - if (now() - (*self->lastLimited) < SERVER_KNOBS->BG_DD_SATURATION_DELAY) { - rebalancePollingInterval = std::min(SERVER_KNOBS->BG_DD_MAX_WAIT, - rebalancePollingInterval * SERVER_KNOBS->BG_DD_INCREASE_RATE); - } else { - rebalancePollingInterval = std::max(SERVER_KNOBS->BG_DD_MIN_WAIT, - rebalancePollingInterval / SERVER_KNOBS->BG_DD_DECREASE_RATE); - } - - if (resetCount >= SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT && - rebalancePollingInterval < SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL) { - rebalancePollingInterval = SERVER_KNOBS->BG_REBALANCE_POLLING_INTERVAL; - resetCount = SERVER_KNOBS->DD_REBALANCE_RESET_AMOUNT; - } - - traceEvent.detail("ResetCount", resetCount); tr.reset(); } catch (Error& e) { // Log actor_cancelled because it's not legal to suppress an event that's initialized @@ -2079,7 +2027,6 @@ ACTOR Future 
dataDistributionQueue(Database cx, UID distributorId, int teamSize, int singleRegionTeamSize, - double* lastLimited, const DDEnabledState* ddEnabledState) { state DDQueueData self(distributorId, lock, @@ -2092,8 +2039,7 @@ ACTOR Future dataDistributionQueue(Database cx, output, input, getShardMetrics, - getTopKMetrics, - lastLimited); + getTopKMetrics); state std::set serversToLaunchFrom; state KeyRange keysToLaunchFrom; state RelocateData launchData; From ae8014a181cd3c3ac10a54eef28df4b373981b2c Mon Sep 17 00:00:00 2001 From: Mohamed Oulmahdi Date: Tue, 31 May 2022 11:57:41 +0200 Subject: [PATCH 17/49] Disable iterator debugging for Windows --- cmake/ConfigureCompiler.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 398dfe5fb6..ab214a7f51 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -70,6 +70,7 @@ include_directories(${CMAKE_BINARY_DIR}) if(WIN32) add_definitions(-DBOOST_USE_WINDOWS_H) add_definitions(-DWIN32_LEAN_AND_MEAN) + add_definitions(-D_ITERATOR_DEBUG_LEVEL=0) endif() if (USE_CCACHE) From 541ff206f3d01085d26e76199cf3f0fa8c074cda Mon Sep 17 00:00:00 2001 From: Mohamed Oulmahdi Date: Tue, 31 May 2022 11:59:21 +0200 Subject: [PATCH 18/49] Use \n instead of \r\n in log formatters --- flow/JsonTraceLogFormatter.cpp | 2 +- flow/XmlTraceLogFormatter.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/flow/JsonTraceLogFormatter.cpp b/flow/JsonTraceLogFormatter.cpp index 2323d3d272..3e5239b567 100644 --- a/flow/JsonTraceLogFormatter.cpp +++ b/flow/JsonTraceLogFormatter.cpp @@ -80,6 +80,6 @@ std::string JsonTraceLogFormatter::formatEvent(const TraceEventFields& fields) c escapeString(oss, iter->second); oss << "\""; } - oss << " }\r\n"; + oss << " }\n"; return std::move(oss).str(); } diff --git a/flow/XmlTraceLogFormatter.cpp b/flow/XmlTraceLogFormatter.cpp index dff1ca15ab..35bb3faf95 100644 --- a/flow/XmlTraceLogFormatter.cpp +++ 
b/flow/XmlTraceLogFormatter.cpp @@ -86,6 +86,6 @@ std::string XmlTraceLogFormatter::formatEvent(const TraceEventFields& fields) co oss << "\" "; } - oss << "/>\r\n"; + oss << "/>\n"; return std::move(oss).str(); } From 81e81c6799ab591af4b3e4676c8610327dfa24b0 Mon Sep 17 00:00:00 2001 From: Mohamed Oulmahdi Date: Tue, 31 May 2022 12:01:39 +0200 Subject: [PATCH 19/49] Fix atomicPath and abspath for Windows --- flow/Platform.actor.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/flow/Platform.actor.cpp b/flow/Platform.actor.cpp index 2fb0c8e62f..1d36a55689 100644 --- a/flow/Platform.actor.cpp +++ b/flow/Platform.actor.cpp @@ -2238,7 +2238,7 @@ void renamedFile() { void renameFile(std::string const& fromPath, std::string const& toPath) { INJECT_FAULT(io_error, "renameFile"); // rename file failed #ifdef _WIN32 - if (MoveFile(fromPath.c_str(), toPath.c_str())) { + if (MoveFileExA(fromPath.c_str(), toPath.c_str(), MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) { // renamedFile(); return; } @@ -2331,8 +2331,9 @@ void atomicReplace(std::string const& path, std::string const& content, bool tex } f = 0; - if (!ReplaceFile(path.c_str(), tempfilename.c_str(), nullptr, NULL, nullptr, nullptr)) + if (!MoveFileExA(tempfilename.c_str(), path.c_str(), MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) { throw io_error(); + } #elif defined(__unixish__) if (!g_network->isSimulated()) { if (fsync(fileno(f)) != 0) @@ -2523,14 +2524,14 @@ std::string popPath(const std::string& path) { return path.substr(0, i + 1); } -std::string abspath(std::string const& path, bool resolveLinks, bool mustExist) { - if (path.empty()) { +std::string abspath(std::string const& path_, bool resolveLinks, bool mustExist) { + if (path_.empty()) { Error e = platform_error(); Severity sev = e.code() == error_code_io_error ? 
SevError : SevWarnAlways; - TraceEvent(sev, "AbsolutePathError").error(e).detail("Path", path); + TraceEvent(sev, "AbsolutePathError").error(e).detail("Path", path_); throw e; } - + std::string path = path_.back() == '\\' ? path_.substr(0, path_.size() - 1) : path_; // Returns an absolute path canonicalized to use only CANONICAL_PATH_SEPARATOR INJECT_FAULT(platform_error, "abspath"); // abspath failed From 3c4c485ef2e8a3ed78a4de22da613dc0fa19b7eb Mon Sep 17 00:00:00 2001 From: Mohamed Oulmahdi Date: Tue, 31 May 2022 12:01:57 +0200 Subject: [PATCH 20/49] Enable Windows tests --- fdbserver/fdbserver.actor.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fdbserver/fdbserver.actor.cpp b/fdbserver/fdbserver.actor.cpp index cfb7d3f93c..467b5d682a 100644 --- a/fdbserver/fdbserver.actor.cpp +++ b/fdbserver/fdbserver.actor.cpp @@ -1806,14 +1806,6 @@ int main(int argc, char* argv[]) { auto opts = CLIOptions::parseArgs(argc, argv); const auto role = opts.role; -#ifdef _WIN32 - // For now, ignore all tests for Windows - if (role == ServerRole::Simulation || role == ServerRole::UnitTests || role == ServerRole::Test) { - printf("Windows tests are not supported yet\n"); - flushAndExit(FDB_EXIT_SUCCESS); - } -#endif - if (role == ServerRole::Simulation) printf("Random seed is %u...\n", opts.randomSeed); From 3139e28aebc26aad2ca42c18fb0b0ec61a81ad05 Mon Sep 17 00:00:00 2001 From: Mohamed Oulmahdi Date: Tue, 31 May 2022 13:29:25 +0200 Subject: [PATCH 21/49] Fix TestRunner.py typos --- tests/TestRunner/TestRunner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/TestRunner/TestRunner.py b/tests/TestRunner/TestRunner.py index c6b405a654..718057e969 100755 --- a/tests/TestRunner/TestRunner.py +++ b/tests/TestRunner/TestRunner.py @@ -118,7 +118,7 @@ class LogParser: with open(self.infile) as f: line = f.readline() while line != "": - obj = self.processLine(line, linenr) + obj = self.process_line(line, linenr) line = f.readline() linenr += 1 
if obj is None: @@ -137,7 +137,7 @@ class LogParser: and self.sanitize_backtrace(obj) is not None ): obj = self.apply_address_to_line(obj) - self.writeObject(obj) + self.write_object(obj) def log_trace_parse_error(self, linenr, e): obj = {} @@ -164,7 +164,7 @@ class LogParser: return_code_trace["Command"] = command return_code_trace["ReturnCode"] = return_code return_code_trace["testname"] = self.name - self.writeObject(return_code_trace) + self.write_object(return_code_trace) class JSONParser(LogParser): @@ -208,7 +208,7 @@ class XMLParser(LogParser): self.errors.append(exception) def fatalError(self, exception): - self.fatalError.append(exception) + self.fatalErrors.append(exception) def warning(self, exception): self.warnings.append(exception) From 26ad8bc1843d35e317bc72e2a94864d5ae92fb23 Mon Sep 17 00:00:00 2001 From: Mohamed Oulmahdi Date: Tue, 31 May 2022 14:17:41 +0200 Subject: [PATCH 22/49] Format --- flow/Platform.actor.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/flow/Platform.actor.cpp b/flow/Platform.actor.cpp index 1d36a55689..a4882a3312 100644 --- a/flow/Platform.actor.cpp +++ b/flow/Platform.actor.cpp @@ -2238,7 +2238,9 @@ void renamedFile() { void renameFile(std::string const& fromPath, std::string const& toPath) { INJECT_FAULT(io_error, "renameFile"); // rename file failed #ifdef _WIN32 - if (MoveFileExA(fromPath.c_str(), toPath.c_str(), MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) { + if (MoveFileExA(fromPath.c_str(), + toPath.c_str(), + MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) { // renamedFile(); return; } @@ -2331,7 +2333,9 @@ void atomicReplace(std::string const& path, std::string const& content, bool tex } f = 0; - if (!MoveFileExA(tempfilename.c_str(), path.c_str(), MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) { + if (!MoveFileExA(tempfilename.c_str(), + path.c_str(), + MOVEFILE_COPY_ALLOWED | 
MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)) { throw io_error(); } #elif defined(__unixish__) From ee3508c2f88e3a5f4c18c8649e1abae198e1c1f7 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Tue, 31 May 2022 11:19:02 -0700 Subject: [PATCH 23/49] Address review comments. Primarily, this changes Tenant creation to be done via a method call on Database. --- bindings/c/test/fdb_api.hpp | 89 +++++++++++++++++++---------------- bindings/c/test/mako/mako.cpp | 13 ++--- 2 files changed, 54 insertions(+), 48 deletions(-) diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 50d497f7cf..5ceb7168bb 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -28,7 +28,6 @@ #include #include -#include #include #include #include @@ -507,6 +506,45 @@ public: } }; +class Tenant final { + friend class Database; + std::shared_ptr tenant; + + static constexpr CharsRef tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; + + explicit Tenant(native::FDBTenant* tenant_raw) { + if (tenant_raw) + tenant = std::shared_ptr(tenant_raw, &native::fdb_tenant_destroy); + } + +public: + Tenant(const Tenant&) noexcept = default; + Tenant& operator=(const Tenant&) noexcept = default; + Tenant() noexcept : tenant(nullptr) {} + + static void createTenant(Transaction tr, BytesRef name) { + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef()); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef()); + tr.set(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), + BytesRef()); + } + + static void deleteTenant(Transaction tr, BytesRef name) { + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef()); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS, BytesRef()); + tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef()); + tr.clear(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, 
toCharsRef(name)))); + } + + Transaction createTransaction() { + auto tx_native = static_cast(nullptr); + auto err = Error(native::fdb_tenant_create_transaction(tenant.get(), &tx_native)); + if (err) + throwError("Failed to create transaction: ", err); + return Transaction(tx_native); + } +}; + class Database { friend class Tenant; std::shared_ptr db; @@ -548,6 +586,16 @@ public: } } + Tenant openTenant(BytesRef name) { + if (!db) + throw std::runtime_error("openTenant from null database"); + auto tenant_native = static_cast(nullptr); + if (auto err = Error(native::fdb_database_open_tenant(db.get(), name.data(), name.size(), &tenant_native))) { + throwError(fmt::format("Failed to open tenant with name '{}': ", toCharsRef(name)), err); + } + return Tenant(tenant_native); + } + Transaction createTransaction() { if (!db) throw std::runtime_error("create_transaction from null database"); @@ -559,45 +607,6 @@ public: } }; -class Tenant final { - std::shared_ptr tenant; - static constexpr CharsRef tenantManagementMapPrefix = "\xff\xff/management/tenant_map/"; - -public: - Tenant(const Tenant&) noexcept = default; - Tenant& operator=(const Tenant&) noexcept = default; - Tenant(fdb::Database* db, BytesRef name, int name_length) : tenant(nullptr) { - auto tenant_raw = static_cast(nullptr); - if (auto err = Error(native::fdb_database_open_tenant(db->db.get(), name.data(), name_length, &tenant_raw))) { - throwError(fmt::format("Failed to open tenant with name '{}': ", toCharsRef(name)), err); - } - tenant = std::shared_ptr(tenant_raw, &native::fdb_tenant_destroy); - } - Tenant() noexcept : tenant(nullptr) {} - - static void createTenant(Transaction tr, BytesRef name) { - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef()); - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef()); - tr.set(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), - toBytesRef(std::string(""))); - } - - static void 
deleteTenant(Transaction tr, BytesRef name) { - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef()); - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_RAW_ACCESS, BytesRef()); - tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef()); - tr.clear(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name)))); - } - - Transaction createTransaction() { - auto tx_native = static_cast(nullptr); - auto err = Error(native::fdb_tenant_create_transaction(tenant.get(), &tx_native)); - if (err) - throwError("Failed to create transaction: ", err); - return Transaction(tx_native); - } -}; - } // namespace fdb #endif /*FDB_API_HPP*/ diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 3e56bed426..41d55f7bf6 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -78,7 +78,7 @@ using namespace mako; thread_local Logger logr = Logger(MainProcess{}, VERBOSE_DEFAULT); -Transaction createNewTransaction(Database db, Arguments const& args, int id = -1, Tenant** tenants = nullptr) { +Transaction createNewTransaction(Database db, Arguments const& args, int id = -1, Tenant* tenants = nullptr) { // No tenants specified if (args.tenants <= 0) { return db.createTransaction(); @@ -87,11 +87,11 @@ Transaction createNewTransaction(Database db, Arguments const& args, int id = -1 int tenant_id = (id == -1) ? 
urand(0, args.tenants - 1) : id; // If provided tenants array (only necessary in runWorkload), use it if (tenants) { - return tenants[tenant_id]->createTransaction(); + return tenants[tenant_id].createTransaction(); } std::string tenantStr = "tenant" + std::to_string(tenant_id); BytesRef tenant_name = toBytesRef(tenantStr); - Tenant t(&db, tenant_name, tenant_name.length()); + Tenant t = db.openTenant(tenant_name); return t.createTransaction(); } @@ -435,11 +435,11 @@ int runWorkload(Database db, // mimic typical tenant usage: keep tenants in memory // and create transactions as needed - Tenant* tenants[args.tenants]; + Tenant tenants[args.tenants]; for (int i = 0; i < args.tenants; ++i) { std::string tenantStr = "tenant" + std::to_string(i); BytesRef tenant_name = toBytesRef(tenantStr); - tenants[i] = new Tenant(&db, tenant_name, tenant_name.length()); + tenants[i] = db.openTenant(tenant_name); } /* main transaction loop */ @@ -510,9 +510,6 @@ int runWorkload(Database db, xacts++; total_xacts++; } - for (int i = 0; i < args.tenants; ++i) { - delete tenants[i]; - } return rc; } From 526d8b217edd574afbed6b8b8f52287510fd4372 Mon Sep 17 00:00:00 2001 From: "A.J. 
Beamon" Date: Tue, 31 May 2022 11:37:52 -0700 Subject: [PATCH 24/49] Fix formatting issues --- bindings/c/test/fdb_api.hpp | 3 +-- bindings/c/test/mako/mako.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 5ceb7168bb..529dd32344 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -525,8 +525,7 @@ public: static void createTenant(Transaction tr, BytesRef name) { tr.setOption(FDBTransactionOption::FDB_TR_OPTION_SPECIAL_KEY_SPACE_ENABLE_WRITES, BytesRef()); tr.setOption(FDBTransactionOption::FDB_TR_OPTION_LOCK_AWARE, BytesRef()); - tr.set(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), - BytesRef()); + tr.set(toBytesRef(fmt::format("{}{}", tenantManagementMapPrefix, toCharsRef(name))), BytesRef()); } static void deleteTenant(Transaction tr, BytesRef name) { diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 41d55f7bf6..daa60253da 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -91,7 +91,7 @@ Transaction createNewTransaction(Database db, Arguments const& args, int id = -1 } std::string tenantStr = "tenant" + std::to_string(tenant_id); BytesRef tenant_name = toBytesRef(tenantStr); - Tenant t = db.openTenant(tenant_name); + Tenant t = db.openTenant(tenant_name); return t.createTransaction(); } From 083fe01022d0b5057dd33c0c4e95837488015a29 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Thu, 2 Jun 2022 23:01:10 -0700 Subject: [PATCH 25/49] More uses of structured bindings to avoid use of .first and .second in pairs --- fdbserver/DDTeamCollection.actor.cpp | 100 +++++++++++++-------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/fdbserver/DDTeamCollection.actor.cpp b/fdbserver/DDTeamCollection.actor.cpp index f63c0d341b..f4a5175dce 100644 --- a/fdbserver/DDTeamCollection.actor.cpp +++ b/fdbserver/DDTeamCollection.actor.cpp @@ -3946,14 
+3946,14 @@ void DDTeamCollection::traceMachineInfo() const { int i = 0; TraceEvent("MachineInfo").detail("Size", machine_info.size()); - for (auto& machine : machine_info) { + for (auto& [machineName, machineInfo] : machine_info) { TraceEvent("MachineInfo", distributorId) .detail("MachineInfoIndex", i++) - .detail("Healthy", isMachineHealthy(machine.second)) - .detail("MachineID", machine.first.contents().toString()) - .detail("MachineTeamOwned", machine.second->machineTeams.size()) - .detail("ServerNumOnMachine", machine.second->serversOnMachine.size()) - .detail("ServersID", machine.second->getServersIDStr()); + .detail("Healthy", isMachineHealthy(machineInfo)) + .detail("MachineID", machineName.contents().toString()) + .detail("MachineTeamOwned", machineInfo->machineTeams.size()) + .detail("ServerNumOnMachine", machineInfo->serversOnMachine.size()) + .detail("ServersID", machineInfo->getServersIDStr()); } } @@ -4196,20 +4196,20 @@ int DDTeamCollection::addBestMachineTeams(int machineTeamsToBuild) { Reference DDTeamCollection::findOneLeastUsedServer() const { std::vector> leastUsedServers; int minTeams = std::numeric_limits::max(); - for (auto& server : server_info) { + for (auto& [serverID, server] : server_info) { // Only pick healthy server, which is not failed or excluded. 
- if (server_status.get(server.first).isUnhealthy()) + if (server_status.get(serverID).isUnhealthy()) continue; - if (!isValidLocality(configuration.storagePolicy, server.second->getLastKnownInterface().locality)) + if (!isValidLocality(configuration.storagePolicy, server->getLastKnownInterface().locality)) continue; - int numTeams = server.second->getTeams().size(); + int numTeams = server->getTeams().size(); if (numTeams < minTeams) { minTeams = numTeams; leastUsedServers.clear(); } if (minTeams == numTeams) { - leastUsedServers.push_back(server.second); + leastUsedServers.push_back(server); } } @@ -4299,12 +4299,12 @@ int DDTeamCollection::calculateHealthyMachineCount() const { std::pair DDTeamCollection::calculateMinMaxServerTeamsOnServer() const { int64_t minTeams = std::numeric_limits::max(); int64_t maxTeams = 0; - for (auto& server : server_info) { - if (server_status.get(server.first).isUnhealthy()) { + for (auto& [serverID, server] : server_info) { + if (server_status.get(serverID).isUnhealthy()) { continue; } - minTeams = std::min((int64_t)server.second->getTeams().size(), minTeams); - maxTeams = std::max((int64_t)server.second->getTeams().size(), maxTeams); + minTeams = std::min((int64_t)server->getTeams().size(), minTeams); + maxTeams = std::max((int64_t)server->getTeams().size(), maxTeams); } return std::make_pair(minTeams, maxTeams); } @@ -4312,12 +4312,12 @@ std::pair DDTeamCollection::calculateMinMaxServerTeamsOnServer std::pair DDTeamCollection::calculateMinMaxMachineTeamsOnMachine() const { int64_t minTeams = std::numeric_limits::max(); int64_t maxTeams = 0; - for (auto& machine : machine_info) { - if (!isMachineHealthy(machine.second)) { + for (auto& [_, machine] : machine_info) { + if (!isMachineHealthy(machine)) { continue; } - minTeams = std::min((int64_t)machine.second->machineTeams.size(), minTeams); - maxTeams = std::max((int64_t)machine.second->machineTeams.size(), maxTeams); + minTeams = std::min((int64_t)machine->machineTeams.size(), 
minTeams); + maxTeams = std::max((int64_t)machine->machineTeams.size(), maxTeams); } return std::make_pair(minTeams, maxTeams); } @@ -4581,8 +4581,8 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max healthyMachineTeamCount = getHealthyMachineTeamCount(); - std::pair minMaxTeamsOnServer = calculateMinMaxServerTeamsOnServer(); - std::pair minMaxMachineTeamsOnMachine = calculateMinMaxMachineTeamsOnMachine(); + auto [minTeamsOnServer, maxTeamsOnServer] = calculateMinMaxServerTeamsOnServer(); + auto [minMachineTeamsOnMachine, maxMachineTeamsOnMachine] = calculateMinMaxMachineTeamsOnMachine(); TraceEvent("TeamCollectionInfo", distributorId) .detail("Primary", primary) @@ -4597,10 +4597,10 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max .detail("DesiredMachineTeams", desiredMachineTeams) .detail("MaxMachineTeams", maxMachineTeams) .detail("TotalHealthyMachines", totalHealthyMachineCount) - .detail("MinTeamsOnServer", minMaxTeamsOnServer.first) - .detail("MaxTeamsOnServer", minMaxTeamsOnServer.second) - .detail("MinMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.first) - .detail("MaxMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.second) + .detail("MinTeamsOnServer", minTeamsOnServer) + .detail("MaxTeamsOnServer", maxTeamsOnServer) + .detail("MinMachineTeamsOnMachine", minMachineTeamsOnMachine) + .detail("MaxMachineTeamsOnMachine", maxMachineTeamsOnMachine) .detail("DoBuildTeams", doBuildTeams) .trackLatest(teamCollectionInfoEventHolder->trackingKey); @@ -4617,8 +4617,8 @@ void DDTeamCollection::traceTeamCollectionInfo() const { int maxMachineTeams = SERVER_KNOBS->MAX_TEAMS_PER_SERVER * totalHealthyMachineCount; int healthyMachineTeamCount = getHealthyMachineTeamCount(); - std::pair minMaxTeamsOnServer = calculateMinMaxServerTeamsOnServer(); - std::pair minMaxMachineTeamsOnMachine = calculateMinMaxMachineTeamsOnMachine(); + auto [minTeamsOnServer, maxTeamsOnServer] = 
calculateMinMaxServerTeamsOnServer(); + auto [minMachineTeamsOnMachine, maxMachineTeamsOnMachine] = calculateMinMaxMachineTeamsOnMachine(); TraceEvent("TeamCollectionInfo", distributorId) .detail("Primary", primary) @@ -4633,10 +4633,10 @@ void DDTeamCollection::traceTeamCollectionInfo() const { .detail("DesiredMachineTeams", desiredMachineTeams) .detail("MaxMachineTeams", maxMachineTeams) .detail("TotalHealthyMachines", totalHealthyMachineCount) - .detail("MinTeamsOnServer", minMaxTeamsOnServer.first) - .detail("MaxTeamsOnServer", minMaxTeamsOnServer.second) - .detail("MinMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.first) - .detail("MaxMachineTeamsOnMachine", minMaxMachineTeamsOnMachine.second) + .detail("MinTeamsOnServer", minTeamsOnServer) + .detail("MaxTeamsOnServer", maxTeamsOnServer) + .detail("MinMachineTeamsOnMachine", minMachineTeamsOnMachine) + .detail("MaxMachineTeamsOnMachine", maxMachineTeamsOnMachine) .detail("DoBuildTeams", doBuildTeams) .trackLatest(teamCollectionInfoEventHolder->trackingKey); @@ -5370,11 +5370,11 @@ public: wait(collection->getTeam(req)); - std::pair>, bool> resTeam = req.reply.getFuture().get(); + auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) }; - ASSERT(resTeam.first.present()); - auto servers = resTeam.first.get()->getServerIDs(); + ASSERT(resTeam.present()); + auto servers = resTeam.get()->getServerIDs(); const std::set selectedServers(servers.begin(), servers.end()); ASSERT(expectedServers == selectedServers); @@ -5422,11 +5422,11 @@ public: wait(collection->getTeam(req)); - std::pair>, bool> resTeam = req.reply.getFuture().get(); + auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(2, 0), UID(3, 0), UID(4, 0) }; - ASSERT(resTeam.first.present()); - auto servers = resTeam.first.get()->getServerIDs(); + ASSERT(resTeam.present()); + auto servers = resTeam.get()->getServerIDs(); const std::set 
selectedServers(servers.begin(), servers.end()); ASSERT(expectedServers == selectedServers); @@ -5472,11 +5472,11 @@ public: wait(collection->getTeam(req)); - std::pair>, bool> resTeam = req.reply.getFuture().get(); + auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(2, 0), UID(3, 0), UID(4, 0) }; - ASSERT(resTeam.first.present()); - auto servers = resTeam.first.get()->getServerIDs(); + ASSERT(resTeam.present()); + auto servers = resTeam.get()->getServerIDs(); const std::set selectedServers(servers.begin(), servers.end()); ASSERT(expectedServers == selectedServers); @@ -5521,11 +5521,11 @@ public: wait(collection->getTeam(req)); - std::pair>, bool> resTeam = req.reply.getFuture().get(); + auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) }; - ASSERT(resTeam.first.present()); - auto servers = resTeam.first.get()->getServerIDs(); + ASSERT(resTeam.present()); + auto servers = resTeam.get()->getServerIDs(); const std::set selectedServers(servers.begin(), servers.end()); ASSERT(expectedServers == selectedServers); @@ -5572,9 +5572,9 @@ public: wait(collection->getTeam(req)); - std::pair>, bool> resTeam = req.reply.getFuture().get(); + auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); - ASSERT(!resTeam.first.present()); + ASSERT(!resTeam.present()); return Void(); } @@ -5628,9 +5628,9 @@ public: wait(collection->getTeam(req)); - std::pair>, bool> resTeam = req.reply.getFuture().get(); + auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); - ASSERT(!resTeam.first.present()); + ASSERT(!resTeam.present()); return Void(); } @@ -5746,11 +5746,11 @@ public: wait(collection->getTeam(req)); - std::pair>, bool> resTeam = req.reply.getFuture().get(); + auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) }; - ASSERT(resTeam.first.present()); - auto servers = 
resTeam.first.get()->getServerIDs(); + ASSERT(resTeam.present()); + auto servers = resTeam.get()->getServerIDs(); const std::set selectedServers(servers.begin(), servers.end()); ASSERT(expectedServers == selectedServers); From d41ec69b23bb501a6940949bd7e31f48b48cdae1 Mon Sep 17 00:00:00 2001 From: Marian Dvorsky Date: Fri, 3 Jun 2022 11:55:33 +0200 Subject: [PATCH 26/49] Remove TesterApiWrapper, replace its uses with fdb_api.hpp --- bindings/c/CMakeLists.txt | 6 +- .../c/test/apitester/TesterApiWorkload.cpp | 43 +-- bindings/c/test/apitester/TesterApiWorkload.h | 14 +- .../c/test/apitester/TesterApiWrapper.cpp | 255 ------------------ bindings/c/test/apitester/TesterApiWrapper.h | 129 --------- .../TesterBlobGranuleCorrectnessWorkload.cpp | 114 ++++++-- .../TesterCancelTransactionWorkload.cpp | 24 +- .../apitester/TesterCorrectnessWorkload.cpp | 37 +-- .../c/test/apitester/TesterKeyValueStore.cpp | 49 ++-- .../c/test/apitester/TesterKeyValueStore.h | 22 +- .../apitester/TesterTransactionExecutor.cpp | 187 +++++++------ .../apitester/TesterTransactionExecutor.h | 22 +- bindings/c/test/apitester/TesterUtil.cpp | 8 +- bindings/c/test/apitester/TesterUtil.h | 13 +- bindings/c/test/apitester/TesterWorkload.cpp | 8 +- .../c/test/apitester/fdb_c_api_tester.cpp | 50 ++-- bindings/c/test/fdb_api.hpp | 165 +++++++++++- bindings/c/test/mako/operations.cpp | 12 +- 18 files changed, 486 insertions(+), 672 deletions(-) delete mode 100644 bindings/c/test/apitester/TesterApiWrapper.cpp delete mode 100644 bindings/c/test/apitester/TesterApiWrapper.h diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt index f9e6631849..003fe7f684 100644 --- a/bindings/c/CMakeLists.txt +++ b/bindings/c/CMakeLists.txt @@ -121,8 +121,6 @@ if(NOT WIN32) test/apitester/fdb_c_api_tester.cpp test/apitester/TesterApiWorkload.cpp test/apitester/TesterApiWorkload.h - test/apitester/TesterApiWrapper.cpp - test/apitester/TesterApiWrapper.h test/apitester/TesterTestSpec.cpp 
test/apitester/TesterTestSpec.h test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp @@ -191,9 +189,9 @@ if(NOT WIN32) target_link_libraries(disconnected_timeout_unit_tests PRIVATE fdb_c Threads::Threads) if(USE_SANITIZER) - target_link_libraries(fdb_c_api_tester PRIVATE fdb_c toml11_target Threads::Threads fmt::fmt boost_asan) + target_link_libraries(fdb_c_api_tester PRIVATE fdb_c fdb_cpp toml11_target Threads::Threads fmt::fmt boost_asan) else() - target_link_libraries(fdb_c_api_tester PRIVATE fdb_c toml11_target Threads::Threads fmt::fmt boost_target) + target_link_libraries(fdb_c_api_tester PRIVATE fdb_c fdb_cpp toml11_target Threads::Threads fmt::fmt boost_target) endif() # do not set RPATH for mako diff --git a/bindings/c/test/apitester/TesterApiWorkload.cpp b/bindings/c/test/apitester/TesterApiWorkload.cpp index b172e9d1ff..cd45e1b759 100644 --- a/bindings/c/test/apitester/TesterApiWorkload.cpp +++ b/bindings/c/test/apitester/TesterApiWorkload.cpp @@ -20,6 +20,7 @@ #include "TesterApiWorkload.h" #include "TesterUtil.h" +#include "test/fdb_api.hpp" #include namespace FdbApiTester { @@ -35,7 +36,7 @@ ApiWorkload::ApiWorkload(const WorkloadConfig& config) : WorkloadBase(config) { runUntilStop = config.getBoolOption("runUntilStop", false); numRandomOperations = config.getIntOption("numRandomOperations", 1000); numOperationsForProgressCheck = config.getIntOption("numOperationsForProgressCheck", 10); - keyPrefix = fmt::format("{}/", workloadId); + keyPrefix = fdb::toBytesRef(fmt::format("{}/", workloadId)); numRandomOpLeft = 0; stopReceived = false; checkingProgress = false; @@ -105,26 +106,26 @@ void ApiWorkload::randomOperation(TTaskFct cont) { ASSERT(false); } -std::string ApiWorkload::randomKeyName() { +fdb::Key ApiWorkload::randomKeyName() { return keyPrefix + Random::get().randomStringLowerCase(minKeyLength, maxKeyLength); } -std::string ApiWorkload::randomValue() { +fdb::Value ApiWorkload::randomValue() { return 
Random::get().randomStringLowerCase(minValueLength, maxValueLength); } -std::string ApiWorkload::randomNotExistingKey() { +fdb::Key ApiWorkload::randomNotExistingKey() { while (true) { - std::string key = randomKeyName(); + fdb::Key key = randomKeyName(); if (!store.exists(key)) { return key; } } } -std::string ApiWorkload::randomExistingKey() { - std::string genKey = randomKeyName(); - std::string key = store.getKey(genKey, true, 1); +fdb::Key ApiWorkload::randomExistingKey() { + fdb::Key genKey = randomKeyName(); + fdb::Key key = store.getKey(genKey, true, 1); if (key != store.endKey()) { return key; } @@ -136,7 +137,7 @@ std::string ApiWorkload::randomExistingKey() { return genKey; } -std::string ApiWorkload::randomKey(double existingKeyRatio) { +fdb::Key ApiWorkload::randomKey(double existingKeyRatio) { if (Random::get().randomBool(existingKeyRatio)) { return randomExistingKey(); } else { @@ -146,19 +147,19 @@ std::string ApiWorkload::randomKey(double existingKeyRatio) { void ApiWorkload::populateDataTx(TTaskFct cont) { int numKeys = maxKeysPerTransaction; - auto kvPairs = std::make_shared>(); + auto kvPairs = std::make_shared>(); for (int i = 0; i < numKeys; i++) { - kvPairs->push_back(KeyValue{ randomNotExistingKey(), randomValue() }); + kvPairs->push_back(fdb::KeyValue{ randomNotExistingKey(), randomValue() }); } execTransaction( [kvPairs](auto ctx) { - for (const KeyValue& kv : *kvPairs) { + for (const fdb::KeyValue& kv : *kvPairs) { ctx->tx()->set(kv.key, kv.value); } ctx->commit(); }, [this, kvPairs, cont]() { - for (const KeyValue& kv : *kvPairs) { + for (const fdb::KeyValue& kv : *kvPairs) { store.set(kv.key, kv.value); } schedule(cont); @@ -168,7 +169,7 @@ void ApiWorkload::populateDataTx(TTaskFct cont) { void ApiWorkload::clearData(TTaskFct cont) { execTransaction( [this](auto ctx) { - ctx->tx()->clearRange(keyPrefix, fmt::format("{}\xff", keyPrefix)); + ctx->tx()->clearRange(keyPrefix, keyPrefix + fdb::Key(1, '\xff')); ctx->commit(); }, [this, 
cont]() { schedule(cont); }); @@ -185,19 +186,19 @@ void ApiWorkload::populateData(TTaskFct cont) { void ApiWorkload::randomInsertOp(TTaskFct cont) { int numKeys = Random::get().randomInt(1, maxKeysPerTransaction); - auto kvPairs = std::make_shared>(); + auto kvPairs = std::make_shared>(); for (int i = 0; i < numKeys; i++) { - kvPairs->push_back(KeyValue{ randomNotExistingKey(), randomValue() }); + kvPairs->push_back(fdb::KeyValue{ randomNotExistingKey(), randomValue() }); } execTransaction( [kvPairs](auto ctx) { - for (const KeyValue& kv : *kvPairs) { + for (const fdb::KeyValue& kv : *kvPairs) { ctx->tx()->set(kv.key, kv.value); } ctx->commit(); }, [this, kvPairs, cont]() { - for (const KeyValue& kv : *kvPairs) { + for (const fdb::KeyValue& kv : *kvPairs) { store.set(kv.key, kv.value); } schedule(cont); @@ -206,7 +207,7 @@ void ApiWorkload::randomInsertOp(TTaskFct cont) { void ApiWorkload::randomClearOp(TTaskFct cont) { int numKeys = Random::get().randomInt(1, maxKeysPerTransaction); - auto keys = std::make_shared>(); + auto keys = std::make_shared>(); for (int i = 0; i < numKeys; i++) { keys->push_back(randomExistingKey()); } @@ -226,8 +227,8 @@ void ApiWorkload::randomClearOp(TTaskFct cont) { } void ApiWorkload::randomClearRangeOp(TTaskFct cont) { - std::string begin = randomKeyName(); - std::string end = randomKeyName(); + fdb::Key begin = randomKeyName(); + fdb::Key end = randomKeyName(); if (begin > end) { std::swap(begin, end); } diff --git a/bindings/c/test/apitester/TesterApiWorkload.h b/bindings/c/test/apitester/TesterApiWorkload.h index 1151737e1d..fd3630ceee 100644 --- a/bindings/c/test/apitester/TesterApiWorkload.h +++ b/bindings/c/test/apitester/TesterApiWorkload.h @@ -94,7 +94,7 @@ protected: std::atomic numRandomOpLeft; // Key prefix - std::string keyPrefix; + fdb::Key keyPrefix; // In-memory store maintaining expected database state KeyValueStore store; @@ -102,11 +102,11 @@ protected: ApiWorkload(const WorkloadConfig& config); // Methods for 
generating random keys and values - std::string randomKeyName(); - std::string randomValue(); - std::string randomNotExistingKey(); - std::string randomExistingKey(); - std::string randomKey(double existingKeyRatio); + fdb::Key randomKeyName(); + fdb::Value randomValue(); + fdb::Key randomNotExistingKey(); + fdb::Key randomExistingKey(); + fdb::Key randomKey(double existingKeyRatio); // Generate initial random data for the workload void populateData(TTaskFct cont); @@ -127,4 +127,4 @@ private: } // namespace FdbApiTester -#endif \ No newline at end of file +#endif diff --git a/bindings/c/test/apitester/TesterApiWrapper.cpp b/bindings/c/test/apitester/TesterApiWrapper.cpp deleted file mode 100644 index a018126ca1..0000000000 --- a/bindings/c/test/apitester/TesterApiWrapper.cpp +++ /dev/null @@ -1,255 +0,0 @@ -/* - * TesterApiWrapper.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "TesterApiWrapper.h" -#include -#include -#include - -namespace FdbApiTester { - -namespace { - -void fdb_check(fdb_error_t e) { - if (e) { - fmt::print(stderr, "Unexpected error: {}\n", fdb_get_error(e)); - std::abort(); - } -} - -} // namespace - -Future::Future(FDBFuture* f) : future_(f, fdb_future_destroy) {} - -void Future::reset() { - future_.reset(); -} - -void Future::cancel() { - ASSERT(future_); - fdb_future_cancel(future_.get()); -} - -fdb_error_t Future::getError() const { - ASSERT(future_); - return fdb_future_get_error(future_.get()); -} - -std::optional ValueFuture::getValue() const { - ASSERT(future_); - int out_present; - const std::uint8_t* val; - int vallen; - fdb_check(fdb_future_get_value(future_.get(), &out_present, &val, &vallen)); - return out_present ? std::make_optional(std::string((const char*)val, vallen)) : std::nullopt; -} - -std::vector KeyRangesFuture::getKeyRanges() const { - ASSERT(future_); - - int count; - const FDBKeyRange* ranges; - - fdb_check(fdb_future_get_keyrange_array(future_.get(), &ranges, &count)); - std::vector result; - result.reserve(count); - for (int i = 0; i < count; i++) { - FDBKeyRange kr = *ranges++; - KeyValue rkv; - rkv.key = std::string((const char*)kr.begin_key, kr.begin_key_length); - rkv.value = std::string((const char*)kr.end_key, kr.end_key_length); - result.push_back(rkv); - } - - return result; -} - -Result::Result(FDBResult* r) : result_(r, fdb_result_destroy) {} - -std::vector KeyValuesResult::getKeyValues(bool* more_out) { - ASSERT(result_); - - int count; - const FDBKeyValue* kvs; - int more; - - std::vector result; - - error_ = fdb_result_get_keyvalue_array(result_.get(), &kvs, &count, &more); - - if (error_ != error_code_success) { - return result; - } - - result.reserve(count); - for (int i = 0; i < count; i++) { - FDBKeyValue kv = *kvs++; - KeyValue rkv; - rkv.key = std::string((const char*)kv.key, kv.key_length); - rkv.value = std::string((const char*)kv.value, 
kv.value_length); - result.push_back(rkv); - } - *more_out = more; - - return result; -} - -// Given an FDBDatabase, initializes a new transaction. -Transaction::Transaction(FDBTransaction* tx) : tx_(tx, fdb_transaction_destroy) {} - -ValueFuture Transaction::get(std::string_view key, fdb_bool_t snapshot) { - ASSERT(tx_); - return ValueFuture(fdb_transaction_get(tx_.get(), (const uint8_t*)key.data(), key.size(), snapshot)); -} - -void Transaction::set(std::string_view key, std::string_view value) { - ASSERT(tx_); - fdb_transaction_set(tx_.get(), (const uint8_t*)key.data(), key.size(), (const uint8_t*)value.data(), value.size()); -} - -void Transaction::clear(std::string_view key) { - ASSERT(tx_); - fdb_transaction_clear(tx_.get(), (const uint8_t*)key.data(), key.size()); -} - -void Transaction::clearRange(std::string_view begin, std::string_view end) { - ASSERT(tx_); - fdb_transaction_clear_range( - tx_.get(), (const uint8_t*)begin.data(), begin.size(), (const uint8_t*)end.data(), end.size()); -} - -Future Transaction::commit() { - ASSERT(tx_); - return Future(fdb_transaction_commit(tx_.get())); -} - -void Transaction::cancel() { - ASSERT(tx_); - fdb_transaction_cancel(tx_.get()); -} - -Future Transaction::onError(fdb_error_t err) { - ASSERT(tx_); - return Future(fdb_transaction_on_error(tx_.get(), err)); -} - -void Transaction::reset() { - ASSERT(tx_); - fdb_transaction_reset(tx_.get()); -} - -fdb_error_t Transaction::setOption(FDBTransactionOption option) { - ASSERT(tx_); - return fdb_transaction_set_option(tx_.get(), option, reinterpret_cast(""), 0); -} - -class TesterGranuleContext { -public: - std::unordered_map loadsInProgress; - int64_t nextId = 0; - std::string basePath; - - ~TesterGranuleContext() { - // if there was an error or not all loads finished, delete data - for (auto& it : loadsInProgress) { - uint8_t* dataToFree = it.second; - delete[] dataToFree; - } - } -}; - -static int64_t granule_start_load(const char* filename, - int filenameLength, - 
int64_t offset, - int64_t length, - int64_t fullFileLength, - void* context) { - - TesterGranuleContext* ctx = (TesterGranuleContext*)context; - int64_t loadId = ctx->nextId++; - - uint8_t* buffer = new uint8_t[length]; - std::ifstream fin(ctx->basePath + std::string(filename, filenameLength), std::ios::in | std::ios::binary); - fin.seekg(offset); - fin.read((char*)buffer, length); - - ctx->loadsInProgress.insert({ loadId, buffer }); - - return loadId; -} - -static uint8_t* granule_get_load(int64_t loadId, void* context) { - TesterGranuleContext* ctx = (TesterGranuleContext*)context; - return ctx->loadsInProgress.at(loadId); -} - -static void granule_free_load(int64_t loadId, void* context) { - TesterGranuleContext* ctx = (TesterGranuleContext*)context; - auto it = ctx->loadsInProgress.find(loadId); - uint8_t* dataToFree = it->second; - delete[] dataToFree; - - ctx->loadsInProgress.erase(it); -} - -KeyValuesResult Transaction::readBlobGranules(std::string_view begin, - std::string_view end, - const std::string& basePath) { - ASSERT(tx_); - - TesterGranuleContext testerContext; - testerContext.basePath = basePath; - - FDBReadBlobGranuleContext granuleContext; - granuleContext.userContext = &testerContext; - granuleContext.debugNoMaterialize = false; - granuleContext.granuleParallelism = 1; - granuleContext.start_load_f = &granule_start_load; - granuleContext.get_load_f = &granule_get_load; - granuleContext.free_load_f = &granule_free_load; - - return KeyValuesResult(fdb_transaction_read_blob_granules(tx_.get(), - (const uint8_t*)begin.data(), - begin.size(), - (const uint8_t*)end.data(), - end.size(), - 0 /* beginVersion */, - -2 /* latest read version */, - granuleContext)); -} - -KeyRangesFuture Transaction::getBlobGranuleRanges(std::string_view begin, std::string_view end) { - ASSERT(tx_); - return KeyRangesFuture(fdb_transaction_get_blob_granule_ranges( - tx_.get(), (const uint8_t*)begin.data(), begin.size(), (const uint8_t*)end.data(), end.size())); -} - 
-fdb_error_t FdbApi::setOption(FDBNetworkOption option, std::string_view value) { - return fdb_network_set_option(option, reinterpret_cast(value.data()), value.size()); -} - -fdb_error_t FdbApi::setOption(FDBNetworkOption option, int64_t value) { - return fdb_network_set_option(option, reinterpret_cast(&value), sizeof(value)); -} - -fdb_error_t FdbApi::setOption(FDBNetworkOption option) { - return fdb_network_set_option(option, reinterpret_cast(""), 0); -} - -} // namespace FdbApiTester \ No newline at end of file diff --git a/bindings/c/test/apitester/TesterApiWrapper.h b/bindings/c/test/apitester/TesterApiWrapper.h deleted file mode 100644 index c987901bd4..0000000000 --- a/bindings/c/test/apitester/TesterApiWrapper.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * TesterApiWrapper.h - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#ifndef APITESTER_API_WRAPPER_H -#define APITESTER_API_WRAPPER_H - -#include -#include -#include -#include - -#define FDB_API_VERSION 720 -#include "bindings/c/foundationdb/fdb_c.h" - -#undef ERROR -#define ERROR(name, number, description) enum { error_code_##name = number }; - -#include "flow/error_definitions.h" - -#include "TesterUtil.h" - -namespace FdbApiTester { - -// Wrapper parent class to manage memory of an FDBFuture pointer. 
Cleans up -// FDBFuture when this instance goes out of scope. -class Future { -public: - Future() = default; - Future(FDBFuture* f); - - FDBFuture* fdbFuture() { return future_.get(); }; - - fdb_error_t getError() const; - explicit operator bool() const { return future_ != nullptr; }; - void reset(); - void cancel(); - -protected: - std::shared_ptr future_; -}; - -class ValueFuture : public Future { -public: - ValueFuture() = default; - ValueFuture(FDBFuture* f) : Future(f) {} - std::optional getValue() const; -}; - -class KeyRangesFuture : public Future { -public: - KeyRangesFuture() = default; - KeyRangesFuture(FDBFuture* f) : Future(f) {} - std::vector getKeyRanges() const; -}; - -class Result { -public: - Result() = default; - Result(FDBResult* r); - - FDBResult* fdbResult() { return result_.get(); }; - - fdb_error_t getError() const { return error_; } - - explicit operator bool() const { return result_ != nullptr; }; - - fdb_error_t error_ = error_code_client_invalid_operation; // have to call getX function to set this - -protected: - std::shared_ptr result_; -}; - -class KeyValuesResult : public Result { -public: - KeyValuesResult() = default; - KeyValuesResult(FDBResult* f) : Result(f) {} - std::vector getKeyValues(bool* more_out); -}; - -class Transaction { -public: - Transaction() = default; - Transaction(FDBTransaction* tx); - ValueFuture get(std::string_view key, fdb_bool_t snapshot); - void set(std::string_view key, std::string_view value); - void clear(std::string_view key); - void clearRange(std::string_view begin, std::string_view end); - Future commit(); - void cancel(); - Future onError(fdb_error_t err); - void reset(); - fdb_error_t setOption(FDBTransactionOption option); - - KeyValuesResult readBlobGranules(std::string_view begin, std::string_view end, const std::string& basePath); - KeyRangesFuture getBlobGranuleRanges(std::string_view begin, std::string_view end); - -private: - std::shared_ptr tx_; -}; - -class FdbApi { -public: - static 
fdb_error_t setOption(FDBNetworkOption option, std::string_view value); - static fdb_error_t setOption(FDBNetworkOption option, int64_t value); - static fdb_error_t setOption(FDBNetworkOption option); -}; - -} // namespace FdbApiTester - -#endif \ No newline at end of file diff --git a/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp b/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp index 33d3bf7c60..f731c050e0 100644 --- a/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp +++ b/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp @@ -24,6 +24,55 @@ namespace FdbApiTester { +class TesterGranuleContext { +public: + std::unordered_map loadsInProgress; + int64_t nextId = 0; + std::string basePath; + + ~TesterGranuleContext() { + // if there was an error or not all loads finished, delete data + for (auto& it : loadsInProgress) { + uint8_t* dataToFree = it.second; + delete[] dataToFree; + } + } +}; + +static int64_t granule_start_load(const char* filename, + int filenameLength, + int64_t offset, + int64_t length, + int64_t fullFileLength, + void* context) { + + TesterGranuleContext* ctx = (TesterGranuleContext*)context; + int64_t loadId = ctx->nextId++; + + uint8_t* buffer = new uint8_t[length]; + std::ifstream fin(ctx->basePath + std::string(filename, filenameLength), std::ios::in | std::ios::binary); + fin.seekg(offset); + fin.read((char*)buffer, length); + + ctx->loadsInProgress.insert({ loadId, buffer }); + + return loadId; +} + +static uint8_t* granule_get_load(int64_t loadId, void* context) { + TesterGranuleContext* ctx = (TesterGranuleContext*)context; + return ctx->loadsInProgress.at(loadId); +} + +static void granule_free_load(int64_t loadId, void* context) { + TesterGranuleContext* ctx = (TesterGranuleContext*)context; + auto it = ctx->loadsInProgress.find(loadId); + uint8_t* dataToFree = it->second; + delete[] dataToFree; + + ctx->loadsInProgress.erase(it); +} + class 
ApiBlobGranuleCorrectnessWorkload : public ApiWorkload { public: ApiBlobGranuleCorrectnessWorkload(const WorkloadConfig& config) : ApiWorkload(config) { @@ -42,9 +91,9 @@ private: bool seenReadSuccess = false; void randomReadOp(TTaskFct cont) { - std::string begin = randomKeyName(); - std::string end = randomKeyName(); - auto results = std::make_shared>(); + fdb::Key begin = randomKeyName(); + fdb::Key end = randomKeyName(); + auto results = std::make_shared>(); auto tooOld = std::make_shared(false); if (begin > end) { std::swap(begin, end); @@ -52,18 +101,31 @@ private: execTransaction( [this, begin, end, results, tooOld](auto ctx) { ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE); - KeyValuesResult res = ctx->tx()->readBlobGranules(begin, end, ctx->getBGBasePath()); - bool more = false; - (*results) = res.getKeyValues(&more); - if (res.getError() == error_code_blob_granule_transaction_too_old) { + TesterGranuleContext testerContext; + testerContext.basePath = ctx->getBGBasePath(); + + fdb::native::FDBReadBlobGranuleContext granuleContext; + granuleContext.userContext = &testerContext; + granuleContext.debugNoMaterialize = false; + granuleContext.granuleParallelism = 1; + granuleContext.start_load_f = &granule_start_load; + granuleContext.get_load_f = &granule_get_load; + granuleContext.free_load_f = &granule_free_load; + + fdb::Result res = ctx->tx()->readBlobGranules( + begin, end, 0 /* beginVersion */, -2 /* latest read version */, granuleContext); + auto out = fdb::Result::NativeKeyValueArray{}; + fdb::Error err = res.getKeyValueArrayNothrow(out); + if (err.code() == error_code_blob_granule_transaction_too_old) { info("BlobGranuleCorrectness::randomReadOp bg too old\n"); ASSERT(!seenReadSuccess); *tooOld = true; ctx->done(); - } else if (res.getError() != error_code_success) { - ctx->onError(res.getError()); + } else if (err.code() != error_code_success) { + ctx->onError(err); } else { - ASSERT(!more); + auto& [out_kv, out_count, out_more] = out; 
+ ASSERT(!out_more); if (!seenReadSuccess) { info("BlobGranuleCorrectness::randomReadOp first success\n"); } @@ -73,7 +135,7 @@ private: }, [this, begin, end, results, tooOld, cont]() { if (!*tooOld) { - std::vector expected = store.getRange(begin, end, store.size(), false); + std::vector expected = store.getRange(begin, end, store.size(), false); if (results->size() != expected.size()) { error(fmt::format("randomReadOp result size mismatch. expected: {} actual: {}", expected.size(), @@ -86,8 +148,8 @@ private: error(fmt::format("randomReadOp key mismatch at {}/{}. expected: {} actual: {}", i, results->size(), - expected[i].key, - (*results)[i].key)); + fdb::toCharsRef(expected[i].key), + fdb::toCharsRef((*results)[i].key))); } ASSERT((*results)[i].key == expected[i].key); @@ -96,9 +158,9 @@ private: "randomReadOp value mismatch at {}/{}. key: {} expected: {:.80} actual: {:.80}", i, results->size(), - expected[i].key, - expected[i].value, - (*results)[i].value)); + fdb::toCharsRef(expected[i].key), + fdb::toCharsRef(expected[i].value), + fdb::toCharsRef((*results)[i].value))); } ASSERT((*results)[i].value == expected[i].value); } @@ -108,19 +170,19 @@ private: } void randomGetRangesOp(TTaskFct cont) { - std::string begin = randomKeyName(); - std::string end = randomKeyName(); - auto results = std::make_shared>(); + fdb::Key begin = randomKeyName(); + fdb::Key end = randomKeyName(); + auto results = std::make_shared>(); if (begin > end) { std::swap(begin, end); } execTransaction( [begin, end, results](auto ctx) { - KeyRangesFuture f = ctx->tx()->getBlobGranuleRanges(begin, end); + fdb::Future f = ctx->tx()->getBlobGranuleRanges(begin, end).eraseType(); ctx->continueAfter( f, [ctx, f, results]() { - (*results) = f.getKeyRanges(); + *results = f.get(); ctx->done(); }, true); @@ -128,18 +190,18 @@ private: [this, begin, end, results, cont]() { if (seenReadSuccess) { ASSERT(results->size() > 0); - ASSERT(results->front().key <= begin); - ASSERT(results->back().value >= 
end); + ASSERT(results->front().beginKey <= begin); + ASSERT(results->back().endKey >= end); } for (int i = 0; i < results->size(); i++) { // no empty or inverted ranges - ASSERT((*results)[i].key < (*results)[i].value); + ASSERT((*results)[i].beginKey < (*results)[i].endKey); } for (int i = 1; i < results->size(); i++) { // ranges contain entire requested key range - ASSERT((*results)[i].key == (*results)[i - 1].value); + ASSERT((*results)[i].beginKey == (*results)[i - 1].endKey); } schedule(cont); @@ -174,4 +236,4 @@ private: WorkloadFactory ApiBlobGranuleCorrectnessWorkloadFactory( "ApiBlobGranuleCorrectness"); -} // namespace FdbApiTester \ No newline at end of file +} // namespace FdbApiTester diff --git a/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp b/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp index f77918cce3..4f287aec90 100644 --- a/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp +++ b/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp @@ -19,6 +19,7 @@ */ #include "TesterApiWorkload.h" #include "TesterUtil.h" +#include "test/fdb_api.hpp" namespace FdbApiTester { @@ -32,15 +33,15 @@ private: // Start multiple concurrent gets and cancel the transaction void randomCancelGetTx(TTaskFct cont) { int numKeys = Random::get().randomInt(1, maxKeysPerTransaction); - auto keys = std::make_shared>(); + auto keys = std::make_shared>(); for (int i = 0; i < numKeys; i++) { keys->push_back(randomKey(readExistingKeysRatio)); } execTransaction( [keys](auto ctx) { - std::vector futures; + std::vector futures; for (const auto& key : *keys) { - futures.push_back(ctx->tx()->get(key, false)); + futures.push_back(ctx->tx()->get(key, false).eraseType()); } ctx->done(); }, @@ -50,24 +51,25 @@ private: // Start multiple concurrent gets and cancel the transaction after the first get returns void randomCancelAfterFirstResTx(TTaskFct cont) { int numKeys = Random::get().randomInt(1, maxKeysPerTransaction); - auto keys = 
std::make_shared>(); + auto keys = std::make_shared>(); for (int i = 0; i < numKeys; i++) { keys->push_back(randomKey(readExistingKeysRatio)); } execTransaction( [this, keys](auto ctx) { - std::vector futures; + std::vector futures; for (const auto& key : *keys) { - futures.push_back(ctx->tx()->get(key, false)); + futures.push_back(ctx->tx()->get(key, false).eraseType()); } for (int i = 0; i < keys->size(); i++) { - ValueFuture f = futures[i]; + fdb::Future f = futures[i]; auto expectedVal = store.get((*keys)[i]); ctx->continueAfter(f, [expectedVal, f, this, ctx]() { - auto val = f.getValue(); + auto val = f.get(); if (expectedVal != val) { - error(fmt::format( - "cancelAfterFirstResTx mismatch. expected: {:.80} actual: {:.80}", expectedVal, val)); + error(fmt::format("cancelAfterFirstResTx mismatch. expected: {:.80} actual: {:.80}", + fdb::toCharsRef(expectedVal.value()), + fdb::toCharsRef(val.value()))); } ctx->done(); }); @@ -91,4 +93,4 @@ private: WorkloadFactory MiscTestWorkloadFactory("CancelTransaction"); -} // namespace FdbApiTester \ No newline at end of file +} // namespace FdbApiTester diff --git a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp index fabfd1c8a8..dd8f426a53 100644 --- a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp +++ b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp @@ -19,6 +19,7 @@ */ #include "TesterApiWorkload.h" #include "TesterUtil.h" +#include "test/fdb_api.hpp" #include #include @@ -33,36 +34,36 @@ private: void randomCommitReadOp(TTaskFct cont) { int numKeys = Random::get().randomInt(1, maxKeysPerTransaction); - auto kvPairs = std::make_shared>(); + auto kvPairs = std::make_shared>(); for (int i = 0; i < numKeys; i++) { - kvPairs->push_back(KeyValue{ randomKey(readExistingKeysRatio), randomValue() }); + kvPairs->push_back(fdb::KeyValue{ randomKey(readExistingKeysRatio), randomValue() }); } execTransaction( [kvPairs](auto ctx) { - for (const 
KeyValue& kv : *kvPairs) { + for (const fdb::KeyValue& kv : *kvPairs) { ctx->tx()->set(kv.key, kv.value); } ctx->commit(); }, [this, kvPairs, cont]() { - for (const KeyValue& kv : *kvPairs) { + for (const fdb::KeyValue& kv : *kvPairs) { store.set(kv.key, kv.value); } - auto results = std::make_shared>>(); + auto results = std::make_shared>>(); execTransaction( [kvPairs, results, this](auto ctx) { if (apiVersion >= 710) { // Test GRV caching in 7.1 and later ctx->tx()->setOption(FDB_TR_OPTION_USE_GRV_CACHE); } - auto futures = std::make_shared>(); + auto futures = std::make_shared>(); for (const auto& kv : *kvPairs) { futures->push_back(ctx->tx()->get(kv.key, false)); } ctx->continueAfterAll(*futures, [ctx, futures, results]() { results->clear(); for (auto& f : *futures) { - results->push_back(((ValueFuture&)f).getValue()); + results->push_back(f.get()); } ASSERT(results->size() == futures->size()); ctx->done(); @@ -76,9 +77,9 @@ private: if (actual != expected) { error( fmt::format("randomCommitReadOp mismatch. 
key: {} expected: {:.80} actual: {:.80}", - (*kvPairs)[i].key, - expected, - actual)); + fdb::toCharsRef((*kvPairs)[i].key), + fdb::toCharsRef(expected.value()), + fdb::toCharsRef(actual.value()))); ASSERT(false); } } @@ -89,21 +90,21 @@ private: void randomGetOp(TTaskFct cont) { int numKeys = Random::get().randomInt(1, maxKeysPerTransaction); - auto keys = std::make_shared>(); - auto results = std::make_shared>>(); + auto keys = std::make_shared>(); + auto results = std::make_shared>>(); for (int i = 0; i < numKeys; i++) { keys->push_back(randomKey(readExistingKeysRatio)); } execTransaction( [keys, results](auto ctx) { - auto futures = std::make_shared>(); + auto futures = std::make_shared>(); for (const auto& key : *keys) { futures->push_back(ctx->tx()->get(key, false)); } ctx->continueAfterAll(*futures, [ctx, futures, results]() { results->clear(); for (auto& f : *futures) { - results->push_back(((ValueFuture&)f).getValue()); + results->push_back(f.get()); } ASSERT(results->size() == futures->size()); ctx->done(); @@ -115,9 +116,9 @@ private: auto expected = store.get((*keys)[i]); if ((*results)[i] != expected) { error(fmt::format("randomGetOp mismatch. 
key: {} expected: {:.80} actual: {:.80}", - (*keys)[i], - expected, - (*results)[i])); + fdb::toCharsRef((*keys)[i]), + fdb::toCharsRef(expected.value()), + fdb::toCharsRef((*results)[i].value()))); } } schedule(cont); @@ -148,4 +149,4 @@ private: WorkloadFactory ApiCorrectnessWorkloadFactory("ApiCorrectness"); -} // namespace FdbApiTester \ No newline at end of file +} // namespace FdbApiTester diff --git a/bindings/c/test/apitester/TesterKeyValueStore.cpp b/bindings/c/test/apitester/TesterKeyValueStore.cpp index 1d9b8d63d2..7d0d7415a8 100644 --- a/bindings/c/test/apitester/TesterKeyValueStore.cpp +++ b/bindings/c/test/apitester/TesterKeyValueStore.cpp @@ -23,26 +23,26 @@ namespace FdbApiTester { // Get the value associated with a key -std::optional KeyValueStore::get(std::string_view key) const { +std::optional KeyValueStore::get(fdb::KeyRef key) const { std::unique_lock lock(mutex); - auto value = store.find(std::string(key)); + auto value = store.find(fdb::Key(key)); if (value != store.end()) return value->second; else - return std::optional(); + return std::optional(); } // Checks if the key exists -bool KeyValueStore::exists(std::string_view key) { +bool KeyValueStore::exists(fdb::KeyRef key) { std::unique_lock lock(mutex); - return (store.find(std::string(key)) != store.end()); + return (store.find(fdb::Key(key)) != store.end()); } // Returns the key designated by a key selector -std::string KeyValueStore::getKey(std::string_view keyName, bool orEqual, int offset) const { +fdb::Key KeyValueStore::getKey(fdb::KeyRef keyName, bool orEqual, int offset) const { std::unique_lock lock(mutex); // Begin by getting the start key referenced by the key selector - std::map::const_iterator mapItr = store.lower_bound(keyName); + std::map::const_iterator mapItr = store.lower_bound(keyName); // Update the iterator position if necessary based on the value of orEqual int count = 0; @@ -88,28 +88,25 @@ std::string KeyValueStore::getKey(std::string_view keyName, bool orEqual, 
int of } // Gets a range of key-value pairs, returning a maximum of results -std::vector KeyValueStore::getRange(std::string_view begin, - std::string_view end, - int limit, - bool reverse) const { +std::vector KeyValueStore::getRange(fdb::KeyRef begin, fdb::KeyRef end, int limit, bool reverse) const { std::unique_lock lock(mutex); - std::vector results; + std::vector results; if (!reverse) { - std::map::const_iterator mapItr = store.lower_bound(begin); + std::map::const_iterator mapItr = store.lower_bound(begin); for (; mapItr != store.end() && mapItr->first < end && results.size() < limit; mapItr++) - results.push_back(KeyValue{ mapItr->first, mapItr->second }); + results.push_back(fdb::KeyValue{ mapItr->first, mapItr->second }); } // Support for reverse getRange queries is supported, but not tested at this time. This is because reverse range // queries have been disallowed by the database at the API level else { - std::map::const_iterator mapItr = store.lower_bound(end); + std::map::const_iterator mapItr = store.lower_bound(end); if (mapItr == store.begin()) return results; for (--mapItr; mapItr->first >= begin && results.size() < abs(limit); mapItr--) { - results.push_back(KeyValue{ mapItr->first, mapItr->second }); + results.push_back(fdb::KeyValue{ mapItr->first, mapItr->second }); if (mapItr == store.begin()) break; } @@ -119,13 +116,13 @@ std::vector KeyValueStore::getRange(std::string_view begin, } // Stores a key-value pair in the database -void KeyValueStore::set(std::string_view key, std::string_view value) { +void KeyValueStore::set(fdb::KeyRef key, fdb::ValueRef value) { std::unique_lock lock(mutex); - store[std::string(key)] = value; + store[fdb::Key(key)] = value; } // Removes a key from the database -void KeyValueStore::clear(std::string_view key) { +void KeyValueStore::clear(fdb::KeyRef key) { std::unique_lock lock(mutex); auto iter = store.find(key); if (iter != store.end()) { @@ -134,7 +131,7 @@ void KeyValueStore::clear(std::string_view key) { 
} // Removes a range of keys from the database -void KeyValueStore::clear(std::string_view begin, std::string_view end) { +void KeyValueStore::clear(fdb::KeyRef begin, fdb::KeyRef end) { std::unique_lock lock(mutex); store.erase(store.lower_bound(begin), store.lower_bound(end)); } @@ -146,22 +143,22 @@ uint64_t KeyValueStore::size() const { } // The first key in the database; returned by key selectors that choose a key off the front -std::string KeyValueStore::startKey() const { - return ""; +fdb::Key KeyValueStore::startKey() const { + return fdb::Key(); } // The last key in the database; returned by key selectors that choose a key off the back -std::string KeyValueStore::endKey() const { - return "\xff"; +fdb::Key KeyValueStore::endKey() const { + return fdb::Key(1, '\xff'); } // Debugging function that prints all key-value pairs void KeyValueStore::printContents() const { std::unique_lock lock(mutex); printf("Contents:\n"); - std::map::const_iterator mapItr; + std::map::const_iterator mapItr; for (mapItr = store.begin(); mapItr != store.end(); mapItr++) printf("%s\n", mapItr->first.c_str()); } -} // namespace FdbApiTester \ No newline at end of file +} // namespace FdbApiTester diff --git a/bindings/c/test/apitester/TesterKeyValueStore.h b/bindings/c/test/apitester/TesterKeyValueStore.h index 0a60403d98..cccfe80970 100644 --- a/bindings/c/test/apitester/TesterKeyValueStore.h +++ b/bindings/c/test/apitester/TesterKeyValueStore.h @@ -37,44 +37,44 @@ namespace FdbApiTester { class KeyValueStore { public: // Get the value associated with a key - std::optional get(std::string_view key) const; + std::optional get(fdb::KeyRef key) const; // Checks if the key exists - bool exists(std::string_view key); + bool exists(fdb::KeyRef key); // Returns the key designated by a key selector - std::string getKey(std::string_view keyName, bool orEqual, int offset) const; + fdb::Key getKey(fdb::KeyRef keyName, bool orEqual, int offset) const; // Gets a range of key-value pairs, 
returning a maximum of results - std::vector getRange(std::string_view begin, std::string_view end, int limit, bool reverse) const; + std::vector getRange(fdb::KeyRef begin, fdb::KeyRef end, int limit, bool reverse) const; // Stores a key-value pair in the database - void set(std::string_view key, std::string_view value); + void set(fdb::KeyRef key, fdb::ValueRef value); // Removes a key from the database - void clear(std::string_view key); + void clear(fdb::KeyRef key); // Removes a range of keys from the database - void clear(std::string_view begin, std::string_view end); + void clear(fdb::KeyRef begin, fdb::KeyRef end); // The number of keys in the database uint64_t size() const; // The first key in the database; returned by key selectors that choose a key off the front - std::string startKey() const; + fdb::Key startKey() const; // The last key in the database; returned by key selectors that choose a key off the back - std::string endKey() const; + fdb::Key endKey() const; // Debugging function that prints all key-value pairs void printContents() const; private: // A map holding the key-value pairs - std::map> store; + std::map> store; mutable std::mutex mutex; }; } // namespace FdbApiTester -#endif \ No newline at end of file +#endif diff --git a/bindings/c/test/apitester/TesterTransactionExecutor.cpp b/bindings/c/test/apitester/TesterTransactionExecutor.cpp index e0cb8f93e2..4a9ba83619 100644 --- a/bindings/c/test/apitester/TesterTransactionExecutor.cpp +++ b/bindings/c/test/apitester/TesterTransactionExecutor.cpp @@ -22,6 +22,7 @@ #include "TesterUtil.h" #include "foundationdb/fdb_c_types.h" #include "test/apitester/TesterScheduler.h" +#include "test/fdb_api.hpp" #include #include #include @@ -36,24 +37,24 @@ namespace FdbApiTester { constexpr int LONG_WAIT_TIME_US = 2000000; constexpr int LARGE_NUMBER_OF_RETRIES = 10; -void TransactionActorBase::complete(fdb_error_t err) { +void TransactionActorBase::complete(fdb::Error err) { error = err; context = {}; } 
-void ITransactionContext::continueAfterAll(std::vector futures, TTaskFct cont) { +void ITransactionContext::continueAfterAll(std::vector futures, TTaskFct cont) { auto counter = std::make_shared>(futures.size()); - auto errorCode = std::make_shared>(error_code_success); + auto errorCode = std::make_shared>(fdb::Error::success()); auto thisPtr = shared_from_this(); for (auto& f : futures) { continueAfter( f, [thisPtr, f, counter, errorCode, cont]() { - if (f.getError() != error_code_success) { - (*errorCode) = f.getError(); + if (f.error().code() != error_code_success) { + (*errorCode) = f.error(); } if (--(*counter) == 0) { - if (*errorCode == error_code_success) { + if (errorCode->load().code() == error_code_success) { // all futures successful -> continue cont(); } else { @@ -71,7 +72,7 @@ void ITransactionContext::continueAfterAll(std::vector futures, TTaskFct */ class TransactionContextBase : public ITransactionContext { public: - TransactionContextBase(FDBTransaction* tx, + TransactionContextBase(fdb::Transaction tx, std::shared_ptr txActor, TTaskFct cont, IScheduler* scheduler, @@ -84,10 +85,12 @@ public: // IN_PROGRESS -> (ON_ERROR -> IN_PROGRESS)* [-> ON_ERROR] -> DONE enum class TxState { IN_PROGRESS, ON_ERROR, DONE }; - Transaction* tx() override { return &fdbTx; } + fdb::Transaction* tx() override { return &fdbTx; } // Set a continuation to be executed when a future gets ready - void continueAfter(Future f, TTaskFct cont, bool retryOnError) override { doContinueAfter(f, cont, retryOnError); } + void continueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override { + doContinueAfter(f, cont, retryOnError); + } // Complete the transaction with a commit void commit() override { @@ -97,7 +100,7 @@ public: } commitCalled = true; lock.unlock(); - Future f = fdbTx.commit(); + fdb::Future f = fdbTx.commit(); auto thisRef = shared_from_this(); doContinueAfter( f, [thisRef]() { thisRef->done(); }, true); @@ -114,12 +117,12 @@ public: if 
(retriedErrors.size() >= LARGE_NUMBER_OF_RETRIES) { fmt::print("Transaction succeeded after {} retries on errors: {}\n", retriedErrors.size(), - fmt::join(retriedErrors, ", ")); + fmt::join(retriedErrorCodes(), ", ")); } // cancel transaction so that any pending operations on it // fail gracefully fdbTx.cancel(); - txActor->complete(error_code_success); + txActor->complete(fdb::Error::success()); cleanUp(); contAfterDone(); } @@ -127,7 +130,7 @@ public: std::string getBGBasePath() override { return bgBasePath; } protected: - virtual void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) = 0; + virtual void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) = 0; // Clean up transaction state after completing the transaction // Note that the object may live longer, because it is referenced @@ -139,8 +142,8 @@ protected: } // Complete the transaction with an (unretriable) error - void transactionFailed(fdb_error_t err) { - ASSERT(err != error_code_success); + void transactionFailed(fdb::Error err) { + ASSERT(err); std::unique_lock lock(mutex); if (txState == TxState::DONE) { return; @@ -155,7 +158,7 @@ protected: // Handle result of an a transaction onError call void handleOnErrorResult() { ASSERT(txState == TxState::ON_ERROR); - fdb_error_t err = onErrorFuture.getError(); + fdb::Error err = onErrorFuture.error(); onErrorFuture = {}; if (err) { transactionFailed(err); @@ -169,24 +172,32 @@ protected: } // Checks if a transaction can be retried. 
Fails the transaction if the check fails - bool canRetry(fdb_error_t lastErr) { + bool canRetry(fdb::Error lastErr) { ASSERT(txState == TxState::ON_ERROR); retriedErrors.push_back(lastErr); if (retryLimit == 0 || retriedErrors.size() <= retryLimit) { if (retriedErrors.size() == LARGE_NUMBER_OF_RETRIES) { fmt::print("Transaction already retried {} times, on errors: {}\n", retriedErrors.size(), - fmt::join(retriedErrors, ", ")); + fmt::join(retriedErrorCodes(), ", ")); } return true; } - fmt::print("Transaction retry limit reached. Retried on errors: {}\n", fmt::join(retriedErrors, ", ")); + fmt::print("Transaction retry limit reached. Retried on errors: {}\n", fmt::join(retriedErrorCodes(), ", ")); transactionFailed(lastErr); return false; } + std::vector retriedErrorCodes() { + std::vector retriedErrorCodes; + for (auto e : retriedErrors) { + retriedErrorCodes.push_back(e.code()); + } + return retriedErrorCodes; + } + // FDB transaction - Transaction fdbTx; + fdb::Transaction fdbTx; // Actor implementing the transaction worklflow std::shared_ptr txActor; @@ -207,10 +218,10 @@ protected: TxState txState; // onError future used in ON_ERROR state - Future onErrorFuture; + fdb::Future onErrorFuture; // The error code on which onError was called - fdb_error_t onErrorArg; + fdb::Error onErrorArg; // The time point of calling onError TimePoint onErrorCallTimePoint; @@ -219,7 +230,7 @@ protected: bool commitCalled; // A history of errors on which the transaction was retried - std::vector retriedErrors; + std::vector retriedErrors; // blob granule base path std::string bgBasePath; @@ -230,7 +241,7 @@ protected: */ class BlockingTransactionContext : public TransactionContextBase { public: - BlockingTransactionContext(FDBTransaction* tx, + BlockingTransactionContext(fdb::Transaction tx, std::shared_ptr txActor, TTaskFct cont, IScheduler* scheduler, @@ -239,37 +250,37 @@ public: : TransactionContextBase(tx, txActor, cont, scheduler, retryLimit, bgBasePath) {} protected: - void 
doContinueAfter(Future f, TTaskFct cont, bool retryOnError) override { + void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override { auto thisRef = std::static_pointer_cast(shared_from_this()); scheduler->schedule( [thisRef, f, cont, retryOnError]() mutable { thisRef->blockingContinueAfter(f, cont, retryOnError); }); } - void blockingContinueAfter(Future f, TTaskFct cont, bool retryOnError) { + void blockingContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) { std::unique_lock lock(mutex); if (txState != TxState::IN_PROGRESS) { return; } lock.unlock(); auto start = timeNow(); - fdb_error_t err = fdb_future_block_until_ready(f.fdbFuture()); + fdb::Error err = f.blockUntilReady(); if (err) { transactionFailed(err); return; } - err = f.getError(); + err = f.error(); auto waitTimeUs = timeElapsedInUs(start); if (waitTimeUs > LONG_WAIT_TIME_US) { fmt::print("Long waiting time on a future: {:.3f}s, return code {} ({}), commit called: {}\n", microsecToSec(waitTimeUs), - err, - fdb_get_error(err), + err.code(), + err.what(), commitCalled); } - if (err == error_code_transaction_cancelled) { + if (err.code() == error_code_transaction_cancelled) { return; } - if (err == error_code_success || !retryOnError) { + if (err.code() == error_code_success || !retryOnError) { scheduler->schedule([cont]() { cont(); }); return; } @@ -277,7 +288,7 @@ protected: onError(err); } - virtual void onError(fdb_error_t err) override { + virtual void onError(fdb::Error err) override { std::unique_lock lock(mutex); if (txState != TxState::IN_PROGRESS) { // Ignore further errors, if the transaction is in the error handing mode or completed @@ -295,19 +306,19 @@ protected: onErrorArg = err; auto start = timeNow(); - fdb_error_t err2 = fdb_future_block_until_ready(onErrorFuture.fdbFuture()); + fdb::Error err2 = onErrorFuture.blockUntilReady(); if (err2) { transactionFailed(err2); return; } auto waitTimeUs = timeElapsedInUs(start); if (waitTimeUs > LONG_WAIT_TIME_US) { - 
fdb_error_t err3 = onErrorFuture.getError(); + fdb::Error err3 = onErrorFuture.error(); fmt::print("Long waiting time on onError({}) future: {:.3f}s, return code {} ({})\n", - onErrorArg, + onErrorArg.code(), microsecToSec(waitTimeUs), - err3, - fdb_get_error(err3)); + err3.code(), + err3.what()); } auto thisRef = std::static_pointer_cast(shared_from_this()); scheduler->schedule([thisRef]() { thisRef->handleOnErrorResult(); }); @@ -319,7 +330,7 @@ protected: */ class AsyncTransactionContext : public TransactionContextBase { public: - AsyncTransactionContext(FDBTransaction* tx, + AsyncTransactionContext(fdb::Transaction tx, std::shared_ptr txActor, TTaskFct cont, IScheduler* scheduler, @@ -328,23 +339,24 @@ public: : TransactionContextBase(tx, txActor, cont, scheduler, retryLimit, bgBasePath) {} protected: - void doContinueAfter(Future f, TTaskFct cont, bool retryOnError) override { + void doContinueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override { std::unique_lock lock(mutex); if (txState != TxState::IN_PROGRESS) { return; } - callbackMap[f.fdbFuture()] = CallbackInfo{ f, cont, shared_from_this(), retryOnError, timeNow() }; + callbackMap[f] = CallbackInfo{ f, cont, shared_from_this(), retryOnError, timeNow() }; lock.unlock(); - fdb_error_t err = fdb_future_set_callback(f.fdbFuture(), futureReadyCallback, this); - if (err) { + try { + f.then([this](fdb::Future f) { futureReadyCallback(f, this); }); + } catch (std::runtime_error& err) { lock.lock(); - callbackMap.erase(f.fdbFuture()); + callbackMap.erase(f); lock.unlock(); - transactionFailed(err); + transactionFailed(fdb::Error(error_code_operation_failed)); } } - static void futureReadyCallback(FDBFuture* f, void* param) { + static void futureReadyCallback(fdb::Future f, void* param) { try { AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param; txCtx->onFutureReady(f); @@ -357,7 +369,7 @@ protected: } } - void onFutureReady(FDBFuture* f) { + void onFutureReady(fdb::Future f) { auto 
endTime = timeNow(); injectRandomSleep(); // Hold a reference to this to avoid it to be @@ -372,25 +384,25 @@ protected: return; } lock.unlock(); - fdb_error_t err = fdb_future_get_error(f); + fdb::Error err = f.error(); auto waitTimeUs = timeElapsedInUs(cbInfo.startTime, endTime); if (waitTimeUs > LONG_WAIT_TIME_US) { fmt::print("Long waiting time on a future: {:.3f}s, return code {} ({})\n", microsecToSec(waitTimeUs), - err, - fdb_get_error(err)); + err.code(), + err.what()); } - if (err == error_code_transaction_cancelled) { + if (err.code() == error_code_transaction_cancelled) { return; } - if (err == error_code_success || !cbInfo.retryOnError) { + if (err.code() == error_code_success || !cbInfo.retryOnError) { scheduler->schedule(cbInfo.cont); return; } onError(err); } - virtual void onError(fdb_error_t err) override { + virtual void onError(fdb::Error err) override { std::unique_lock lock(mutex); if (txState != TxState::IN_PROGRESS) { // Ignore further errors, if the transaction is in the error handing mode or completed @@ -408,14 +420,15 @@ protected: onErrorFuture = tx()->onError(err); onErrorCallTimePoint = timeNow(); onErrorThisRef = std::static_pointer_cast(shared_from_this()); - fdb_error_t err2 = fdb_future_set_callback(onErrorFuture.fdbFuture(), onErrorReadyCallback, this); - if (err2) { + try { + onErrorFuture.then([this](fdb::Future f) { onErrorReadyCallback(f, this); }); + } catch (...) 
{ onErrorFuture = {}; - transactionFailed(err2); + transactionFailed(fdb::Error(error_code_operation_failed)); } } - static void onErrorReadyCallback(FDBFuture* f, void* param) { + static void onErrorReadyCallback(fdb::Future f, void* param) { try { AsyncTransactionContext* txCtx = (AsyncTransactionContext*)param; txCtx->onErrorReady(f); @@ -428,15 +441,15 @@ protected: } } - void onErrorReady(FDBFuture* f) { + void onErrorReady(fdb::Future f) { auto waitTimeUs = timeElapsedInUs(onErrorCallTimePoint); if (waitTimeUs > LONG_WAIT_TIME_US) { - fdb_error_t err = onErrorFuture.getError(); + fdb::Error err = onErrorFuture.error(); fmt::print("Long waiting time on onError({}): {:.3f}s, return code {} ({})\n", - onErrorArg, + onErrorArg.code(), microsecToSec(waitTimeUs), - err, - fdb_get_error(err)); + err.code(), + err.what()); } injectRandomSleep(); auto thisRef = onErrorThisRef; @@ -450,7 +463,7 @@ protected: // Cancel all pending operations // Note that the callbacks of the cancelled futures will still be called std::unique_lock lock(mutex); - std::vector futures; + std::vector futures; for (auto& iter : callbackMap) { futures.push_back(iter.second.future); } @@ -469,7 +482,7 @@ protected: // Object references for a future callback struct CallbackInfo { - Future future; + fdb::Future future; TTaskFct cont; std::shared_ptr thisRef; bool retryOnError; @@ -477,7 +490,7 @@ protected: }; // Map for keeping track of future waits and holding necessary object references - std::unordered_map callbackMap; + std::unordered_map callbackMap; // Holding reference to this for onError future C callback std::shared_ptr onErrorThisRef; @@ -498,13 +511,9 @@ public: protected: // Execute the transaction on the given database instance - void executeOnDatabase(FDBDatabase* db, std::shared_ptr txActor, TTaskFct cont) { - FDBTransaction* tx; - fdb_error_t err = fdb_database_create_transaction(db, &tx); - if (err != error_code_success) { - txActor->complete(err); - cont(); - } else { + void 
executeOnDatabase(fdb::Database db, std::shared_ptr txActor, TTaskFct cont) { + try { + fdb::Transaction tx = db.createTransaction(); std::shared_ptr ctx; if (options.blockOnFutures) { ctx = std::make_shared( @@ -515,6 +524,9 @@ protected: } txActor->init(ctx); txActor->start(); + } catch (...) { + txActor->complete(fdb::Error(error_code_operation_failed)); + cont(); } } @@ -537,14 +549,7 @@ public: void init(IScheduler* scheduler, const char* clusterFile, const std::string& bgBasePath) override { TransactionExecutorBase::init(scheduler, clusterFile, bgBasePath); for (int i = 0; i < options.numDatabases; i++) { - FDBDatabase* db; - fdb_error_t err = fdb_create_database(clusterFile, &db); - if (err != error_code_success) { - throw TesterError(fmt::format("Failed create database with the cluster file '{}'. Error: {}({})", - clusterFile, - err, - fdb_get_error(err))); - } + fdb::Database db(clusterFile); databases.push_back(db); } } @@ -554,14 +559,10 @@ public: executeOnDatabase(databases[idx], txActor, cont); } - void release() { - for (FDBDatabase* db : databases) { - fdb_database_destroy(db); - } - } + void release() { databases.clear(); } private: - std::vector databases; + std::vector databases; }; /** @@ -572,16 +573,8 @@ public: DBPerTransactionExecutor(const TransactionExecutorOptions& options) : TransactionExecutorBase(options) {} void execute(std::shared_ptr txActor, TTaskFct cont) override { - FDBDatabase* db = nullptr; - fdb_error_t err = fdb_create_database(clusterFile.c_str(), &db); - if (err != error_code_success) { - txActor->complete(err); - cont(); - } - executeOnDatabase(db, txActor, [cont, db]() { - fdb_database_destroy(db); - cont(); - }); + fdb::Database db(clusterFile.c_str()); + executeOnDatabase(db, txActor, cont); } }; @@ -593,4 +586,4 @@ std::unique_ptr createTransactionExecutor(const Transactio } } -} // namespace FdbApiTester \ No newline at end of file +} // namespace FdbApiTester diff --git 
a/bindings/c/test/apitester/TesterTransactionExecutor.h b/bindings/c/test/apitester/TesterTransactionExecutor.h index 9ab6bb03cb..09ab365827 100644 --- a/bindings/c/test/apitester/TesterTransactionExecutor.h +++ b/bindings/c/test/apitester/TesterTransactionExecutor.h @@ -23,8 +23,8 @@ #ifndef APITESTER_TRANSACTION_EXECUTOR_H #define APITESTER_TRANSACTION_EXECUTOR_H +#include "test/fdb_api.hpp" #include "TesterOptions.h" -#include "TesterApiWrapper.h" #include "TesterScheduler.h" #include #include @@ -39,18 +39,18 @@ public: virtual ~ITransactionContext() {} // Current FDB transaction - virtual Transaction* tx() = 0; + virtual fdb::Transaction* tx() = 0; // Schedule a continuation to be executed when the future gets ready // retryOnError controls whether transaction is retried in case of an error instead // of calling the continuation - virtual void continueAfter(Future f, TTaskFct cont, bool retryOnError = true) = 0; + virtual void continueAfter(fdb::Future f, TTaskFct cont, bool retryOnError = true) = 0; // Complete the transaction with a commit virtual void commit() = 0; // retry transaction on error - virtual void onError(fdb_error_t err) = 0; + virtual void onError(fdb::Error err) = 0; // Mark the transaction as completed without committing it (for read transactions) virtual void done() = 0; @@ -59,7 +59,7 @@ public: virtual std::string getBGBasePath() = 0; // A continuation to be executed when all of the given futures get ready - virtual void continueAfterAll(std::vector futures, TTaskFct cont); + virtual void continueAfterAll(std::vector futures, TTaskFct cont); }; /** @@ -76,10 +76,10 @@ public: virtual void start() = 0; // Transaction completion result (error_code_success in case of success) - virtual fdb_error_t getErrorCode() = 0; + virtual fdb::Error getError() = 0; // Notification about the completion of the transaction - virtual void complete(fdb_error_t err) = 0; + virtual void complete(fdb::Error err) = 0; }; /** @@ -88,15 +88,15 @@ public: class 
TransactionActorBase : public ITransactionActor { public: void init(std::shared_ptr ctx) override { context = ctx; } - fdb_error_t getErrorCode() override { return error; } - void complete(fdb_error_t err) override; + fdb::Error getError() override { return error; } + void complete(fdb::Error err) override; protected: std::shared_ptr ctx() { return context; } private: std::shared_ptr context; - fdb_error_t error = error_code_success; + fdb::Error error = fdb::Error::success(); }; // Type of the lambda functions implementing a transaction @@ -148,4 +148,4 @@ std::unique_ptr createTransactionExecutor(const Transactio } // namespace FdbApiTester -#endif \ No newline at end of file +#endif diff --git a/bindings/c/test/apitester/TesterUtil.cpp b/bindings/c/test/apitester/TesterUtil.cpp index a7e5414f85..3062fcd5ed 100644 --- a/bindings/c/test/apitester/TesterUtil.cpp +++ b/bindings/c/test/apitester/TesterUtil.cpp @@ -26,8 +26,8 @@ namespace FdbApiTester { -std::string lowerCase(const std::string& str) { - std::string res = str; +fdb::ByteString lowerCase(fdb::BytesRef str) { + fdb::ByteString res(str); std::transform(res.begin(), res.end(), res.begin(), ::tolower); return res; } @@ -46,9 +46,9 @@ Random& Random::get() { return random; } -std::string Random::randomStringLowerCase(int minLength, int maxLength) { +fdb::ByteString Random::randomStringLowerCase(int minLength, int maxLength) { int length = randomInt(minLength, maxLength); - std::string str; + fdb::ByteString str; str.reserve(length); for (int i = 0; i < length; i++) { str += (char)randomInt('a', 'z'); diff --git a/bindings/c/test/apitester/TesterUtil.h b/bindings/c/test/apitester/TesterUtil.h index 96a2e3d560..21c083b905 100644 --- a/bindings/c/test/apitester/TesterUtil.h +++ b/bindings/c/test/apitester/TesterUtil.h @@ -29,6 +29,8 @@ #include #include +#include "test/fdb_api.hpp" + namespace fmt { // fmt::format formatting for std::optional @@ -49,12 +51,7 @@ struct formatter> : fmt::formatter { namespace 
FdbApiTester { -struct KeyValue { - std::string key; - std::string value; -}; - -std::string lowerCase(const std::string& str); +fdb::ByteString lowerCase(fdb::BytesRef str); class Random { public: @@ -64,7 +61,7 @@ public: int randomInt(int min, int max); - std::string randomStringLowerCase(int minLength, int maxLength); + fdb::ByteString randomStringLowerCase(int minLength, int maxLength); bool randomBool(double trueRatio); @@ -112,4 +109,4 @@ static inline double microsecToSec(TimeDuration timeUs) { } // namespace FdbApiTester -#endif \ No newline at end of file +#endif diff --git a/bindings/c/test/apitester/TesterWorkload.cpp b/bindings/c/test/apitester/TesterWorkload.cpp index 1d11cd9a1f..cbce118f10 100644 --- a/bindings/c/test/apitester/TesterWorkload.cpp +++ b/bindings/c/test/apitester/TesterWorkload.cpp @@ -66,7 +66,7 @@ bool WorkloadConfig::getBoolOption(const std::string& name, bool defaultVal) con if (iter == options.end()) { return defaultVal; } else { - std::string val = lowerCase(iter->second); + std::string val(fdb::toCharsRef(lowerCase(fdb::toBytesRef(iter->second)))); if (val == "true") { return true; } else if (val == "false") { @@ -111,11 +111,11 @@ void WorkloadBase::execTransaction(std::shared_ptr tx, TTaskF tasksScheduled++; manager->txExecutor->execute(tx, [this, tx, cont, failOnError]() { numTxCompleted++; - fdb_error_t err = tx->getErrorCode(); - if (tx->getErrorCode() == error_code_success) { + fdb::Error err = tx->getError(); + if (err.code() == error_code_success) { cont(); } else { - std::string msg = fmt::format("Transaction failed with error: {} ({})", err, fdb_get_error(err)); + std::string msg = fmt::format("Transaction failed with error: {} ({})", err.code(), err.what()); if (failOnError) { error(msg); failed = true; diff --git a/bindings/c/test/apitester/fdb_c_api_tester.cpp b/bindings/c/test/apitester/fdb_c_api_tester.cpp index a08aa9e291..4b899fb66c 100644 --- a/bindings/c/test/apitester/fdb_c_api_tester.cpp +++ 
b/bindings/c/test/apitester/fdb_c_api_tester.cpp @@ -25,7 +25,7 @@ #include "TesterTestSpec.h" #include "TesterUtil.h" #include "flow/SimpleOpt.h" -#include "bindings/c/foundationdb/fdb_c.h" +#include "test/fdb_api.hpp" #include #include @@ -270,27 +270,26 @@ bool parseArgs(TesterOptions& options, int argc, char** argv) { return true; } -void fdb_check(fdb_error_t e) { +void fdb_check(fdb::native::fdb_error_t e) { if (e) { - fmt::print(stderr, "Unexpected FDB error: {}({})\n", e, fdb_get_error(e)); + fmt::print(stderr, "Unexpected FDB error: {}({})\n", e, fdb::native::fdb_get_error(e)); std::abort(); } } void applyNetworkOptions(TesterOptions& options) { if (!options.tmpDir.empty()) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_TMP_DIR, options.tmpDir)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_TMP_DIR, options.tmpDir); } if (!options.externalClientLibrary.empty()) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT)); - fdb_check( - FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY, options.externalClientLibrary)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_LIBRARY, + options.externalClientLibrary); } else if (!options.externalClientDir.empty()) { if (options.disableLocalClient) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_DISABLE_LOCAL_CLIENT); } - fdb_check( - FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_DIRECTORY, options.externalClientDir)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_EXTERNAL_CLIENT_DIRECTORY, options.externalClientDir); } else { if (options.disableLocalClient) { throw TesterError("Invalid options: Cannot disable local client if no external library is provided"); @@ -298,39 +297,38 @@ 
void applyNetworkOptions(TesterOptions& options) { } if (options.testSpec.multiThreaded) { - fdb_check( - FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, options.numFdbThreads)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, options.numFdbThreads); } if (options.testSpec.fdbCallbacksOnExternalThreads) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CALLBACKS_ON_EXTERNAL_THREADS)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CALLBACKS_ON_EXTERNAL_THREADS); } if (options.testSpec.buggify) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_CLIENT_BUGGIFY_ENABLE); } if (options.trace) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_ENABLE, options.traceDir)); - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_FORMAT, options.traceFormat)); - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_LOG_GROUP, options.logGroup)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_ENABLE, options.traceDir); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_FORMAT, options.traceFormat); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TRACE_LOG_GROUP, options.logGroup); } for (auto knob : options.knobs) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_KNOB, - fmt::format("{}={}", knob.first.c_str(), knob.second.c_str()))); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_KNOB, + fmt::format("{}={}", knob.first.c_str(), knob.second.c_str())); } if (!options.tlsCertFile.empty()) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CERT_PATH, options.tlsCertFile)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CERT_PATH, options.tlsCertFile); } if (!options.tlsKeyFile.empty()) { - 
fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_KEY_PATH, options.tlsKeyFile)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_KEY_PATH, options.tlsKeyFile); } if (!options.tlsCaFile.empty()) { - fdb_check(FdbApi::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CA_PATH, options.tlsCaFile)); + fdb::network::setOption(FDBNetworkOption::FDB_NET_OPTION_TLS_CA_PATH, options.tlsCaFile); } } @@ -400,17 +398,17 @@ int main(int argc, char** argv) { } randomizeOptions(options); - fdb_check(fdb_select_api_version(options.apiVersion)); + fdb_check(fdb::native::fdb_select_api_version(options.apiVersion)); applyNetworkOptions(options); - fdb_check(fdb_setup_network()); + fdb_check(fdb::native::fdb_setup_network()); - std::thread network_thread{ &fdb_run_network }; + std::thread network_thread{ &fdb::native::fdb_run_network }; if (!runWorkloads(options)) { retCode = 1; } - fdb_check(fdb_stop_network()); + fdb_check(fdb::native::fdb_stop_network()); network_thread.join(); } catch (const std::runtime_error& err) { fmt::print(stderr, "ERROR: {}\n", err.what()); diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 590c4afdb7..94e192d857 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -29,14 +29,21 @@ #include #include #include +#include #include #include #include +#include #include // introduce the option enums #include +#undef ERROR +#define ERROR(name, number, description) enum { error_code_##name = number }; + +#include "flow/error_definitions.h" + namespace fdb { // hide C API to discourage mixing C/C++ API @@ -47,9 +54,20 @@ namespace native { using ByteString = std::basic_string; using BytesRef = std::basic_string_view; using CharsRef = std::string_view; +using Key = ByteString; using KeyRef = BytesRef; +using Value = ByteString; using ValueRef = BytesRef; +struct KeyValue { + Key key; + Value value; +}; +struct KeyRange { + Key beginKey; + Key endKey; +}; + inline uint8_t const* 
toBytePtr(char const* ptr) noexcept { return reinterpret_cast(ptr); } @@ -96,6 +114,8 @@ public: bool retryable() const noexcept { return native::fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err) != 0; } + static Error success() { return Error(error_code_success); } + private: CodeType err; }; @@ -113,14 +133,24 @@ struct Int64 { return Error(native::fdb_future_get_int64(f, &out)); } }; -struct Key { +struct NativeKey { using Type = std::pair; static Error extract(native::FDBFuture* f, Type& out) noexcept { auto& [out_key, out_key_length] = out; return Error(native::fdb_future_get_key(f, &out_key, &out_key_length)); } }; -struct Value { +struct Key { + using Type = fdb::Key; + static Error extract(native::FDBFuture* f, Type& out) noexcept { + NativeKey::Type native_out{}; + auto err = NativeKey::extract(f, native_out); + auto& [out_key, out_key_length] = native_out; + out = fdb::Key(out_key, out_key_length); + return Error(err); + } +}; +struct NativeValue { using Type = std::tuple; static Error extract(native::FDBFuture* f, Type& out) noexcept { auto& [out_present, out_value, out_value_length] = out; @@ -130,6 +160,16 @@ struct Value { return Error(err); } }; +struct OptionalValue { + using Type = std::optional; + static Error extract(native::FDBFuture* f, Type& out) noexcept { + NativeValue::Type native_out{}; + auto err = NativeValue::extract(f, native_out); + auto& [out_present, out_value, out_value_length] = native_out; + out = out_present ? 
std::make_optional(fdb::Value(out_value, out_value_length)) : std::nullopt; + return Error(err); + } +}; struct StringArray { using Type = std::pair; static Error extract(native::FDBFuture* f, Type& out) noexcept { @@ -137,7 +177,7 @@ struct StringArray { return Error(native::fdb_future_get_string_array(f, &out_strings, &out_count)); } }; -struct KeyValueArray { +struct NativeKeyValueArray { using Type = std::tuple; static Error extract(native::FDBFuture* f, Type& out) noexcept { auto& [out_kv, out_count, out_more] = out; @@ -147,6 +187,52 @@ struct KeyValueArray { return Error(err); } }; +struct KeyValueArray { + using Type = std::pair, bool>; + static Error extract(native::FDBFuture* f, Type& out) noexcept { + NativeKeyValueArray::Type native_out{}; + auto err = NativeKeyValueArray::extract(f, native_out); + auto [kvs, count, more] = native_out; + + auto& [out_kv, out_more] = out; + out_more = more; + out_kv.clear(); + for (int i = 0; i < count; ++i) { + fdb::native::FDBKeyValue nativeKv = *kvs++; + KeyValue kv; + kv.key = fdb::Key(nativeKv.key, nativeKv.key_length); + kv.value = fdb::Value(nativeKv.value, nativeKv.value_length); + out_kv.push_back(kv); + } + return Error(err); + } +}; +struct NativeKeyRangeArray { + using Type = std::tuple; + static Error extract(native::FDBFuture* f, Type& out) noexcept { + auto& [out_kv, out_count] = out; + auto err = native::fdb_future_get_keyrange_array(f, &out_kv, &out_count); + return Error(err); + } +}; +struct KeyRangeArray { + using Type = std::vector; + static Error extract(native::FDBFuture* f, Type& out) noexcept { + NativeKeyRangeArray::Type native_out{}; + auto err = NativeKeyRangeArray::extract(f, native_out); + auto [ranges, count] = native_out; + out.clear(); + for (int i = 0; i < count; ++i) { + fdb::native::FDBKeyRange nativeKr = *ranges++; + KeyRange range; + range.beginKey = fdb::Key(nativeKr.begin_key, nativeKr.begin_key_length); + range.endKey = fdb::Key(nativeKr.end_key, nativeKr.end_key_length); + 
out.push_back(range); + } + return Error(err); + } +}; + } // namespace future_var [[noreturn]] inline void throwError(std::string_view preamble, Error err) { @@ -175,11 +261,19 @@ inline Error setOptionNothrow(FDBNetworkOption option, BytesRef str) noexcept { return Error(native::fdb_network_set_option(option, str.data(), intSize(str))); } +inline Error setOptionNothrow(FDBNetworkOption option, CharsRef str) noexcept { + return setOptionNothrow(option, toBytesRef(str)); +} + inline Error setOptionNothrow(FDBNetworkOption option, int64_t value) noexcept { return Error(native::fdb_network_set_option( option, reinterpret_cast(&value), static_cast(sizeof(value)))); } +inline Error setOptionNothrow(FDBNetworkOption option) noexcept { + return setOptionNothrow(option, ""); +} + inline void setOption(FDBNetworkOption option, BytesRef str) { if (auto err = setOptionNothrow(option, str)) { throwError(fmt::format("ERROR: fdb_network_set_option({}): ", @@ -188,6 +282,14 @@ inline void setOption(FDBNetworkOption option, BytesRef str) { } } +inline void setOption(FDBNetworkOption option, CharsRef str) { + if (auto err = setOptionNothrow(option, str)) { + throwError(fmt::format("ERROR: fdb_network_set_option({}): ", + static_cast>(option)), + err); + } +} + inline void setOption(FDBNetworkOption option, int64_t value) { if (auto err = setOptionNothrow(option, value)) { throwError(fmt::format("ERROR: fdb_network_set_option({}, {}): ", @@ -197,6 +299,14 @@ inline void setOption(FDBNetworkOption option, int64_t value) { } } +inline void setOption(FDBNetworkOption option) { + if (auto err = setOptionNothrow(option)) { + throwError(fmt::format("ERROR: fdb_network_set_option({}): ", + static_cast>(option)), + err); + } +} + inline Error setupNothrow() noexcept { return Error(native::fdb_setup_network()); } @@ -229,9 +339,9 @@ class Result { } public: - using KeyValueArray = future_var::KeyValueArray::Type; + using NativeKeyValueArray = future_var::NativeKeyValueArray::Type; - Error 
getKeyValueArrayNothrow(KeyValueArray& out) const noexcept { + Error getKeyValueArrayNothrow(NativeKeyValueArray& out) const noexcept { auto out_more_native = native::fdb_bool_t{}; auto& [out_kv, out_count, out_more] = out; auto err_raw = native::fdb_result_get_keyvalue_array(r.get(), &out_kv, &out_count, &out_more_native); @@ -239,8 +349,8 @@ public: return Error(err_raw); } - KeyValueArray getKeyValueArray() const { - auto ret = KeyValueArray{}; + NativeKeyValueArray getKeyValueArray() const { + auto ret = NativeKeyValueArray{}; if (auto err = getKeyValueArrayNothrow(ret)) throwError("ERROR: result_get_keyvalue_array(): ", err); return ret; @@ -250,6 +360,8 @@ public: class Future { protected: friend class Transaction; + friend struct FutureHash; + friend struct FutureEquals; std::shared_ptr f; Future(native::FDBFuture* future) { @@ -332,6 +444,14 @@ public: } }; +struct FutureHash { + size_t operator()(const Future& f) const { return std::hash{}(f.f.get()); } +}; + +struct FutureEquals { + bool operator()(const Future& a, const Future& b) const { return a.f.get() == b.f.get(); } +}; + template class TypedFuture : public Future { friend class Future; @@ -413,6 +533,12 @@ public: return Error(native::fdb_transaction_set_option(tr.get(), option, str.data(), intSize(str))); } + Error setOptionNothrow(FDBTransactionOption option, CharsRef str) noexcept { + return setOptionNothrow(option, toBytesRef(str)); + } + + Error setOptionNothrow(FDBTransactionOption option) noexcept { return setOptionNothrow(option, ""); } + void setOption(FDBTransactionOption option, int64_t value) { if (auto err = setOptionNothrow(option, value)) { throwError(fmt::format("transaction_set_option({}, {}) returned error: ", @@ -430,6 +556,22 @@ public: } } + void setOption(FDBTransactionOption option, CharsRef str) { + if (auto err = setOptionNothrow(option, str)) { + throwError(fmt::format("transaction_set_option({}) returned error: ", + static_cast>(option)), + err); + } + } + + void 
setOption(FDBTransactionOption option) { + if (auto err = setOptionNothrow(option)) { + throwError(fmt::format("transaction_set_option({}) returned error: ", + static_cast>(option)), + err); + } + } + TypedFuture getReadVersion() { return native::fdb_transaction_get_read_version(tr.get()); } Error getCommittedVersionNothrow(int64_t& out) { @@ -448,7 +590,7 @@ public: return native::fdb_transaction_get_key(tr.get(), sel.key, sel.keyLength, sel.orEqual, sel.offset, snapshot); } - TypedFuture get(KeyRef key, bool snapshot) { + TypedFuture get(KeyRef key, bool snapshot) { return native::fdb_transaction_get(tr.get(), key.data(), intSize(key), snapshot); } @@ -479,6 +621,11 @@ public: reverse); } + TypedFuture getBlobGranuleRanges(KeyRef begin, KeyRef end) { + return native::fdb_transaction_get_blob_granule_ranges( + tr.get(), begin.data(), intSize(begin), end.data(), intSize(end)); + } + Result readBlobGranules(KeyRef begin, KeyRef end, int64_t begin_version, @@ -494,6 +641,8 @@ public: void reset() { return native::fdb_transaction_reset(tr.get()); } + void cancel() { return native::fdb_transaction_cancel(tr.get()); } + void set(KeyRef key, ValueRef value) { native::fdb_transaction_set(tr.get(), key.data(), intSize(key), value.data(), intSize(value)); } diff --git a/bindings/c/test/mako/operations.cpp b/bindings/c/test/mako/operations.cpp index 11fe9a58a4..3cd93448f1 100644 --- a/bindings/c/test/mako/operations.cpp +++ b/bindings/c/test/mako/operations.cpp @@ -51,7 +51,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } } }, 1, @@ -72,7 +72,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } } }, 1, @@ -84,7 +84,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& 
val) { if (f && !f.error()) { - f.get(); + f.get(); } } } }, 1, @@ -107,7 +107,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } } }, 1, @@ -119,7 +119,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } }, { StepKind::IMM, @@ -257,7 +257,7 @@ const std::array opTable{ user_context.clear(); - auto out = Result::KeyValueArray{}; + auto out = Result::NativeKeyValueArray{}; err = r.getKeyValueArrayNothrow(out); if (!err || err.is(2037 /*blob_granule_not_materialized*/)) return Future(); From a3c6ed2e865b76ce80d162f2ea001d3dbf351f67 Mon Sep 17 00:00:00 2001 From: Sreenath Bodagala Date: Fri, 3 Jun 2022 19:29:54 +0000 Subject: [PATCH 27/49] - Introduce a knob that will control the placement of the commit versions of remote storage servers in version vector. This optimization will help reduce the size of version vector in HA configuration. --- fdbclient/ServerKnobs.cpp | 1 + fdbclient/ServerKnobs.h | 1 + fdbclient/VersionVector.h | 6 ++++-- fdbserver/masterserver.actor.cpp | 6 +++++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 479190cc15..f905d7bc83 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -43,6 +43,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( ENABLE_VERSION_VECTOR_TLOG_UNICAST, false ); init( MAX_VERSION_RATE_MODIFIER, 0.1 ); init( MAX_VERSION_RATE_OFFSET, VERSIONS_PER_SECOND ); // If the calculated version is more than this amount away from the expected version, it will be clamped to this value. This prevents huge version jumps. 
+ init( ENABLE_VERSION_VECTOR_HA_OPTIMIZATION, false ); // TLogs init( TLOG_TIMEOUT, 0.4 ); //cannot buggify because of availability diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 231832929d..55a712f290 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -44,6 +44,7 @@ public: // often, so that versions always advance smoothly double MAX_VERSION_RATE_MODIFIER; int64_t MAX_VERSION_RATE_OFFSET; + bool ENABLE_VERSION_VECTOR_HA_OPTIMIZATION; // TLogs bool PEEK_USING_STREAMING; diff --git a/fdbclient/VersionVector.h b/fdbclient/VersionVector.h index afc24a7051..a0c132f9d6 100644 --- a/fdbclient/VersionVector.h +++ b/fdbclient/VersionVector.h @@ -79,12 +79,14 @@ public: invalidateCachedEncodedSize(); } - void setVersion(const std::set& tags, Version version) { + void setVersion(const std::set& tags, Version version, int8_t localityFilter = tagLocalityInvalid) { ASSERT(version > maxVersion); for (auto& tag : tags) { ASSERT(tag != invalidTag); ASSERT(tag.locality > tagLocalityInvalid); - versions[tag] = version; + if (localityFilter == tagLocalityInvalid || tag.locality == localityFilter) { + versions[tag] = version; + } } maxVersion = version; invalidateCachedEncodedSize(); diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index c97b1ff91b..7b52855d8a 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -241,7 +241,11 @@ void updateLiveCommittedVersion(Reference self, ReportRawCommittedVe if (req.version > self->liveCommittedVersion.get()) { if (SERVER_KNOBS->ENABLE_VERSION_VECTOR && req.writtenTags.present()) { // TraceEvent("Received ReportRawCommittedVersionRequest").detail("Version",req.version); - self->ssVersionVector.setVersion(req.writtenTags.get(), req.version); + int8_t primaryLocality = tagLocalityInvalid; + if (SERVER_KNOBS->ENABLE_VERSION_VECTOR_HA_OPTIMIZATION && self->myInterface.locality.dcId().present()) { + primaryLocality = 
std::stoi(self->myInterface.locality.dcId().get().toString()); + } + self->ssVersionVector.setVersion(req.writtenTags.get(), req.version, primaryLocality); self->versionVectorTagUpdates += req.writtenTags.get().size(); } auto curTime = now(); From 3ec76b4b53d79432794f3524242184639d739870 Mon Sep 17 00:00:00 2001 From: Sreenath Bodagala Date: Fri, 3 Jun 2022 21:20:32 +0000 Subject: [PATCH 28/49] - Address a review comment --- fdbserver/masterserver.actor.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index 7b52855d8a..cabda8447b 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -66,6 +66,8 @@ struct MasterData : NonCopyable, ReferenceCounted { // up-to-date in the presence of key range splits/merges. VersionVector ssVersionVector; + int8_t locality; // sequencer locality + CounterCollection cc; Counter getCommitVersionRequests; Counter getLiveCommittedVersionRequests; @@ -115,6 +117,8 @@ struct MasterData : NonCopyable, ReferenceCounted { forceRecovery = false; } balancer = resolutionBalancer.resolutionBalancing(); + locality = myInterface.locality.dcId().present() ? std::stoi(myInterface.locality.dcId().get().toString()) + : tagLocalityInvalid; } ~MasterData() = default; }; @@ -241,10 +245,8 @@ void updateLiveCommittedVersion(Reference self, ReportRawCommittedVe if (req.version > self->liveCommittedVersion.get()) { if (SERVER_KNOBS->ENABLE_VERSION_VECTOR && req.writtenTags.present()) { // TraceEvent("Received ReportRawCommittedVersionRequest").detail("Version",req.version); - int8_t primaryLocality = tagLocalityInvalid; - if (SERVER_KNOBS->ENABLE_VERSION_VECTOR_HA_OPTIMIZATION && self->myInterface.locality.dcId().present()) { - primaryLocality = std::stoi(self->myInterface.locality.dcId().get().toString()); - } + int8_t primaryLocality = + SERVER_KNOBS->ENABLE_VERSION_VECTOR_HA_OPTIMIZATION ? 
self->locality : tagLocalityInvalid; self->ssVersionVector.setVersion(req.writtenTags.get(), req.version, primaryLocality); self->versionVectorTagUpdates += req.writtenTags.get().size(); } From f887e3e364945579c0835694008b1e530f0c046b Mon Sep 17 00:00:00 2001 From: Marian Dvorsky Date: Mon, 6 Jun 2022 16:29:42 +0200 Subject: [PATCH 29/49] Address review comments --- .../c/test/apitester/TesterApiWorkload.cpp | 10 +++---- .../TesterBlobGranuleCorrectnessWorkload.cpp | 6 ++-- .../TesterCancelTransactionWorkload.cpp | 4 +-- .../apitester/TesterCorrectnessWorkload.cpp | 8 ++--- .../apitester/TesterTransactionExecutor.cpp | 4 +-- .../apitester/TesterTransactionExecutor.h | 2 +- .../c/test/apitester/fdb_c_api_tester.cpp | 12 ++++---- bindings/c/test/fdb_api.hpp | 30 ++++--------------- 8 files changed, 28 insertions(+), 48 deletions(-) diff --git a/bindings/c/test/apitester/TesterApiWorkload.cpp b/bindings/c/test/apitester/TesterApiWorkload.cpp index cd45e1b759..c1499adb0c 100644 --- a/bindings/c/test/apitester/TesterApiWorkload.cpp +++ b/bindings/c/test/apitester/TesterApiWorkload.cpp @@ -154,7 +154,7 @@ void ApiWorkload::populateDataTx(TTaskFct cont) { execTransaction( [kvPairs](auto ctx) { for (const fdb::KeyValue& kv : *kvPairs) { - ctx->tx()->set(kv.key, kv.value); + ctx->tx().set(kv.key, kv.value); } ctx->commit(); }, @@ -169,7 +169,7 @@ void ApiWorkload::populateDataTx(TTaskFct cont) { void ApiWorkload::clearData(TTaskFct cont) { execTransaction( [this](auto ctx) { - ctx->tx()->clearRange(keyPrefix, keyPrefix + fdb::Key(1, '\xff')); + ctx->tx().clearRange(keyPrefix, keyPrefix + fdb::Key(1, '\xff')); ctx->commit(); }, [this, cont]() { schedule(cont); }); @@ -193,7 +193,7 @@ void ApiWorkload::randomInsertOp(TTaskFct cont) { execTransaction( [kvPairs](auto ctx) { for (const fdb::KeyValue& kv : *kvPairs) { - ctx->tx()->set(kv.key, kv.value); + ctx->tx().set(kv.key, kv.value); } ctx->commit(); }, @@ -214,7 +214,7 @@ void ApiWorkload::randomClearOp(TTaskFct cont) { 
execTransaction( [keys](auto ctx) { for (const auto& key : *keys) { - ctx->tx()->clear(key); + ctx->tx().clear(key); } ctx->commit(); }, @@ -234,7 +234,7 @@ void ApiWorkload::randomClearRangeOp(TTaskFct cont) { } execTransaction( [begin, end](auto ctx) { - ctx->tx()->clearRange(begin, end); + ctx->tx().clearRange(begin, end); ctx->commit(); }, [this, begin, end, cont]() { diff --git a/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp b/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp index f731c050e0..5174940401 100644 --- a/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp +++ b/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp @@ -100,7 +100,7 @@ private: } execTransaction( [this, begin, end, results, tooOld](auto ctx) { - ctx->tx()->setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE); + ctx->tx().setOption(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE); TesterGranuleContext testerContext; testerContext.basePath = ctx->getBGBasePath(); @@ -112,7 +112,7 @@ private: granuleContext.get_load_f = &granule_get_load; granuleContext.free_load_f = &granule_free_load; - fdb::Result res = ctx->tx()->readBlobGranules( + fdb::Result res = ctx->tx().readBlobGranules( begin, end, 0 /* beginVersion */, -2 /* latest read version */, granuleContext); auto out = fdb::Result::NativeKeyValueArray{}; fdb::Error err = res.getKeyValueArrayNothrow(out); @@ -178,7 +178,7 @@ private: } execTransaction( [begin, end, results](auto ctx) { - fdb::Future f = ctx->tx()->getBlobGranuleRanges(begin, end).eraseType(); + fdb::Future f = ctx->tx().getBlobGranuleRanges(begin, end).eraseType(); ctx->continueAfter( f, [ctx, f, results]() { diff --git a/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp b/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp index 4f287aec90..01aa968444 100644 --- a/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp +++ b/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp 
@@ -41,7 +41,7 @@ private: [keys](auto ctx) { std::vector futures; for (const auto& key : *keys) { - futures.push_back(ctx->tx()->get(key, false).eraseType()); + futures.push_back(ctx->tx().get(key, false).eraseType()); } ctx->done(); }, @@ -59,7 +59,7 @@ private: [this, keys](auto ctx) { std::vector futures; for (const auto& key : *keys) { - futures.push_back(ctx->tx()->get(key, false).eraseType()); + futures.push_back(ctx->tx().get(key, false).eraseType()); } for (int i = 0; i < keys->size(); i++) { fdb::Future f = futures[i]; diff --git a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp index dd8f426a53..0af25f979e 100644 --- a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp +++ b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp @@ -41,7 +41,7 @@ private: execTransaction( [kvPairs](auto ctx) { for (const fdb::KeyValue& kv : *kvPairs) { - ctx->tx()->set(kv.key, kv.value); + ctx->tx().set(kv.key, kv.value); } ctx->commit(); }, @@ -54,11 +54,11 @@ private: [kvPairs, results, this](auto ctx) { if (apiVersion >= 710) { // Test GRV caching in 7.1 and later - ctx->tx()->setOption(FDB_TR_OPTION_USE_GRV_CACHE); + ctx->tx().setOption(FDB_TR_OPTION_USE_GRV_CACHE); } auto futures = std::make_shared>(); for (const auto& kv : *kvPairs) { - futures->push_back(ctx->tx()->get(kv.key, false)); + futures->push_back(ctx->tx().get(kv.key, false)); } ctx->continueAfterAll(*futures, [ctx, futures, results]() { results->clear(); @@ -99,7 +99,7 @@ private: [keys, results](auto ctx) { auto futures = std::make_shared>(); for (const auto& key : *keys) { - futures->push_back(ctx->tx()->get(key, false)); + futures->push_back(ctx->tx().get(key, false)); } ctx->continueAfterAll(*futures, [ctx, futures, results]() { results->clear(); diff --git a/bindings/c/test/apitester/TesterTransactionExecutor.cpp b/bindings/c/test/apitester/TesterTransactionExecutor.cpp index 4a9ba83619..663cccee43 100644 --- 
a/bindings/c/test/apitester/TesterTransactionExecutor.cpp +++ b/bindings/c/test/apitester/TesterTransactionExecutor.cpp @@ -85,7 +85,7 @@ public: // IN_PROGRESS -> (ON_ERROR -> IN_PROGRESS)* [-> ON_ERROR] -> DONE enum class TxState { IN_PROGRESS, ON_ERROR, DONE }; - fdb::Transaction* tx() override { return &fdbTx; } + fdb::Transaction tx() override { return fdbTx; } // Set a continuation to be executed when a future gets ready void continueAfter(fdb::Future f, TTaskFct cont, bool retryOnError) override { @@ -417,7 +417,7 @@ protected: ASSERT(!onErrorFuture); onErrorArg = err; - onErrorFuture = tx()->onError(err); + onErrorFuture = tx().onError(err); onErrorCallTimePoint = timeNow(); onErrorThisRef = std::static_pointer_cast(shared_from_this()); try { diff --git a/bindings/c/test/apitester/TesterTransactionExecutor.h b/bindings/c/test/apitester/TesterTransactionExecutor.h index 09ab365827..31f6f3bc84 100644 --- a/bindings/c/test/apitester/TesterTransactionExecutor.h +++ b/bindings/c/test/apitester/TesterTransactionExecutor.h @@ -39,7 +39,7 @@ public: virtual ~ITransactionContext() {} // Current FDB transaction - virtual fdb::Transaction* tx() = 0; + virtual fdb::Transaction tx() = 0; // Schedule a continuation to be executed when the future gets ready // retryOnError controls whether transaction is retried in case of an error instead diff --git a/bindings/c/test/apitester/fdb_c_api_tester.cpp b/bindings/c/test/apitester/fdb_c_api_tester.cpp index 4b899fb66c..879b65ab98 100644 --- a/bindings/c/test/apitester/fdb_c_api_tester.cpp +++ b/bindings/c/test/apitester/fdb_c_api_tester.cpp @@ -270,9 +270,9 @@ bool parseArgs(TesterOptions& options, int argc, char** argv) { return true; } -void fdb_check(fdb::native::fdb_error_t e) { +void fdb_check(fdb::Error e) { if (e) { - fmt::print(stderr, "Unexpected FDB error: {}({})\n", e, fdb::native::fdb_get_error(e)); + fmt::print(stderr, "Unexpected FDB error: {}({})\n", e.code(), e.what()); std::abort(); } } @@ -398,17 +398,17 @@ 
int main(int argc, char** argv) { } randomizeOptions(options); - fdb_check(fdb::native::fdb_select_api_version(options.apiVersion)); + fdb::selectApiVersion(options.apiVersion); applyNetworkOptions(options); - fdb_check(fdb::native::fdb_setup_network()); + fdb::network::setup(); - std::thread network_thread{ &fdb::native::fdb_run_network }; + std::thread network_thread{ &fdb::network::run }; if (!runWorkloads(options)) { retCode = 1; } - fdb_check(fdb::native::fdb_stop_network()); + fdb_check(fdb::network::stop()); network_thread.join(); } catch (const std::runtime_error& err) { fmt::print(stderr, "ERROR: {}\n", err.what()); diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 94e192d857..0365f3a9e2 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -114,7 +114,7 @@ public: bool retryable() const noexcept { return native::fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err) != 0; } - static Error success() { return Error(error_code_success); } + static Error success() { return Error(); } private: CodeType err; @@ -283,11 +283,7 @@ inline void setOption(FDBNetworkOption option, BytesRef str) { } inline void setOption(FDBNetworkOption option, CharsRef str) { - if (auto err = setOptionNothrow(option, str)) { - throwError(fmt::format("ERROR: fdb_network_set_option({}): ", - static_cast>(option)), - err); - } + setOption(option, toBytesRef(str)); } inline void setOption(FDBNetworkOption option, int64_t value) { @@ -300,11 +296,7 @@ inline void setOption(FDBNetworkOption option, int64_t value) { } inline void setOption(FDBNetworkOption option) { - if (auto err = setOptionNothrow(option)) { - throwError(fmt::format("ERROR: fdb_network_set_option({}): ", - static_cast>(option)), - err); - } + setOption(option, ""); } inline Error setupNothrow() noexcept { @@ -556,21 +548,9 @@ public: } } - void setOption(FDBTransactionOption option, CharsRef str) { - if (auto err = setOptionNothrow(option, str)) { - 
throwError(fmt::format("transaction_set_option({}) returned error: ", - static_cast>(option)), - err); - } - } + void setOption(FDBTransactionOption option, CharsRef str) { setOption(option, toBytesRef(str)); } - void setOption(FDBTransactionOption option) { - if (auto err = setOptionNothrow(option)) { - throwError(fmt::format("transaction_set_option({}) returned error: ", - static_cast>(option)), - err); - } - } + void setOption(FDBTransactionOption option) { setOption(option, ""); } TypedFuture getReadVersion() { return native::fdb_transaction_get_read_version(tr.get()); } From 418205eeab48c90de9f2dc1ad5331d804246f456 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 23 May 2022 11:55:35 -0700 Subject: [PATCH 30/49] Use bindings for brevity in DDTC UT and a few places where server_info is iterated over --- fdbserver/DDTeamCollection.actor.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fdbserver/DDTeamCollection.actor.cpp b/fdbserver/DDTeamCollection.actor.cpp index f4a5175dce..880212913b 100644 --- a/fdbserver/DDTeamCollection.actor.cpp +++ b/fdbserver/DDTeamCollection.actor.cpp @@ -190,8 +190,8 @@ public: } bool foundSrc = false; - for (int i = 0; i < req.src.size(); i++) { - if (self->server_info.count(req.src[i])) { + for (const auto& id : req.src) { + if (self->server_info.count(id)) { foundSrc = true; break; } @@ -516,7 +516,7 @@ public: } } - for (auto& [serverID, server] : self->server_info) { + for (const auto& [serverID, server] : self->server_info) { if (!self->server_status.get(serverID).isUnhealthy()) { ++serverCount; LocalityData const& serverLocation = server->getLastKnownInterface().locality; @@ -5281,8 +5281,8 @@ public: ASSERT(result >= 8); - for (auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) { - auto teamCount = process->second->getTeams().size(); + for (const auto& [serverID, server] : collection->server_info) { + auto teamCount 
= server->getTeams().size(); ASSERT(teamCount >= 1); // ASSERT(teamCount <= targetTeamsPerServer); } @@ -5319,8 +5319,8 @@ public: // We need to guarantee a server always have at least a team so that the server can participate in data // distribution - for (auto process = collection->server_info.begin(); process != collection->server_info.end(); process++) { - auto teamCount = process->second->getTeams().size(); + for (const auto& [serverID, server] : collection->server_info) { + auto teamCount = server->getTeams().size(); ASSERT(teamCount >= 1); } @@ -5370,7 +5370,7 @@ public: wait(collection->getTeam(req)); - auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); + const auto [resTeam, srcFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) }; ASSERT(resTeam.present()); @@ -5422,7 +5422,7 @@ public: wait(collection->getTeam(req)); - auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); + const auto [resTeam, srcFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(2, 0), UID(3, 0), UID(4, 0) }; ASSERT(resTeam.present()); @@ -5472,7 +5472,7 @@ public: wait(collection->getTeam(req)); - auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); + const auto [resTeam, srcFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(2, 0), UID(3, 0), UID(4, 0) }; ASSERT(resTeam.present()); @@ -5521,7 +5521,7 @@ public: wait(collection->getTeam(req)); - auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); + const auto [resTeam, srcFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) }; ASSERT(resTeam.present()); @@ -5572,7 +5572,7 @@ public: wait(collection->getTeam(req)); - auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); + const auto [resTeam, srcFound] = req.reply.getFuture().get(); ASSERT(!resTeam.present()); @@ -5628,7 +5628,7 @@ public: wait(collection->getTeam(req)); - auto& [resTeam, srcTeamFound] = 
req.reply.getFuture().get(); + const auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); ASSERT(!resTeam.present()); @@ -5746,7 +5746,7 @@ public: wait(collection->getTeam(req)); - auto& [resTeam, srcTeamFound] = req.reply.getFuture().get(); + const auto [resTeam, srcFound] = req.reply.getFuture().get(); std::set expectedServers{ UID(1, 0), UID(2, 0), UID(3, 0) }; ASSERT(resTeam.present()); From 990c789a5cb145c44d66a4b464a13ef3c9f97808 Mon Sep 17 00:00:00 2001 From: "Bharadwaj V.R" Date: Mon, 6 Jun 2022 13:13:11 -0700 Subject: [PATCH 31/49] Increase quiet-database timeout when buggify is on; data-movements in simulation take longer than the timeout allows, and waiting for quiet-database does succeed when given some more time (#7290) --- fdbserver/QuietDatabase.actor.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fdbserver/QuietDatabase.actor.cpp b/fdbserver/QuietDatabase.actor.cpp index 9b4fed0f7b..d0acae8293 100644 --- a/fdbserver/QuietDatabase.actor.cpp +++ b/fdbserver/QuietDatabase.actor.cpp @@ -39,6 +39,7 @@ #include "fdbclient/ManagementAPI.actor.h" #include #include "flow/actorcompiler.h" // This must be the last #include. 
+#include "flow/flow.h" ACTOR Future> getWorkers(Reference const> dbInfo, int flags = 0) { loop { @@ -672,14 +673,18 @@ ACTOR Future reconfigureAfter(Database cx, struct QuietDatabaseChecker { double start = now(); - constexpr static double maxDDRunTime = 1000.0; + double maxDDRunTime; + + QuietDatabaseChecker(double maxDDRunTime) : maxDDRunTime(maxDDRunTime) {} struct Impl { double start; std::string const& phase; + double maxDDRunTime; std::vector failReasons; - Impl(double start, const std::string& phase) : start(start), phase(phase) {} + Impl(double start, const std::string& phase, const double maxDDRunTime) + : start(start), phase(phase), maxDDRunTime(maxDDRunTime) {} template > Impl& add(BaseTraceEvent& evt, @@ -719,7 +724,7 @@ struct QuietDatabaseChecker { }; Impl startIteration(std::string const& phase) const { - Impl res(start, phase); + Impl res(start, phase, maxDDRunTime); return res; } }; @@ -735,7 +740,7 @@ ACTOR Future waitForQuietDatabase(Database cx, int64_t maxDataDistributionQueueSize = 0, int64_t maxPoppedVersionLag = 30e6, int64_t maxVersionOffset = 1e6) { - state QuietDatabaseChecker checker; + state QuietDatabaseChecker checker(isBuggifyEnabled(BuggifyType::General) ? 1500.0 : 1000.0); state Future reconfig = reconfigureAfter(cx, 100 + (deterministicRandom()->random01() * 100), dbInfo, "QuietDatabase"); state Future dataInFlight; From bd47f390bddc5cca9f81fde48e527970f4075407 Mon Sep 17 00:00:00 2001 From: Dan Adkins <105679810+sfc-gh-dadkins@users.noreply.github.com> Date: Mon, 6 Jun 2022 16:14:49 -0400 Subject: [PATCH 32/49] Add simulation test for three_data_hall configuration (#7305) * Add simulation test for 1 data hall + 1 machine failure case. * Disable BUGGIFY for DEGRADED_RESET_INTERVAL. A simulation test discovered a situation where machines attempting to connect to a dead coordinator (with a well-known endpoint) were getting themselves marked degraded. 
This flapping of the degraded state prevented recovery from completing, as it started over any time it noticed that tlogs on degraded hosts could be relocated to non-degraded ones. bin/fdbserver -r simulation -f tests/rare/CycleWithDeadHall.toml -b on -s 276841956 --- fdbclient/ServerKnobs.cpp | 2 +- fdbrpc/sim2.actor.cpp | 19 ++++++ fdbrpc/simulator.h | 4 ++ .../workloads/MachineAttrition.actor.cpp | 25 ++++++-- tests/CMakeLists.txt | 1 + tests/rare/CycleWithDeadHall.toml | 58 +++++++++++++++++++ 6 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 tests/rare/CycleWithDeadHall.toml diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index f39a56639e..c80b77f648 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -734,7 +734,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( WORKER_LOGGING_INTERVAL, 5.0 ); init( HEAP_PROFILER_INTERVAL, 30.0 ); init( UNKNOWN_CC_TIMEOUT, 600.0 ); - init( DEGRADED_RESET_INTERVAL, 24*60*60 ); if ( randomize && BUGGIFY ) DEGRADED_RESET_INTERVAL = 10; + init( DEGRADED_RESET_INTERVAL, 24*60*60 ); // FIXME: short interval causes false positive degraded state to flap, e.g. 
when everyone tries and fails to connect to dead coordinator: if ( randomize && BUGGIFY ) DEGRADED_RESET_INTERVAL = 10; init( DEGRADED_WARNING_LIMIT, 1 ); init( DEGRADED_WARNING_RESET_DELAY, 7*24*60*60 ); init( TRACE_LOG_FLUSH_FAILURE_CHECK_INTERVAL_SECONDS, 10 ); diff --git a/fdbrpc/sim2.actor.cpp b/fdbrpc/sim2.actor.cpp index 87e7e723e1..bf9fe0fa52 100644 --- a/fdbrpc/sim2.actor.cpp +++ b/fdbrpc/sim2.actor.cpp @@ -1681,6 +1681,25 @@ public: } return result; } + bool killDataHall(Optional> dataHallId, + KillType kt, + bool forceKill, + KillType* ktFinal) override { + auto processes = getAllProcesses(); + std::set>> dataHallMachines; + for (auto& process : processes) { + if (process->locality.dataHallId() == dataHallId) { + dataHallMachines.insert(process->locality.machineId()); + } + } + bool result = false; + for (auto& machineId : dataHallMachines) { + if (killMachine(machineId, kt, forceKill, ktFinal)) { + result = true; + } + } + return result; + } bool killMachine(Optional> machineId, KillType kt, bool forceKill, diff --git a/fdbrpc/simulator.h b/fdbrpc/simulator.h index b584acb86d..eb7b4d65ec 100644 --- a/fdbrpc/simulator.h +++ b/fdbrpc/simulator.h @@ -266,6 +266,10 @@ public: KillType kt, bool forceKill = false, KillType* ktFinal = nullptr) = 0; + virtual bool killDataHall(Optional> dcId, + KillType kt, + bool forceKill = false, + KillType* ktFinal = nullptr) = 0; // virtual KillType getMachineKillState( UID zoneID ) = 0; virtual bool canKillProcesses(std::vector const& availableProcesses, std::vector const& deadProcesses, diff --git a/fdbserver/workloads/MachineAttrition.actor.cpp b/fdbserver/workloads/MachineAttrition.actor.cpp index 42f951dd37..e2a1f5492c 100644 --- a/fdbserver/workloads/MachineAttrition.actor.cpp +++ b/fdbserver/workloads/MachineAttrition.actor.cpp @@ -273,12 +273,11 @@ struct MachineAttritionWorkload : TestWorkload { } ACTOR static Future machineKillWorker(MachineAttritionWorkload* self, double meanDelay, Database cx) { - state int 
killedMachines = 0; - state double delayBeforeKill = deterministicRandom()->random01() * meanDelay; - ASSERT(g_network->isSimulated()); + state double delayBeforeKill; if (self->killDc) { + delayBeforeKill = deterministicRandom()->random01() * meanDelay; wait(delay(delayBeforeKill)); // decide on a machine to kill @@ -303,7 +302,20 @@ struct MachineAttritionWorkload : TestWorkload { .detail("KillType", kt); g_simulator.killDataCenter(target, kt); + } else if (self->killDatahall) { + delayBeforeKill = deterministicRandom()->random01() * meanDelay; + wait(delay(delayBeforeKill)); + + // It only makes sense to kill a single data hall. + ASSERT(self->targetIds.size() == 1); + auto target = self->targetIds.front(); + + auto kt = ISimulator::KillInstantly; + TraceEvent("Assassination").detail("TargetDataHall", target).detail("KillType", kt); + + g_simulator.killDataHall(target, kt); } else { + state int killedMachines = 0; while (killedMachines < self->machinesToKill && self->machines.size() > self->machinesToLeave) { TraceEvent("WorkerKillBegin") .detail("KilledMachines", killedMachines) @@ -312,6 +324,7 @@ struct MachineAttritionWorkload : TestWorkload { .detail("Machines", self->machines.size()); TEST(true); // Killing a machine + delayBeforeKill = deterministicRandom()->random01() * meanDelay; wait(delay(delayBeforeKill)); TraceEvent("WorkerKillAfterDelay").log(); @@ -385,8 +398,12 @@ struct MachineAttritionWorkload : TestWorkload { } killedMachines++; - if (!self->replacement) + if (self->replacement) { + // Replace by reshuffling, since we always pick from the back. 
+ deterministicRandom()->randomShuffle(self->machines); + } else { self->machines.pop_back(); + } wait(delay(meanDelay - delayBeforeKill) && success(self->ignoreSSFailures)); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8a6163f049..1a5a0df6f1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -206,6 +206,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES rare/ConflictRangeRYOWCheck.toml) add_fdb_test(TEST_FILES rare/CycleRollbackClogged.toml) add_fdb_test(TEST_FILES rare/CycleWithKills.toml) + add_fdb_test(TEST_FILES rare/CycleWithDeadHall.toml) add_fdb_test(TEST_FILES rare/FuzzTest.toml) add_fdb_test(TEST_FILES rare/HighContentionPrefixAllocator.toml) add_fdb_test(TEST_FILES rare/InventoryTestHeavyWrites.toml) diff --git a/tests/rare/CycleWithDeadHall.toml b/tests/rare/CycleWithDeadHall.toml new file mode 100644 index 0000000000..a15bacfdbe --- /dev/null +++ b/tests/rare/CycleWithDeadHall.toml @@ -0,0 +1,58 @@ +# Attempt to reproduce failures which occur in three_data_hall mode +# when one data hall is down and other machines are being rebooted. +# +# three_data_hall is supposed to tolerate the failure of one data hall +# plus one other machine. +# +# CONFIGURATION NOTES +# +# For the simulated test setup, there is currently no way to configure +# three data halls within one data center. Instead, we need to specify +# three data centers, since the simulated setup will place one data +# hall in each data center. +# +# We also need to disable 'generateFearless', since that option will +# sometimes generate configs with a satellite data center, and we have +# a policy of not placing tlogs there. It's impossible to place tlogs +# in a way that satisfies the three_data_hall constraints. +[configuration] +config = 'three_data_hall' +datacenters = 3 +generateFearless = false + +[[test]] +testTitle = 'Two out of Three Data Halls' + + # Baseline workload during test. 
+ [[test.workload]] + testName = 'Cycle' + transactionsPerSecond = 2500.0 + testDuration = 30.0 + expectedRate = 0.01 + + # Immediately take down a data hall. + [[test.workload]] + testName = 'Attrition' + killDatahall = true + killDc = false + machinesToKill = 1 + targetIds = 1 + testDuration = 0 + + # Continuously reboot machines. + # (waitForVersion ensures the cluster recovers between reboots.) + [[test.workload]] + testName = 'Attrition' + testDuration = 30.0 + machinesToKill = 300 # causes the mean delay to be 30s/300 = 100ms. + reboot = true # reboot, don't kill. + replacement = true # yes, we can reboot the same machine again. + waitForVersion = true # wait for the cluster to reform between reboots. + + # Consistency checks won't pass with one data hall missing. + # Change to fallback mode after the test as a workaround. + [[test.workload]] + testName = 'ChangeConfig' + configMode = 'three_data_hall_fallback' + minDelayBeforeChange = 30.0 + maxDelayBeforeChange = 30.0 From 043bc411eef3e87c46fab62e5ec9ca115114ee60 Mon Sep 17 00:00:00 2001 From: Clement Pang Date: Mon, 6 Jun 2022 13:15:33 -0700 Subject: [PATCH 33/49] Make KeyArrayResult constructor public. 
(#7308) --- .../java/src/main/com/apple/foundationdb/KeyArrayResult.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/java/src/main/com/apple/foundationdb/KeyArrayResult.java b/bindings/java/src/main/com/apple/foundationdb/KeyArrayResult.java index 26ceae306e..ab7961c511 100644 --- a/bindings/java/src/main/com/apple/foundationdb/KeyArrayResult.java +++ b/bindings/java/src/main/com/apple/foundationdb/KeyArrayResult.java @@ -26,7 +26,7 @@ import java.util.List; public class KeyArrayResult { final List keys; - KeyArrayResult(byte[] keyBytes, int[] keyLengths) { + public KeyArrayResult(byte[] keyBytes, int[] keyLengths) { int count = keyLengths.length; keys = new ArrayList(count); From 5f1a061e3a548d0e4e93697eef928175ce03cd4e Mon Sep 17 00:00:00 2001 From: Yao Xiao <87789492+yao-xiao-github@users.noreply.github.com> Date: Mon, 6 Jun 2022 14:27:41 -0700 Subject: [PATCH 34/49] Disable rocksdb metrics. (#7327) --- fdbserver/KeyValueStoreShardedRocksDB.actor.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp b/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp index 99e00caca2..f829733e8b 100644 --- a/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp @@ -286,9 +286,7 @@ rocksdb::Options getOptions() { options.IncreaseParallelism(SERVER_KNOBS->ROCKSDB_BACKGROUND_PARALLELISM); } - options.statistics = rocksdb::CreateDBStatistics(); - options.statistics->set_stats_level(rocksdb::kExceptHistogramOrTimers); - + // TODO: enable rocksdb metrics. 
options.db_log_dir = SERVER_KNOBS->LOG_DIRECTORY; return options; } From e5790380189052bcdf57e69bf7cee557f3a39da4 Mon Sep 17 00:00:00 2001 From: Kevin Hoxha Date: Mon, 6 Jun 2022 18:19:31 -0700 Subject: [PATCH 35/49] Add DDSketch to mako (#7167) * Add logic for DDSketch in mako * Return double from percentile() and fix crash in deserialize() * make sure to serialize and print result from mergeSketchReport() * clean up comments * move ddsketch into its own file * remove LatencySampleBin and add DDSketch to ThreadStatistics * Update DDSketch implementation * remove assertions that cause circular references * add DDSketchMako as a subclass of DDSketch * Merge branch 'ddsketch_mako' of github.com:sfc-gh-khoxha/foundationdb into ddsketch_mako * Revert "Merge branch 'ddsketch_mako' of github.com:sfc-gh-khoxha/foundationdb into ddsketch_mako" This reverts commit cc29a68aefd1b385b563bfbaa09a32e399c0d233. * add ddsketch mako class and rename export flag * remove redundant decimal roundings * print max/min/avg from ddsketch * remove latency sample bin completely * Make ThreadStatistics dump latency to a file and read from file in printReport() * make sure to add latency data from file to final stats * change mergeSketchReport to use new ThreadStatistics serialization (1) * use C-style string arrays in Arguments instead of std::string * remove unused header * only serialize non-empty sketches * fix CentOS build error * Update report file count properly * avoid deserializing empty sketches * fix segmentation fault when getting file name for export_sketch_path * make sure to properly add file to report_files list * fix printing bugs when running in report mode * fix incorrect insertion of report files * don't use range based loop for char array * don't reset args.num_report_files * Update the usage info for new options * switch to using std::vector for sketches instead of std::array * make sure to use true/false instead of 1/0 for booleans * remove op_name if not being used * 
remove fp code in dumpThreadSamples * replace lambda with function in printReport * merge and print stats in separate functions * make sure to exit after printing report * address review feedback * make defaultMin, defaultMax static and move setBucketSize to protected * switch to reverse iterators when moving backwards along bucket array --- bindings/c/test/mako/async.cpp | 7 - bindings/c/test/mako/async.hpp | 2 - bindings/c/test/mako/ddsketch.hpp | 275 +++++++++++++ bindings/c/test/mako/mako.cpp | 621 ++++++++++++++++-------------- bindings/c/test/mako/mako.hpp | 6 + bindings/c/test/mako/stats.hpp | 233 +++++++---- 6 files changed, 773 insertions(+), 371 deletions(-) create mode 100644 bindings/c/test/mako/ddsketch.hpp diff --git a/bindings/c/test/mako/async.cpp b/bindings/c/test/mako/async.cpp index 79ffc23de9..a49fa65d34 100644 --- a/bindings/c/test/mako/async.cpp +++ b/bindings/c/test/mako/async.cpp @@ -70,8 +70,6 @@ void ResumableStateForPopulate::runOneTick() { const auto tx_duration = watch_tx.diff(); stats.addLatency(OP_COMMIT, commit_latency); stats.addLatency(OP_TRANSACTION, tx_duration); - sample_bins[OP_COMMIT].put(commit_latency); - sample_bins[OP_TRANSACTION].put(tx_duration); } stats.incrOpCount(OP_COMMIT); stats.incrOpCount(OP_TRANSACTION); @@ -190,7 +188,6 @@ void ResumableStateForRunWorkload::updateStepStats() { const auto step_latency = watch_step.diff(); if (do_sample) { stats.addLatency(OP_COMMIT, step_latency); - sample_bins[OP_COMMIT].put(step_latency); } tx.reset(); stats.incrOpCount(OP_COMMIT); @@ -204,7 +201,6 @@ void ResumableStateForRunWorkload::updateStepStats() { if (do_sample) { const auto op_latency = watch_op.diff(); stats.addLatency(iter.op, op_latency); - sample_bins[iter.op].put(op_latency); } stats.incrOpCount(iter.op); } @@ -248,8 +244,6 @@ void ResumableStateForRunWorkload::onTransactionSuccess() { const auto tx_duration = watch_tx.diff(); stats.addLatency(OP_COMMIT, commit_latency); stats.addLatency(OP_TRANSACTION, 
tx_duration); - sample_bins[OP_COMMIT].put(commit_latency); - sample_bins[OP_TRANSACTION].put(tx_duration); } stats.incrOpCount(OP_COMMIT); stats.incrOpCount(OP_TRANSACTION); @@ -270,7 +264,6 @@ void ResumableStateForRunWorkload::onTransactionSuccess() { if (stats.getOpCount(OP_TRANSACTION) % args.sampling == 0) { const auto tx_duration = watch_tx.diff(); stats.addLatency(OP_TRANSACTION, tx_duration); - sample_bins[OP_TRANSACTION].put(tx_duration); } stats.incrOpCount(OP_TRANSACTION); watch_tx.startFromStop(); diff --git a/bindings/c/test/mako/async.hpp b/bindings/c/test/mako/async.hpp index 643b31081f..89f4aeb616 100644 --- a/bindings/c/test/mako/async.hpp +++ b/bindings/c/test/mako/async.hpp @@ -42,7 +42,6 @@ struct ResumableStateForPopulate : std::enable_shared_from_this& stopcount; - LatencySampleBinArray sample_bins; int key_begin; int key_end; int key_checkpoint; @@ -84,7 +83,6 @@ struct ResumableStateForRunWorkload : std::enable_shared_from_this const& signal; int max_iters; OpIterator iter; - LatencySampleBinArray sample_bins; fdb::ByteString key1; fdb::ByteString key2; fdb::ByteString val; diff --git a/bindings/c/test/mako/ddsketch.hpp b/bindings/c/test/mako/ddsketch.hpp new file mode 100644 index 0000000000..4c0eb01f99 --- /dev/null +++ b/bindings/c/test/mako/ddsketch.hpp @@ -0,0 +1,275 @@ +/* + * DDSketch.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2020 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DDSKETCH_H +#define DDSKETCH_H +#include +#include +#include +#pragma once + +#include +#include +#include +#include + +// A namespace for fast log() computation. +namespace fastLogger { +// Basically, the goal is to compute log(x)/log(r). +// For double, it is represented as 2^e*(1+s) (0<=s<1), so our goal becomes +// e*log(2)/log(r)*log(1+s), and we approximate log(1+s) with a cubic function. +// See more details on Datadog's paper, or CubicallyInterpolatedMapping.java in +// https://github.com/DataDog/sketches-java/ +inline const double correctingFactor = 1.00988652862227438516; // = 7 / (10 * log(2)); +constexpr inline const double A = 6.0 / 35.0, B = -3.0 / 5.0, C = 10.0 / 7.0; + +inline double fastlog(double value) { + int e; + double s = frexp(value, &e); + s = s * 2 - 1; + return ((A * s + B) * s + C) * s + e - 1; +} + +inline double reverseLog(double index) { + long exponent = floor(index); + // Derived from Cardano's formula + double d0 = B * B - 3 * A * C; + double d1 = 2 * B * B * B - 9 * A * B * C - 27 * A * A * (index - exponent); + double p = cbrt((d1 - sqrt(d1 * d1 - 4 * d0 * d0 * d0)) / 2); + double significandPlusOne = -(B + p + d0 / p) / (3 * A) + 1; + return ldexp(significandPlusOne / 2, exponent + 1); +} +} // namespace fastLogger + +// DDSketch for non-negative numbers (those < EPS = 10^-18 are +// treated as 0, and huge numbers (>1/EPS) fail ASSERT). This is the base +// class without a concrete log() implementation. 
+template +class DDSketchBase { + + static constexpr T defaultMin() { return std::numeric_limits::max(); } + + static constexpr T defaultMax() { + if constexpr (std::is_floating_point_v) { + return -std::numeric_limits::max(); + } else { + return std::numeric_limits::min(); + } + } + +public: + explicit DDSketchBase(double errorGuarantee) + : errorGuarantee(errorGuarantee), populationSize(0), zeroPopulationSize(0), minValue(defaultMin()), + maxValue(defaultMax()), sum(T()) {} + + DDSketchBase& addSample(T sample) { + // Call it addSample for now, while it is not a sample anymore + if (!populationSize) + minValue = maxValue = sample; + + if (sample <= EPS) { + zeroPopulationSize++; + } else { + int index = static_cast(this)->getIndex(sample); + assert(index >= 0 && index < int(buckets.size())); + buckets[index]++; + } + + populationSize++; + sum += sample; + maxValue = std::max(maxValue, sample); + minValue = std::min(minValue, sample); + return *this; + } + + double mean() const { + if (populationSize == 0) + return 0; + return (double)sum / populationSize; + } + + T median() { return percentile(0.5); } + + T percentile(double percentile) { + assert(percentile >= 0 && percentile <= 1); + + if (populationSize == 0) + return T(); + uint64_t targetPercentilePopulation = percentile * (populationSize - 1); + // Now find the tPP-th (0-indexed) element + if (targetPercentilePopulation < zeroPopulationSize) + return T(0); + + int index = -1; + bool found = false; + if (percentile <= 0.5) { // count up + uint64_t count = zeroPopulationSize; + for (size_t i = 0; i < buckets.size(); i++) { + if (targetPercentilePopulation < count + buckets[i]) { + // count + buckets[i] = # of numbers so far (from the rightmost to + // this bucket, inclusive), so if target is in this bucket, it should + // means tPP < cnt + bck[i] + found = true; + index = i; + break; + } + count += buckets[i]; + } + } else { // and count down + uint64_t count = 0; + for (auto rit = buckets.rbegin(); rit != 
buckets.rend(); rit++) { + if (targetPercentilePopulation + count + *rit >= populationSize) { + // cnt + bkt[i] is # of numbers to the right of this bucket (incl.), + // so if target is not in this bucket (i.e., to the left of this + // bucket), it would be as right as the left bucket's rightmost + // number, so we would have tPP + cnt + bkt[i] < total population (tPP + // is 0-indexed), that means target is in this bucket if this + // condition is not satisfied. + found = true; + index = std::distance(rit, buckets.rend()) - 1; + break; + } + count += *rit; + } + } + assert(found); + return static_cast(this)->getValue(index); + } + + T min() const { return minValue; } + T max() const { return maxValue; } + + void clear() { + std::fill(buckets.begin(), buckets.end(), 0); + populationSize = zeroPopulationSize = 0; + sum = 0; + minValue = defaultMin(); + maxValue = defaultMax(); + } + + uint64_t getPopulationSize() const { return populationSize; } + + double getErrorGurantee() const { return errorGuarantee; } + + DDSketchBase& mergeWith(const DDSketchBase& anotherSketch) { + // Must have the same guarantee + assert(fabs(errorGuarantee - anotherSketch.errorGuarantee) < EPS && + anotherSketch.buckets.size() == buckets.size()); + for (size_t i = 0; i < anotherSketch.buckets.size(); i++) { + buckets[i] += anotherSketch.buckets[i]; + } + populationSize += anotherSketch.populationSize; + zeroPopulationSize += anotherSketch.zeroPopulationSize; + minValue = std::min(minValue, anotherSketch.minValue); + maxValue = std::max(maxValue, anotherSketch.maxValue); + sum += anotherSketch.sum; + return *this; + } + + constexpr static double EPS = 1e-18; // smaller numbers are considered as 0 +protected: + double errorGuarantee; // As defined in the paper + + uint64_t populationSize, zeroPopulationSize; // we need to separately count 0s + std::vector buckets; + T minValue, maxValue, sum; + void setBucketSize(int capacity) { buckets.resize(capacity, 0); } +}; + +// DDSketch with fast log 
implementation for float numbers +template +class DDSketch : public DDSketchBase, T> { +public: + explicit DDSketch(double errorGuarantee = 0.005) + : DDSketchBase, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)), + multiplier(fastLogger::correctingFactor * log(2) / log(gamma)) { + offset = getIndex(1.0 / DDSketchBase, T>::EPS); + this->setBucketSize(2 * offset); + } + + int getIndex(T sample) { + static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); + return ceil(fastLogger::fastlog(sample) * multiplier) + offset; + } + + T getValue(int index) { return fastLogger::reverseLog((index - offset) / multiplier) * 2.0 / (1 + gamma); } + +private: + double gamma, multiplier; + int offset = 0; +}; + +// DDSketch with log. Slow and only use this when others doesn't work. +template +class DDSketchSlow : public DDSketchBase, T> { +public: + DDSketchSlow(double errorGuarantee = 0.1) + : DDSketchBase, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)), + logGamma(log(gamma)) { + offset = getIndex(1.0 / DDSketchBase, T>::EPS) + 5; + this->setBucketSize(2 * offset); + } + + int getIndex(T sample) { return ceil(log(sample) / logGamma) + offset; } + + T getValue(int index) { return (T)(2.0 * pow(gamma, (index - offset)) / (1 + gamma)); } + +private: + double gamma, logGamma; + int offset = 0; +}; + +// DDSketch for unsigned int. Faster than the float version. Fixed accuracy. +class DDSketchFastUnsigned : public DDSketchBase { +public: + DDSketchFastUnsigned() : DDSketchBase(errorGuarantee) { this->setBucketSize(129); } + + int getIndex(unsigned sample) { + __uint128_t v = sample; + v *= v; + v *= v; // sample^4 + uint64_t low = (uint64_t)v, high = (uint64_t)(v >> 64); + + return 128 - (high == 0 ? ((low == 0 ? 
64 : __builtin_clzll(low)) + 64) : __builtin_clzll(high)); + } + + unsigned getValue(int index) { + double r = 1, g = gamma; + while (index) { // quick power method for power(gamma, index) + if (index & 1) + r *= g; + g *= g; + index >>= 1; + } + // 2.0 * pow(gamma, index) / (1 + gamma) is what we need + return (unsigned)(2.0 * r / (1 + gamma) + 0.5); // round to nearest int + } + +private: + constexpr static double errorGuarantee = 0.08642723372; + // getIndex basically calc floor(log_2(x^4)) + 1, + // which is almost ceil(log_2(x^4)) as it only matters when x is a power of 2, + // and it does not change the error bound. Original sketch asks for + // ceil(log_r(x)), so we know r = pow(2, 1/4) = 1.189207115. And r = (1 + eG) + // / (1 - eG) so eG = 0.08642723372. + constexpr static double gamma = 1.189207115; +}; + +#endif \ No newline at end of file diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 028efc216a..1abf249793 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -18,12 +18,14 @@ * limitations under the License. 
*/ +#include #include #include #include #include #include #include +#include #include #include #include @@ -43,6 +45,7 @@ #include #include #include +#include #include "fdbclient/zipf.h" #include "async.hpp" @@ -63,7 +66,6 @@ struct alignas(64) ThreadArgs { int worker_id; int thread_id; pid_t parent_id; - LatencySampleBinArray sample_bins; Arguments const* args; shared_memory::Access shm; fdb::Database database; // database to work with @@ -121,8 +123,7 @@ int populate(Transaction tx, int worker_id, int thread_id, int thread_tps, - ThreadStatistics& stats, - LatencySampleBinArray& sample_bins) { + ThreadStatistics& stats) { const auto key_begin = insertBegin(args.rows, worker_id, thread_id, args.num_processes, args.num_threads); const auto key_end = insertEnd(args.rows, worker_id, thread_id, args.num_processes, args.num_threads); auto xacts = 0; @@ -197,8 +198,6 @@ int populate(Transaction tx, const auto tx_duration = watch_tx.diff(); stats.addLatency(OP_COMMIT, commit_latency); stats.addLatency(OP_TRANSACTION, tx_duration); - sample_bins[OP_COMMIT].put(commit_latency); - sample_bins[OP_TRANSACTION].put(tx_duration); } stats.incrOpCount(OP_COMMIT); stats.incrOpCount(OP_TRANSACTION); @@ -219,7 +218,6 @@ int populate(Transaction tx, int runOneTransaction(Transaction& tx, Arguments const& args, ThreadStatistics& stats, - LatencySampleBinArray& sample_bins, ByteString& key1, ByteString& key2, ByteString& val) { @@ -271,7 +269,6 @@ transaction_begin: if (do_sample) { const auto step_latency = watch_step.diff(); stats.addLatency(OP_COMMIT, step_latency); - sample_bins[OP_COMMIT].put(step_latency); } tx.reset(); stats.incrOpCount(OP_COMMIT); @@ -286,7 +283,6 @@ transaction_begin: if (do_sample) { const auto op_latency = watch_op.diff(); stats.addLatency(op, op_latency); - sample_bins[op].put(op_latency); } stats.incrOpCount(op); } @@ -304,7 +300,6 @@ transaction_begin: if (do_sample) { const auto commit_latency = watch_commit.diff(); stats.addLatency(OP_COMMIT, 
commit_latency); - sample_bins[OP_COMMIT].put(commit_latency); } stats.incrOpCount(OP_COMMIT); } else { @@ -323,7 +318,6 @@ transaction_begin: // one transaction has completed successfully if (do_sample) { const auto tx_duration = watch_tx.stop().diff(); - sample_bins[OP_TRANSACTION].put(tx_duration); stats.addLatency(OP_TRANSACTION, tx_duration); } stats.incrOpCount(OP_TRANSACTION); @@ -339,7 +333,6 @@ int runWorkload(Transaction tx, int const thread_iters, std::atomic const& signal, ThreadStatistics& stats, - LatencySampleBinArray& sample_bins, int const dotrace, int const dotagging) { auto traceid = std::string{}; @@ -421,7 +414,7 @@ int runWorkload(Transaction tx, } } - rc = runOneTransaction(tx, args, stats, sample_bins, key1, key2, val); + rc = runOneTransaction(tx, args, stats, key1, key2, val); if (rc) { logr.warn("runOneTransaction failed ({})", rc); } @@ -446,11 +439,15 @@ std::string getStatsFilename(std::string_view dirname, int worker_id, int thread return fmt::format("{}/{}_{}_{}", dirname, worker_id + 1, thread_id + 1, opTable[op].name()); } +std::string getStatsFilename(std::string_view dirname, int worker_id, int thread_id) { + return fmt::format("{}/{}_{}", dirname, worker_id + 1, thread_id + 1); +} + void dumpThreadSamples(Arguments const& args, pid_t parent_id, int worker_id, int thread_id, - const LatencySampleBinArray& sample_bins, + const ThreadStatistics& stats, bool overwrite = true) { const auto dirname = fmt::format("{}{}", TEMP_DATA_STORE, parent_id); const auto rc = mkdir(dirname.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); @@ -460,14 +457,7 @@ void dumpThreadSamples(Arguments const& args, } for (auto op = 0; op < MAX_OP; op++) { if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - const auto filename = getStatsFilename(dirname, worker_id, thread_id, op); - auto fp = fopen(filename.c_str(), overwrite ? 
"w" : "a"); - if (!fp) { - logr.error("fopen({}): {}", filename, strerror(errno)); - continue; - } - auto fclose_guard = ExitGuard([fp]() { fclose(fp); }); - sample_bins[op].forEachBlock([fp](auto ptr, auto count) { fwrite(ptr, sizeof(*ptr) * count, 1, fp); }); + stats.writeToFile(getStatsFilename(dirname, worker_id, thread_id, op), op); } } } @@ -481,7 +471,7 @@ void runAsyncWorkload(Arguments const& args, auto dump_samples = [&args, pid_main, worker_id](auto&& states) { auto overwrite = true; /* overwrite or append */ for (const auto& state : states) { - dumpThreadSamples(args, pid_main, worker_id, 0 /*thread_id*/, state->sample_bins, overwrite); + dumpThreadSamples(args, pid_main, worker_id, 0 /*thread_id*/, state->stats, overwrite); overwrite = false; } }; @@ -585,28 +575,25 @@ void workerThread(ThreadArgs& thread_args) { usleep(10000); /* 10ms */ } - auto& sample_bins = thread_args.sample_bins; - if (args.mode == MODE_CLEAN) { auto rc = cleanup(tx, args); if (rc < 0) { logr.error("cleanup failed"); } } else if (args.mode == MODE_BUILD) { - auto rc = populate(tx, args, worker_id, thread_id, thread_tps, stats, sample_bins); + auto rc = populate(tx, args, worker_id, thread_id, thread_tps, stats); if (rc < 0) { logr.error("populate failed"); } } else if (args.mode == MODE_RUN) { - auto rc = runWorkload( - tx, args, thread_tps, throttle_factor, thread_iters, signal, stats, sample_bins, dotrace, dotagging); + auto rc = runWorkload(tx, args, thread_tps, throttle_factor, thread_iters, signal, stats, dotrace, dotagging); if (rc < 0) { logr.error("runWorkload failed"); } } if (args.mode == MODE_BUILD || args.mode == MODE_RUN) { - dumpThreadSamples(args, parent_id, worker_id, thread_id, sample_bins); + dumpThreadSamples(args, parent_id, worker_id, thread_id, stats); } } @@ -743,13 +730,6 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces this_args.args = &args; this_args.shm = shm; this_args.database = databases[i % args.num_databases]; - 
- /* for ops to run, pre-allocate one latency sample block */ - for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - this_args.sample_bins[op].reserveOneBlock(); - } - } worker_threads[i] = std::thread(workerThread, std::ref(this_args)); } /* wait for everyone to finish */ @@ -835,6 +815,7 @@ int initArguments(Arguments& args) { args.client_threads_per_version = 0; args.disable_ryw = 0; args.json_output_path[0] = '\0'; + args.stats_export_path[0] = '\0'; args.bg_materialize_files = false; args.bg_file_path[0] = '\0'; args.distributed_tracer_client = 0; @@ -994,7 +975,7 @@ void usage() { printf("%-24s %s\n", " --tpsinterval=SEC", "Specify the TPS change interval (Default: 10 seconds)"); printf("%-24s %s\n", " --tpschange=", "Specify the TPS change type (Default: sin)"); printf("%-24s %s\n", " --sampling=RATE", "Specify the sampling rate for latency stats"); - printf("%-24s %s\n", "-m, --mode=MODE", "Specify the mode (build, run, clean)"); + printf("%-24s %s\n", "-m, --mode=MODE", "Specify the mode (build, run, clean, report)"); printf("%-24s %s\n", "-z, --zipf", "Use zipfian distribution instead of uniform distribution"); printf("%-24s %s\n", " --commitget", "Commit GETs"); printf("%-24s %s\n", " --loggroup=LOGGROUP", "Set client logr group"); @@ -1016,6 +997,9 @@ void usage() { printf("%-24s %s\n", " --bg_file_path=PATH", "Read blob granule files from the local filesystem at PATH and materialize the results."); + printf("%-24s %s\n", + " --stats_export_path=PATH", + "Write the serialized DDSketch data to file at PATH. 
Can be used in either run or build mode."); printf( "%-24s %s\n", " --distributed_tracer_client=CLIENT", "Specify client (disabled, network_lossy, log_file)"); } @@ -1069,6 +1053,7 @@ int parseArguments(int argc, char* argv[], Arguments& args) { { "disable_ryw", no_argument, NULL, ARG_DISABLE_RYW }, { "json_report", optional_argument, NULL, ARG_JSON_REPORT }, { "bg_file_path", required_argument, NULL, ARG_BG_FILE_PATH }, + { "stats_export_path", optional_argument, NULL, ARG_EXPORT_PATH }, { "distributed_tracer_client", required_argument, NULL, ARG_DISTRIBUTED_TRACER_CLIENT }, { NULL, 0, NULL, 0 } }; @@ -1131,6 +1116,19 @@ int parseArguments(int argc, char* argv[], Arguments& args) { args.mode = MODE_BUILD; } else if (strcmp(optarg, "run") == 0) { args.mode = MODE_RUN; + } else if (strcmp(optarg, "report") == 0) { + args.mode = MODE_REPORT; + int i = optind; + for (; i < argc; i++) { + if (argv[i][0] != '-') { + const std::string report_file = argv[i]; + strncpy(args.report_files[args.num_report_files], report_file.c_str(), report_file.size()); + args.num_report_files++; + } else { + optind = i - 1; + break; + } + } } break; case ARG_ASYNC: @@ -1257,6 +1255,16 @@ int parseArguments(int argc, char* argv[], Arguments& args) { case ARG_BG_FILE_PATH: args.bg_materialize_files = true; strncpy(args.bg_file_path, optarg, std::min(sizeof(args.bg_file_path), strlen(optarg) + 1)); + case ARG_EXPORT_PATH: + if (optarg == NULL && (argv[optind] == NULL || (argv[optind] != NULL && argv[optind][0] == '-'))) { + char default_file[] = "sketch_data.json"; + strncpy(args.stats_export_path, default_file, sizeof(default_file)); + } else { + strncpy(args.stats_export_path, + argv[optind], + std::min(sizeof(args.stats_export_path), strlen(argv[optind]) + 1)); + } + break; case ARG_DISTRIBUTED_TRACER_CLIENT: if (strcmp(optarg, "disabled") == 0) { args.distributed_tracer_client = 0; @@ -1335,6 +1343,20 @@ int validateArguments(Arguments const& args) { return -1; } } + + // ensure that all 
of the files provided to mako are valid and exist + if (args.mode == MODE_REPORT) { + if (!args.num_report_files) { + logr.error("No files to merge"); + } + for (int i = 0; i < args.num_report_files; i++) { + struct stat buffer; + if (stat(args.report_files[i], &buffer) != 0) { + logr.error("Couldn't open file {}", args.report_files[i]); + return -1; + } + } + } if (args.distributed_tracer_client < 0) { logr.error("--disibuted_tracer_client must specify either (disabled, network_lossy, log_file)"); return -1; @@ -1447,6 +1469,248 @@ void printStatsHeader(Arguments const& args, bool show_commit, bool is_first_hea fmt::print("\n"); } +void printThreadStats(ThreadStatistics& final_stats, Arguments args, FILE* fp, bool is_report = false) { + + if (is_report) { + for (auto op = 0; op < MAX_OP; op++) { + if (final_stats.getLatencySampleCount(op) > 0 && op != OP_COMMIT && op != OP_TRANSACTION) { + args.txnspec.ops[op][OP_COUNT] = 1; + } + } + } + + fmt::print("Latency (us)"); + printStatsHeader(args, true, false, true); + + /* Total Samples */ + putTitle("Samples"); + bool first_op = true; + for (auto op = 0; op < MAX_OP; op++) { + if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { + auto sample_size = final_stats.getLatencySampleCount(op); + if (sample_size > 0) { + putField(sample_size); + } else { + putField("N/A"); + } + if (fp) { + if (first_op) { + first_op = false; + } else { + fmt::fprintf(fp, ","); + } + fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), sample_size); + } + } + } + fmt::print("\n"); + + /* Min Latency */ + if (fp) { + fmt::fprintf(fp, "}, \"minLatency\": {"); + } + putTitle("Min"); + first_op = true; + for (auto op = 0; op < MAX_OP; op++) { + if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { + const auto lat_min = final_stats.getLatencyUsMin(op); + if (lat_min == -1) { + putField("N/A"); + } else { + putField(lat_min); + if (fp) { + if (first_op) { + first_op = false; + } else { + fmt::fprintf(fp, ","); + } + fmt::fprintf(fp, 
"\"%s\": %lu", getOpName(op), lat_min); + } + } + } + } + fmt::print("\n"); + + /* Avg Latency */ + if (fp) { + fmt::fprintf(fp, "}, \"avgLatency\": {"); + } + putTitle("Avg"); + first_op = true; + for (auto op = 0; op < MAX_OP; op++) { + if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { + if (final_stats.getLatencySampleCount(op) > 0) { + putField(final_stats.mean(op)); + if (fp) { + if (first_op) { + first_op = false; + } else { + fmt::fprintf(fp, ","); + } + fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), final_stats.mean(op)); + } + } else { + putField("N/A"); + } + } + } + fmt::printf("\n"); + + /* Max Latency */ + if (fp) { + fmt::fprintf(fp, "}, \"maxLatency\": {"); + } + putTitle("Max"); + first_op = true; + for (auto op = 0; op < MAX_OP; op++) { + if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { + const auto lat_max = final_stats.getLatencyUsMax(op); + if (lat_max == 0) { + putField("N/A"); + } else { + putField(lat_max); + if (fp) { + if (first_op) { + first_op = false; + } else { + fmt::fprintf(fp, ","); + } + fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), final_stats.getLatencyUsMax(op)); + } + } + } + } + fmt::print("\n"); + + /* Median Latency */ + if (fp) { + fmt::fprintf(fp, "}, \"medianLatency\": {"); + } + putTitle("Median"); + first_op = true; + for (auto op = 0; op < MAX_OP; op++) { + if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { + const auto lat_total = final_stats.getLatencyUsTotal(op); + const auto lat_samples = final_stats.getLatencySampleCount(op); + if (lat_total && lat_samples) { + auto median = final_stats.percentile(op, 0.5); + putField(median); + if (fp) { + if (first_op) { + first_op = false; + } else { + fmt::fprintf(fp, ","); + } + fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), median); + } + } else { + putField("N/A"); + } + } + } + fmt::print("\n"); + + /* 95%ile Latency */ + if (fp) { + fmt::fprintf(fp, "}, \"p95Latency\": {"); + } + putTitle("95.0 pctile"); + first_op = true; + for (auto 
op = 0; op < MAX_OP; op++) { + if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { + if (!final_stats.getLatencySampleCount(op) || !final_stats.getLatencyUsTotal(op)) { + putField("N/A"); + continue; + } + const auto point_95pct = final_stats.percentile(op, 0.95); + putField(point_95pct); + if (fp) { + if (first_op) { + first_op = false; + } else { + fmt::fprintf(fp, ","); + } + fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), point_95pct); + } + } + } + fmt::printf("\n"); + + /* 99%ile Latency */ + if (fp) { + fmt::fprintf(fp, "}, \"p99Latency\": {"); + } + putTitle("99.0 pctile"); + first_op = true; + for (auto op = 0; op < MAX_OP; op++) { + if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { + if (!final_stats.getLatencySampleCount(op) || !final_stats.getLatencyUsTotal(op)) { + putField("N/A"); + continue; + } + const auto point_99pct = final_stats.percentile(op, 0.99); + putField(point_99pct); + if (fp) { + if (first_op) { + first_op = false; + } else { + fmt::fprintf(fp, ","); + } + fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), point_99pct); + } + } + } + fmt::print("\n"); + + /* 99.9%ile Latency */ + if (fp) { + fmt::fprintf(fp, "}, \"p99.9Latency\": {"); + } + putTitle("99.9 pctile"); + first_op = true; + for (auto op = 0; op < MAX_OP; op++) { + if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { + if (!final_stats.getLatencySampleCount(op) || !final_stats.getLatencyUsTotal(op)) { + putField("N/A"); + continue; + } + const auto point_99_9pct = final_stats.percentile(op, 0.999); + putField(point_99_9pct); + if (fp) { + if (first_op) { + first_op = false; + } else { + fmt::fprintf(fp, ","); + } + fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), point_99_9pct); + } + } + } + fmt::print("\n"); + if (fp) { + fmt::fprintf(fp, "}}"); + } +} + +void loadSample(int pid_main, int op, std::vector& data_points, int process_id, int thread_id) { + const auto dirname = fmt::format("{}{}", TEMP_DATA_STORE, pid_main); + const auto filename = 
getStatsFilename(dirname, process_id, thread_id, op); + std::ifstream fp{ filename }; + std::ostringstream sstr; + sstr << fp.rdbuf(); + DDSketchMako sketch; + rapidjson::Document doc; + doc.Parse(sstr.str().c_str()); + if (!doc.HasParseError()) { + sketch.deserialize(doc); + if (data_points[op].getPopulationSize() > 0) { + data_points[op].mergeWith(sketch); + } else { + data_points[op] = sketch; + } + } +} + void printReport(Arguments const& args, ThreadStatistics const* stats, double const duration_sec, @@ -1520,7 +1784,7 @@ void printReport(Arguments const& args, putField(final_stats.getOpCount(op)); if (fp) { if (first_op) { - first_op = 0; + first_op = false; } else { fmt::fprintf(fp, ","); } @@ -1544,13 +1808,13 @@ void printReport(Arguments const& args, /* Errors */ putTitle("Errors"); - first_op = 1; + first_op = true; for (auto op = 0; op < MAX_OP; op++) { if (args.txnspec.ops[op][OP_COUNT] > 0 && op != OP_TRANSACTION) { putField(final_stats.getErrorCount(op)); if (fp) { if (first_op) { - first_op = 0; + first_op = false; } else { fmt::fprintf(fp, ","); } @@ -1563,262 +1827,29 @@ void printReport(Arguments const& args, } fmt::print("\n\n"); - fmt::print("Latency (us)"); - printStatsHeader(args, true, false, true); - - /* Total Samples */ - putTitle("Samples"); - first_op = 1; + // Get the sketches stored in file and merge them together + std::vector data_points(MAX_OP); for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - if (final_stats.getLatencyUsTotal(op)) { - putField(final_stats.getLatencySampleCount(op)); - } else { - putField("N/A"); - } - if (fp) { - if (first_op) { - first_op = 0; - } else { - fmt::fprintf(fp, ","); - } - fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), final_stats.getLatencySampleCount(op)); - } - } - } - fmt::print("\n"); + for (auto i = 0; i < args.num_processes; i++) { - /* Min Latency */ - if (fp) { - fmt::fprintf(fp, "}, \"minLatency\": {"); - } - putTitle("Min"); - first_op 
= 1; - for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - const auto lat_min = final_stats.getLatencyUsMin(op); - if (lat_min == -1) { - putField("N/A"); - } else { - putField(lat_min); - if (fp) { - if (first_op) { - first_op = 0; - } else { - fmt::fprintf(fp, ","); - } - fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), lat_min); - } - } - } - } - fmt::print("\n"); - - /* Avg Latency */ - if (fp) { - fmt::fprintf(fp, "}, \"avgLatency\": {"); - } - putTitle("Avg"); - first_op = 1; - for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - const auto lat_total = final_stats.getLatencyUsTotal(op); - const auto lat_samples = final_stats.getLatencySampleCount(op); - if (lat_total) { - putField(lat_total / lat_samples); - if (fp) { - if (first_op) { - first_op = 0; - } else { - fmt::fprintf(fp, ","); - } - fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), lat_total / lat_samples); + if (args.async_xacts == 0) { + for (auto j = 0; j < args.num_threads; j++) { + loadSample(pid_main, op, data_points, i, j); } } else { - putField("N/A"); + // async mode uses only one file per process + loadSample(pid_main, op, data_points, i, 0); } } } - fmt::printf("\n"); + final_stats.updateLatencies(data_points); - /* Max Latency */ - if (fp) { - fmt::fprintf(fp, "}, \"maxLatency\": {"); - } - putTitle("Max"); - first_op = 1; - for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - const auto lat_max = final_stats.getLatencyUsMax(op); - if (lat_max == 0) { - putField("N/A"); - } else { - putField(lat_max); - if (fp) { - if (first_op) { - first_op = 0; - } else { - fmt::fprintf(fp, ","); - } - fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), final_stats.getLatencyUsMax(op)); - } - } - } - } - fmt::print("\n"); + printThreadStats(final_stats, args, fp); - auto data_points = std::array, MAX_OP>{}; - - /* Median Latency */ - if (fp) { - 
fmt::fprintf(fp, "}, \"medianLatency\": {"); - } - putTitle("Median"); - first_op = 1; - for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - const auto lat_total = final_stats.getLatencyUsTotal(op); - const auto lat_samples = final_stats.getLatencySampleCount(op); - data_points[op].reserve(lat_samples); - if (lat_total && lat_samples) { - for (auto i = 0; i < args.num_processes; i++) { - auto load_sample = [pid_main, op, &data_points](int process_id, int thread_id) { - const auto dirname = fmt::format("{}{}", TEMP_DATA_STORE, pid_main); - const auto filename = getStatsFilename(dirname, process_id, thread_id, op); - auto fp = fopen(filename.c_str(), "r"); - if (!fp) { - logr.error("fopen({}): {}", filename, strerror(errno)); - return; - } - auto fclose_guard = ExitGuard([fp]() { fclose(fp); }); - fseek(fp, 0, SEEK_END); - const auto num_points = ftell(fp) / sizeof(uint64_t); - fseek(fp, 0, 0); - for (auto index = 0u; index < num_points; index++) { - auto value = uint64_t{}; - auto nread = fread(&value, sizeof(uint64_t), 1, fp); - if (nread != 1) { - logr.error("Read sample returned {}", nread); - break; - } - data_points[op].push_back(value); - } - }; - if (args.async_xacts == 0) { - for (auto j = 0; j < args.num_threads; j++) { - load_sample(i, j); - } - } else { - // async mode uses only one file per process - load_sample(i, 0); - } - } - std::sort(data_points[op].begin(), data_points[op].end()); - const auto num_points = data_points[op].size(); - auto median = uint64_t{}; - if (num_points & 1) { - median = data_points[op][num_points / 2]; - } else { - median = (data_points[op][num_points / 2] + data_points[op][num_points / 2 - 1]) >> 1; - } - putField(median); - if (fp) { - if (first_op) { - first_op = 0; - } else { - fmt::fprintf(fp, ","); - } - fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), median); - } - } else { - putField("N/A"); - } - } - } - fmt::print("\n"); - - /* 95%ile Latency */ - if (fp) { - 
fmt::fprintf(fp, "}, \"p95Latency\": {"); - } - putTitle("95.0 pctile"); - first_op = 1; - for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - if (data_points[op].empty() || !final_stats.getLatencyUsTotal(op)) { - putField("N/A"); - continue; - } - const auto num_points = data_points[op].size(); - const auto point_95pct = static_cast(std::max(0., (num_points * 0.95) - 1)); - putField(data_points[op][point_95pct]); - if (fp) { - if (first_op) { - first_op = 0; - } else { - fmt::fprintf(fp, ","); - } - fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), data_points[op][point_95pct]); - } - } - } - fmt::printf("\n"); - - /* 99%ile Latency */ - if (fp) { - fmt::fprintf(fp, "}, \"p99Latency\": {"); - } - putTitle("99.0 pctile"); - first_op = 1; - for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - if (data_points[op].empty() || !final_stats.getLatencyUsTotal(op)) { - putField("N/A"); - continue; - } - const auto num_points = data_points[op].size(); - const auto point_99pct = static_cast(std::max(0., (num_points * 0.99) - 1)); - putField(data_points[op][point_99pct]); - if (fp) { - if (first_op) { - first_op = 0; - } else { - fmt::fprintf(fp, ","); - } - fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), data_points[op][point_99pct]); - } - } - } - fmt::print("\n"); - - /* 99.9%ile Latency */ - if (fp) { - fmt::fprintf(fp, "}, \"p99.9Latency\": {"); - } - putTitle("99.9 pctile"); - first_op = 1; - for (auto op = 0; op < MAX_OP; op++) { - if (args.txnspec.ops[op][OP_COUNT] > 0 || isAbstractOp(op)) { - if (data_points[op].empty() || !final_stats.getLatencyUsTotal(op)) { - putField("N/A"); - continue; - } - const auto num_points = data_points[op].size(); - const auto point_99_9pct = static_cast(std::max(0., (num_points * 0.999) - 1)); - putField(data_points[op][point_99_9pct]); - if (fp) { - if (first_op) { - first_op = 0; - } else { - fmt::fprintf(fp, ","); - } - 
fmt::fprintf(fp, "\"%s\": %lu", getOpName(op), data_points[op][point_99_9pct]); - } - } - } - fmt::print("\n"); - if (fp) { - fmt::fprintf(fp, "}}"); + // export the ddsketch if the flag was set + if (args.stats_export_path[0] != 0) { + std::ofstream f(args.stats_export_path); + f << final_stats; } const auto command_remove = fmt::format("rm -rf {}{}", TEMP_DATA_STORE, pid_main); @@ -1960,6 +1991,18 @@ int statsProcessMain(Arguments const& args, return 0; } +ThreadStatistics mergeSketchReport(Arguments& args) { + + ThreadStatistics stats; + for (int i = 0; i < args.num_report_files; i++) { + std::ifstream f{ args.report_files[i] }; + ThreadStatistics tmp; + f >> tmp; + stats.combine(tmp); + } + return stats; +} + int main(int argc, char* argv[]) { setlinebuf(stdout); @@ -1993,6 +2036,12 @@ int main(int argc, char* argv[]) { } } + if (args.mode == MODE_REPORT) { + ThreadStatistics stats = mergeSketchReport(args); + printThreadStats(stats, args, NULL, true); + return 0; + } + const auto pid_main = getpid(); /* create the shared memory for stats */ const auto shmpath = fmt::format("mako{}", pid_main); diff --git a/bindings/c/test/mako/mako.hpp b/bindings/c/test/mako/mako.hpp index 7c7a44b020..d2eb1708a7 100644 --- a/bindings/c/test/mako/mako.hpp +++ b/bindings/c/test/mako/mako.hpp @@ -44,6 +44,7 @@ constexpr const int MODE_INVALID = -1; constexpr const int MODE_CLEAN = 0; constexpr const int MODE_BUILD = 1; constexpr const int MODE_RUN = 2; +constexpr const int MODE_REPORT = 3; /* for long arguments */ enum ArgKind { @@ -73,6 +74,7 @@ enum ArgKind { ARG_CLIENT_THREADS_PER_VERSION, ARG_JSON_REPORT, ARG_BG_FILE_PATH, // if blob granule files are stored locally, mako will read and materialize them if this is set + ARG_EXPORT_PATH, ARG_DISTRIBUTED_TRACER_CLIENT }; @@ -119,6 +121,7 @@ constexpr const int NUM_CLUSTERS_MAX = 3; constexpr const int NUM_DATABASES_MAX = 10; constexpr const std::string_view KEY_PREFIX{ "mako" }; constexpr const std::string_view TEMP_DATA_STORE{ 
"/tmp/makoTemp" }; +constexpr const int MAX_REPORT_FILES = 200; /* benchmark parameters */ struct Arguments { @@ -162,6 +165,9 @@ struct Arguments { char json_output_path[PATH_MAX]; bool bg_materialize_files; char bg_file_path[PATH_MAX]; + char stats_export_path[PATH_MAX]; + char report_files[MAX_REPORT_FILES][PATH_MAX]; + int num_report_files; int distributed_tracer_client; }; diff --git a/bindings/c/test/mako/stats.hpp b/bindings/c/test/mako/stats.hpp index a718be3bb0..7a0e86e8c3 100644 --- a/bindings/c/test/mako/stats.hpp +++ b/bindings/c/test/mako/stats.hpp @@ -24,76 +24,66 @@ #include #include #include +#include +#include +#include #include #include +#include #include +#include "mako/mako.hpp" #include "operations.hpp" #include "time.hpp" +#include "ddsketch.hpp" +#include "contrib/rapidjson/rapidjson/document.h" +#include "contrib/rapidjson/rapidjson/rapidjson.h" +#include "contrib/rapidjson/rapidjson/stringbuffer.h" +#include "contrib/rapidjson/rapidjson/writer.h" +#include +#include +#include namespace mako { -/* rough cap on the number of samples to avoid OOM hindering benchmark */ -constexpr const size_t SAMPLE_CAP = 2000000; - -/* size of each block to get detailed latency for each operation */ -constexpr const size_t LAT_BLOCK_SIZE = 4093; - -/* hard cap on the number of sample blocks = 488 */ -constexpr const size_t MAX_LAT_BLOCKS = SAMPLE_CAP / LAT_BLOCK_SIZE; - -/* memory block allocated to each operation when collecting detailed latency */ -class LatencySampleBlock { - uint64_t samples[LAT_BLOCK_SIZE]{ - 0, - }; - uint64_t index{ 0 }; - +class DDSketchMako : public DDSketch { public: - LatencySampleBlock() noexcept = default; - bool full() const noexcept { return index >= LAT_BLOCK_SIZE; } - void put(timediff_t td) { - assert(!full()); - samples[index++] = toIntegerMicroseconds(td); - } - // return {data block, number of samples} - std::pair data() const noexcept { return { samples, index }; } -}; + void serialize(rapidjson::Writer& writer) const { 
+ writer.StartObject(); + writer.String("errorGuarantee"); + writer.Double(errorGuarantee); + writer.String("minValue"); + writer.Uint64(minValue); + writer.String("maxValue"); + writer.Uint64(maxValue); + writer.String("populationSize"); + writer.Uint64(populationSize); + writer.String("zeroPopulationSize"); + writer.Uint64(zeroPopulationSize); + writer.String("sum"); + writer.Uint64(sum); -/* collect sampled latencies until OOM is hit */ -class LatencySampleBin { - std::list blocks; - bool noMoreAlloc{ false }; - - bool tryAlloc() { - try { - blocks.emplace_back(); - } catch (const std::bad_alloc&) { - noMoreAlloc = true; - return false; + writer.String("buckets"); + writer.StartArray(); + for (auto b : buckets) { + writer.Uint64(b); } - return true; - } + writer.EndArray(); -public: - void reserveOneBlock() { - if (blocks.empty()) - tryAlloc(); + writer.EndObject(); } + void deserialize(const rapidjson::Value& obj) { + errorGuarantee = obj["errorGuarantee"].GetDouble(); + minValue = obj["minValue"].GetUint64(); + maxValue = obj["maxValue"].GetUint64(); + populationSize = obj["populationSize"].GetUint64(); + zeroPopulationSize = obj["zeroPopulationSize"].GetUint64(); + sum = obj["sum"].GetUint64(); - void put(timediff_t td) { - if (blocks.empty() || blocks.back().full()) { - if (blocks.size() >= MAX_LAT_BLOCKS || noMoreAlloc || !tryAlloc()) - return; - } - blocks.back().put(td); - } - - // iterate & apply for each block user function void(uint64_t const*, size_t) - template - void forEachBlock(Func&& fn) const { - for (const auto& block : blocks) { - auto [ptr, cnt] = block.data(); - fn(ptr, cnt); + auto jsonBuckets = obj["buckets"].GetArray(); + uint64_t idx = 0; + for (auto it = jsonBuckets.Begin(); it != jsonBuckets.End(); it++) { + buckets[idx] = it->GetUint64(); + idx++; } } }; @@ -101,21 +91,20 @@ public: class alignas(64) ThreadStatistics { uint64_t conflicts; uint64_t total_errors; - uint64_t ops[MAX_OP]; - uint64_t errors[MAX_OP]; - uint64_t 
latency_samples[MAX_OP]; - uint64_t latency_us_total[MAX_OP]; - uint64_t latency_us_min[MAX_OP]; - uint64_t latency_us_max[MAX_OP]; + std::array ops; + std::array errors; + std::array latency_samples; + std::array latency_us_total; + std::vector sketches; public: ThreadStatistics() noexcept { memset(this, 0, sizeof(ThreadStatistics)); - memset(latency_us_min, 0xff, sizeof(latency_us_min)); + sketches.resize(MAX_OP); } - ThreadStatistics(const ThreadStatistics& other) noexcept = default; - ThreadStatistics& operator=(const ThreadStatistics& other) noexcept = default; + ThreadStatistics(const ThreadStatistics& other) = default; + ThreadStatistics& operator=(const ThreadStatistics& other) = default; uint64_t getConflictCount() const noexcept { return conflicts; } @@ -129,23 +118,24 @@ public: uint64_t getLatencyUsTotal(int op) const noexcept { return latency_us_total[op]; } - uint64_t getLatencyUsMin(int op) const noexcept { return latency_us_min[op]; } + uint64_t getLatencyUsMin(int op) const noexcept { return sketches[op].min(); } - uint64_t getLatencyUsMax(int op) const noexcept { return latency_us_max[op]; } + uint64_t getLatencyUsMax(int op) const noexcept { return sketches[op].max(); } + + uint64_t percentile(int op, double quantile) { return sketches[op].percentile(quantile); } + + uint64_t mean(int op) const noexcept { return sketches[op].mean(); } // with 'this' as final aggregation, factor in 'other' void combine(const ThreadStatistics& other) { conflicts += other.conflicts; for (auto op = 0; op < MAX_OP; op++) { + sketches[op].mergeWith(other.sketches[op]); ops[op] += other.ops[op]; errors[op] += other.errors[op]; total_errors += other.errors[op]; latency_samples[op] += other.latency_samples[op]; latency_us_total[op] += other.latency_us_total[op]; - if (latency_us_min[op] > other.latency_us_min[op]) - latency_us_min[op] = other.latency_us_min[op]; - if (latency_us_max[op] < other.latency_us_max[op]) - latency_us_max[op] = other.latency_us_max[op]; } } @@ 
-162,15 +152,106 @@ public: void addLatency(int op, timediff_t diff) noexcept { const auto latency_us = toIntegerMicroseconds(diff); latency_samples[op]++; + sketches[op].addSample(latency_us); latency_us_total[op] += latency_us; - if (latency_us_min[op] > latency_us) - latency_us_min[op] = latency_us; - if (latency_us_max[op] < latency_us) - latency_us_max[op] = latency_us; } + + void writeToFile(const std::string& filename, int op) const { + rapidjson::StringBuffer ss; + rapidjson::Writer writer(ss); + sketches[op].serialize(writer); + std::ofstream f(filename); + f << ss.GetString(); + } + + void updateLatencies(const std::vector other_sketches) { sketches = other_sketches; } + + friend std::ofstream& operator<<(std::ofstream& os, ThreadStatistics& stats); + friend std::ifstream& operator>>(std::ifstream& is, ThreadStatistics& stats); }; -using LatencySampleBinArray = std::array; +inline std::ofstream& operator<<(std::ofstream& os, ThreadStatistics& stats) { + rapidjson::StringBuffer ss; + rapidjson::Writer writer(ss); + writer.StartObject(); + writer.String("conflicts"); + writer.Uint64(stats.conflicts); + writer.String("total_errors"); + writer.Uint64(stats.total_errors); + + writer.String("ops"); + writer.StartArray(); + for (auto op = 0; op < MAX_OP; op++) { + writer.Uint64(stats.ops[op]); + } + writer.EndArray(); + + writer.String("errors"); + writer.StartArray(); + for (auto op = 0; op < MAX_OP; op++) { + writer.Uint64(stats.errors[op]); + } + writer.EndArray(); + + writer.String("latency_samples"); + writer.StartArray(); + for (auto op = 0; op < MAX_OP; op++) { + writer.Uint64(stats.latency_samples[op]); + } + writer.EndArray(); + + writer.String("latency_us_total"); + writer.StartArray(); + for (auto op = 0; op < MAX_OP; op++) { + writer.Uint64(stats.latency_us_total[op]); + } + writer.EndArray(); + + for (auto op = 0; op < MAX_OP; op++) { + if (stats.sketches[op].getPopulationSize() > 0) { + std::string op_name = getOpName(op); + 
writer.String(op_name.c_str()); + stats.sketches[op].serialize(writer); + } + } + writer.EndObject(); + os << ss.GetString(); + return os; +} + +inline void populateArray(std::array& arr, + rapidjson::GenericArray>>& json) { + uint64_t idx = 0; + for (auto it = json.Begin(); it != json.End(); it++) { + arr[idx] = it->GetUint64(); + idx++; + } +} + +inline std::ifstream& operator>>(std::ifstream& is, ThreadStatistics& stats) { + std::stringstream buffer; + buffer << is.rdbuf(); + rapidjson::Document doc; + doc.Parse(buffer.str().c_str()); + stats.conflicts = doc["conflicts"].GetUint64(); + stats.total_errors = doc["total_errors"].GetUint64(); + + auto jsonOps = doc["ops"].GetArray(); + auto jsonErrors = doc["errors"].GetArray(); + auto jsonLatencySamples = doc["latency_samples"].GetArray(); + auto jsonLatencyUsTotal = doc["latency_us_total"].GetArray(); + + populateArray(stats.ops, jsonOps); + populateArray(stats.errors, jsonErrors); + populateArray(stats.latency_samples, jsonLatencySamples); + populateArray(stats.latency_us_total, jsonLatencyUsTotal); + for (int op = 0; op < MAX_OP; op++) { + const std::string op_name = getOpName(op); + stats.sketches[op].deserialize(doc[op_name.c_str()]); + } + + return is; +} } // namespace mako From c97a1168cbfe090b8f4d1d121c6437ae888d9759 Mon Sep 17 00:00:00 2001 From: "A.J. 
Beamon" Date: Tue, 7 Jun 2022 08:51:10 -0700 Subject: [PATCH 36/49] Fix formatting issue --- bindings/c/test/mako/mako.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 74451ed9f8..2d3ff9669b 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -655,8 +655,8 @@ void workerThread(ThreadArgs& thread_args) { logr.error("populate failed"); } } else if (args.mode == MODE_RUN) { - auto rc = runWorkload( - database, args, thread_tps, throttle_factor, thread_iters, signal, stats, dotrace, dotagging); + auto rc = + runWorkload(database, args, thread_tps, throttle_factor, thread_iters, signal, stats, dotrace, dotagging); if (rc < 0) { logr.error("runWorkload failed"); } From 4f308b34fc990a542e24c8bcaaba81acda188ca6 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Mon, 6 Jun 2022 15:22:55 -0700 Subject: [PATCH 37/49] Fix an off-by-one error in determining whether to include the entire range in the conflict ranges when a reverse range read returns early due to limit. --- fdbclient/ReadYourWrites.actor.cpp | 6 ++- fdbserver/workloads/ConflictRange.actor.cpp | 43 +++++++++++++++------ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index cd7d59c041..a64a65e58a 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -253,6 +253,8 @@ public: if (read.begin.getKey() < read.end.getKey()) { rangeBegin = read.begin.getKey(); + // If the end offset is 1 (first greater than / first greater or equal) or more, then no changes to the + // range after the returned results can change the outcome. rangeEnd = read.end.offset > 0 && result.more ? 
read.begin.getKey() : read.end.getKey(); } else { rangeBegin = read.end.getKey(); @@ -289,7 +291,9 @@ public: bool endInArena = false; if (read.begin.getKey() < read.end.getKey()) { - rangeBegin = read.begin.offset <= 0 && result.more ? read.end.getKey() : read.begin.getKey(); + // If the begin offset is 1 (first greater than / first greater or equal) or less, then no changes to the + // range prior to the returned results can change the outcome. + rangeBegin = read.begin.offset <= 1 && result.more ? read.end.getKey() : read.begin.getKey(); rangeEnd = read.end.getKey(); } else { rangeBegin = read.end.getKey(); diff --git a/fdbserver/workloads/ConflictRange.actor.cpp b/fdbserver/workloads/ConflictRange.actor.cpp index 8509482a8f..c95ad02320 100644 --- a/fdbserver/workloads/ConflictRange.actor.cpp +++ b/fdbserver/workloads/ConflictRange.actor.cpp @@ -80,6 +80,7 @@ struct ConflictRangeWorkload : TestWorkload { state int offsetA; state int offsetB; state int randomLimit; + state Reverse reverse = Reverse::False; state bool randomSets = false; state std::set insertedSet; state RangeResult originalResults; @@ -159,10 +160,13 @@ struct ConflictRangeWorkload : TestWorkload { offsetA = deterministicRandom()->randomInt(-1 * self->maxOffset, self->maxOffset); offsetB = deterministicRandom()->randomInt(-1 * self->maxOffset, self->maxOffset); randomLimit = deterministicRandom()->randomInt(1, self->maxKeySpace); + reverse.set(deterministicRandom()->coinflip()); RangeResult res = wait(tr1.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA), KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB), - randomLimit)); + randomLimit, + Snapshot::False, + reverse)); if (res.size()) { originalResults = res; break; @@ -225,13 +229,17 @@ struct ConflictRangeWorkload : TestWorkload { StringRef(format("%010d", clearedEnd)))); RangeResult res = wait(trRYOW.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA), KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB), - 
randomLimit)); + randomLimit, + Snapshot::False, + reverse)); wait(trRYOW.commit()); } else { tr3.clear(StringRef(format("%010d", self->maxKeySpace + 1))); RangeResult res = wait(tr3.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA), KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB), - randomLimit)); + randomLimit, + Snapshot::False, + reverse)); wait(tr3.commit()); } } catch (Error& e) { @@ -252,7 +260,9 @@ struct ConflictRangeWorkload : TestWorkload { RangeResult res = wait(tr4.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA), KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB), - randomLimit)); + randomLimit, + Snapshot::False, + reverse)); ++self->withConflicts; if (res.size() == originalResults.size()) { @@ -261,20 +271,27 @@ struct ConflictRangeWorkload : TestWorkload { throw not_committed(); // Discard known cases where conflicts do not change the results - if (originalResults.size() == randomLimit && offsetB <= 0) { - // Hit limit but end offset goes backwards, so changes could effect results even though in - // this instance they did not + if (originalResults.size() == randomLimit && + ((offsetB <= 0 && !reverse) || (offsetA > 1 && reverse))) { + // Hit limit but end offset goes into the range, so changes could effect results even though + // in this instance they did not throw not_committed(); } - if (originalResults[originalResults.size() - 1].key >= sentinelKey) { + KeyRef smallestResult = originalResults[0].key; + KeyRef largestResult = originalResults[originalResults.size() - 1].key; + if (reverse) { + std::swap(smallestResult, largestResult); + } + + if (largestResult >= sentinelKey) { // Results go into server keyspace, so if a key selector does not fully resolve offset, a // change won't effect results throw not_committed(); } - if ((originalResults[0].key == firstElement || - originalResults[0].key == StringRef(format("%010d", *(insertedSet.begin())))) && + if ((smallestResult == firstElement || + smallestResult == 
StringRef(format("%010d", *(insertedSet.begin())))) && offsetA < 0) { // Results return the first element, and the begin offset is negative, so if a key selector // does not fully resolve the offset, a change won't effect results @@ -308,6 +325,7 @@ struct ConflictRangeWorkload : TestWorkload { .detail("OffsetA", offsetA) .detail("OffsetB", offsetB) .detail("RandomLimit", randomLimit) + .detail("Reverse", reverse) .detail("Size", originalResults.size()) .detail("Results", keyStr1) .detail("Original", keyStr2); @@ -328,7 +346,9 @@ struct ConflictRangeWorkload : TestWorkload { // If the commit is successful, check that the result matches the first execution. RangeResult res = wait(tr4.getRange(KeySelectorRef(StringRef(myKeyA), onEqualA, offsetA), KeySelectorRef(StringRef(myKeyB), onEqualB, offsetB), - randomLimit)); + randomLimit, + Snapshot::False, + reverse)); ++self->withoutConflicts; if (res.size() == originalResults.size()) { @@ -366,6 +386,7 @@ struct ConflictRangeWorkload : TestWorkload { .detail("OffsetA", offsetA) .detail("OffsetB", offsetB) .detail("RandomLimit", randomLimit) + .detail("Reverse", reverse) .detail("Size", originalResults.size()) .detail("Results", keyStr1) .detail("Original", keyStr2); From ae865027d6d7bb537cd6433c093ae9f1855c2cd4 Mon Sep 17 00:00:00 2001 From: Josh Slocum Date: Tue, 7 Jun 2022 13:50:11 -0500 Subject: [PATCH 38/49] fixes for blob metadata memory from valgrind --- fdbserver/BlobManager.actor.cpp | 1 + fdbserver/BlobWorker.actor.cpp | 1 + fdbserver/workloads/BlobGranuleCorrectnessWorkload.actor.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/fdbserver/BlobManager.actor.cpp b/fdbserver/BlobManager.actor.cpp index 24e341ac71..bec6ef2e89 100644 --- a/fdbserver/BlobManager.actor.cpp +++ b/fdbserver/BlobManager.actor.cpp @@ -2298,6 +2298,7 @@ ACTOR Future> getBStoreForGranule(Reference data = self->tenantData.getDataForGranule(granuleRange); if (data.isValid()) { wait(data->bstoreLoaded.getFuture()); + wait(delay(0)); 
return data->bstore; } else { // race on startup between loading tenant ranges and bgcc/purging. just wait diff --git a/fdbserver/BlobWorker.actor.cpp b/fdbserver/BlobWorker.actor.cpp index f5f915bd17..527768b32f 100644 --- a/fdbserver/BlobWorker.actor.cpp +++ b/fdbserver/BlobWorker.actor.cpp @@ -2748,6 +2748,7 @@ ACTOR Future> loadBStoreForTenant(Reference data = bwData->tenantData.getDataForGranule(keyRange); if (data.isValid()) { wait(data->bstoreLoaded.getFuture()); + wait(delay(0)); return data->bstore; } else { TEST(true); // bstore for unknown tenant diff --git a/fdbserver/workloads/BlobGranuleCorrectnessWorkload.actor.cpp b/fdbserver/workloads/BlobGranuleCorrectnessWorkload.actor.cpp index d07f948277..a9de181479 100644 --- a/fdbserver/workloads/BlobGranuleCorrectnessWorkload.actor.cpp +++ b/fdbserver/workloads/BlobGranuleCorrectnessWorkload.actor.cpp @@ -265,6 +265,7 @@ struct BlobGranuleCorrectnessWorkload : TestWorkload { state Reference data = tenantData.getDataForGranule(self->directories[directoryIdx]->directoryRange); wait(data->bstoreLoaded.getFuture()); + wait(delay(0)); self->directories[directoryIdx]->bstore = data->bstore; } From 5a4082bfe8693a3c008b63f0e58fe16609733fe2 Mon Sep 17 00:00:00 2001 From: Ray Jenkins Date: Tue, 7 Jun 2022 14:07:08 -0500 Subject: [PATCH 39/49] Add DistributedTracerClient enum to mako. 
(#7297) --- bindings/c/test/mako/mako.cpp | 10 +++++----- bindings/c/test/mako/mako.hpp | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/bindings/c/test/mako/mako.cpp b/bindings/c/test/mako/mako.cpp index 2d3ff9669b..d709491273 100644 --- a/bindings/c/test/mako/mako.cpp +++ b/bindings/c/test/mako/mako.cpp @@ -678,10 +678,10 @@ int workerProcessMain(Arguments const& args, int worker_id, shared_memory::Acces /* enable distributed tracing */ switch (args.distributed_tracer_client) { - case 1: + case DistributedTracerClient::NETWORK_LOSSY: err = network::setOptionNothrow(FDB_NET_OPTION_DISTRIBUTED_CLIENT_TRACER, BytesRef(toBytePtr("network_lossy"))); break; - case 2: + case DistributedTracerClient::LOG_FILE: err = network::setOptionNothrow(FDB_NET_OPTION_DISTRIBUTED_CLIENT_TRACER, BytesRef(toBytePtr("log_file"))); break; } @@ -1344,11 +1344,11 @@ int parseArguments(int argc, char* argv[], Arguments& args) { break; case ARG_DISTRIBUTED_TRACER_CLIENT: if (strcmp(optarg, "disabled") == 0) { - args.distributed_tracer_client = 0; + args.distributed_tracer_client = DistributedTracerClient::DISABLED; } else if (strcmp(optarg, "network_lossy") == 0) { - args.distributed_tracer_client = 1; + args.distributed_tracer_client = DistributedTracerClient::NETWORK_LOSSY; } else if (strcmp(optarg, "log_file") == 0) { - args.distributed_tracer_client = 2; + args.distributed_tracer_client = DistributedTracerClient::LOG_FILE; } else { args.distributed_tracer_client = -1; } diff --git a/bindings/c/test/mako/mako.hpp b/bindings/c/test/mako/mako.hpp index 8670f72b57..5f67af5ae1 100644 --- a/bindings/c/test/mako/mako.hpp +++ b/bindings/c/test/mako/mako.hpp @@ -106,6 +106,8 @@ enum OpKind { enum TPSChangeTypes { TPS_SIN, TPS_SQUARE, TPS_PULSE }; +enum DistributedTracerClient { DISABLED, NETWORK_LOSSY, LOG_FILE }; + /* we set WorkloadSpec and Arguments only once in the master process, * and won't be touched by child processes. 
*/ From 1f8fc32f41deb572618dd05f611986650b353b78 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 7 Jun 2022 13:22:56 -0700 Subject: [PATCH 40/49] Save a memcpy in the tlog peek path (#7328) --- fdbserver/LogRouter.actor.cpp | 4 +++- fdbserver/TLogServer.actor.cpp | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fdbserver/LogRouter.actor.cpp b/fdbserver/LogRouter.actor.cpp index bb9574fc1b..aa3e159be5 100644 --- a/fdbserver/LogRouter.actor.cpp +++ b/fdbserver/LogRouter.actor.cpp @@ -573,7 +573,9 @@ Future logRouterPeekMessages(PromiseType replyPromise, TLogPeekReply reply; reply.maxKnownVersion = self->version.get(); reply.minKnownCommittedVersion = self->poppedVersion; - reply.messages = StringRef(reply.arena, messages.toValue()); + auto messagesValue = messages.toValue(); + reply.arena.dependsOn(messagesValue.arena()); + reply.messages = messagesValue; reply.popped = self->minPopped.get() >= self->startVersion ? self->minPopped.get() : 0; reply.end = endVersion; reply.onlySpilled = false; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 797e0fb3f9..b04d6247e5 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -2020,7 +2020,9 @@ Future tLogPeekMessages(PromiseType replyPromise, TLogPeekReply reply; reply.maxKnownVersion = logData->version.get(); reply.minKnownCommittedVersion = logData->minKnownCommittedVersion; - reply.messages = StringRef(reply.arena, messages.toValue()); + auto messagesValue = messages.toValue(); + reply.arena.dependsOn(messagesValue.arena()); + reply.messages = messagesValue; reply.end = endVersion; reply.onlySpilled = onlySpilled; From 1997e6057cb6e51c5c9c9f5a0daf315ed67f204a Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 7 Jun 2022 14:48:01 -0700 Subject: [PATCH 41/49] Fix a heap-use-after-free in a unit test (#7230) * Fix a heap-use-after-free in a unit test The data passed to IAsyncFile::write must remain valid until the future is ready. 
* Use holdWhile instead of a new state variable --- fdbserver/worker.actor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 27e355368b..175723de86 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -2706,8 +2706,8 @@ ACTOR Future updateNewestSoftwareVersion(std::string folder, 0600)); SWVersion swVersion(latestVersion, currentVersion, minCompatibleVersion); - auto s = swVersionValue(swVersion); - ErrorOr e = wait(errorOr(newVersionFile->write(s.toString().c_str(), s.size(), 0))); + Value s = swVersionValue(swVersion); + ErrorOr e = wait(holdWhile(s, errorOr(newVersionFile->write(s.begin(), s.size(), 0)))); if (e.isError()) { throw e.getError(); } From 217ba24b6f68bc6ae3bb20367433e3fa97ca0e4d Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 7 Jun 2022 16:39:46 -0700 Subject: [PATCH 42/49] Add rss_bytes to process memory and fix available_bytes calculation Since memory is now limited with RSS size, add RSS size in status json for reporting. 
Also change how available_bytes is calculated from: (available + virtual memory) * process_limit / machine_limit to: (available memory) * process_limit / machine_limit --- documentation/sphinx/source/mr-status-json-schemas.rst.inc | 3 ++- fdbserver/Status.actor.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/documentation/sphinx/source/mr-status-json-schemas.rst.inc b/documentation/sphinx/source/mr-status-json-schemas.rst.inc index af96e3e1f2..79cad80a8f 100644 --- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc +++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc @@ -260,7 +260,8 @@ "available_bytes":0, // an estimate of the process' fair share of the memory available to fdbservers "limit_bytes":0, // memory limit per process "unused_allocated_memory":0, - "used_bytes":0 // virtual memory size of the process + "used_bytes":0, // virtual memory size of the process + "rss_bytes":0 // resident memory size of the process }, "messages":[ { diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 1e807098a2..7870a735c8 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -979,6 +979,7 @@ ACTOR static Future processStatusFetcher( statusObj["network"] = networkObj; memoryObj.setKeyRawNumber("used_bytes", processMetrics.getValue("Memory")); + memoryObj.setKeyRawNumber("rss_bytes", processMetrics.getValue("ResidentMemory")); memoryObj.setKeyRawNumber("unused_allocated_memory", processMetrics.getValue("UnusedAllocatedMemory")); } @@ -1010,8 +1011,7 @@ ACTOR static Future processStatusFetcher( auto machineMemInfo = machineMemoryUsage[workerItr->interf.locality.machineId()]; if (machineMemInfo.valid() && memoryLimit > 0) { ASSERT(machineMemInfo.aggregateLimit > 0); - int64_t memory = - (availableMemory + machineMemInfo.memoryUsage) * memoryLimit / machineMemInfo.aggregateLimit; + int64_t memory = availableMemory * memoryLimit / machineMemInfo.aggregateLimit; 
memoryObj["available_bytes"] = std::min(std::max(memory, 0), memoryLimit); } } From bbf8cb4b0274a8888ff4c590438f087c382ff41d Mon Sep 17 00:00:00 2001 From: Yi Wu Date: Tue, 7 Jun 2022 21:00:13 -0700 Subject: [PATCH 43/49] GetEncryptCipherKeys helper function and misc encryption changes (#7252) Adding GetEncryptCipherKeys and GetLatestCipherKeys helper actors, which encapsulate cipher key fetch logic: getting cipher keys from the local BlobCipherKeyCache, and on a cache miss fetching them from EKP (encrypt key proxy). These helper actors also handle the case where EKP gets shut down in the middle: they listen on ServerDBInfo to wait for a new EKP to start and send a new request there instead. The PR also has other misc changes: * EKP is by default started in simulation regardless of the ENABLE_ENCRYPTION knob, so that in restart tests, if ENABLE_ENCRYPTION is switched from on to off after restart, encrypted data will still be readable. * API tweaks for BlobCipher * Adding an ENABLE_TLOG_ENCRYPTION knob which will be used in later PRs. The knob should normally be consistent with the ENABLE_ENCRYPTION knob, but could be used to disable TLog encryption alone. This PR is split out from #6942.
--- fdbclient/ServerKnobs.cpp | 15 +- fdbclient/ServerKnobs.h | 1 + fdbserver/CMakeLists.txt | 2 + fdbserver/ClusterController.actor.cpp | 16 +- fdbserver/GetEncryptCipherKeys.actor.cpp | 256 ++++++++++++++++++ fdbserver/GetEncryptCipherKeys.h | 58 ++++ fdbserver/Status.actor.cpp | 2 +- .../workloads/ConsistencyCheck.actor.cpp | 2 +- fdbserver/workloads/EncryptionOps.actor.cpp | 2 +- flow/BlobCipher.cpp | 79 +++--- flow/BlobCipher.h | 92 +++++-- flow/EncryptUtils.h | 3 +- 12 files changed, 446 insertions(+), 82 deletions(-) create mode 100644 fdbserver/GetEncryptCipherKeys.actor.cpp create mode 100644 fdbserver/GetEncryptCipherKeys.h diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index c80b77f648..0493295dc0 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -864,16 +864,17 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( LATENCY_METRICS_LOGGING_INTERVAL, 60.0 ); // Cluster recovery - init ( CLUSTER_RECOVERY_EVENT_NAME_PREFIX, "Master"); + init ( CLUSTER_RECOVERY_EVENT_NAME_PREFIX, "Master" ); - // encrypt key proxy - init( ENABLE_ENCRYPTION, false ); - init( ENCRYPTION_MODE, "AES-256-CTR"); - init( SIM_KMS_MAX_KEYS, 4096); - init( ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH, 100000); + // encrypt key proxy + init( ENABLE_ENCRYPTION, false ); if ( randomize && BUGGIFY ) { ENABLE_ENCRYPTION = deterministicRandom()->coinflip(); } + init( ENCRYPTION_MODE, "AES-256-CTR" ); + init( SIM_KMS_MAX_KEYS, 4096 ); + init( ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH, 100000 ); + init( ENABLE_TLOG_ENCRYPTION, ENABLE_ENCRYPTION ); if ( randomize && BUGGIFY) { ENABLE_TLOG_ENCRYPTION = (ENABLE_ENCRYPTION && !PROXY_USE_RESOLVER_PRIVATE_MUTATIONS && deterministicRandom()->coinflip()); } // KMS connector type - init( KMS_CONNECTOR_TYPE, "RESTKmsConnector"); + init( KMS_CONNECTOR_TYPE, "RESTKmsConnector" ); // Blob granlues init( BG_URL, isSimulated ? 
"file://fdbblob/" : "" ); // TODO: store in system key space or something, eventually diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 1768e340b7..d8180eaf08 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -837,6 +837,7 @@ public: std::string ENCRYPTION_MODE; int SIM_KMS_MAX_KEYS; int ENCRYPT_PROXY_MAX_DBG_TRACE_LENGTH; + bool ENABLE_TLOG_ENCRYPTION; // Key Management Service (KMS) Connector std::string KMS_CONNECTOR_TYPE; diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index 754a372e07..34ac56673b 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -44,6 +44,8 @@ set(FDBSERVER_SRCS FDBExecHelper.actor.cpp FDBExecHelper.actor.h fdbserver.actor.cpp + GetEncryptCipherKeys.actor.cpp + GetEncryptCipherKeys.h GrvProxyServer.actor.cpp IConfigConsumer.cpp IConfigConsumer.h diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index dfe0b380a8..ef4a34e8e0 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -619,7 +619,7 @@ void checkBetterSingletons(ClusterControllerData* self) { } WorkerDetails newEKPWorker; - if (SERVER_KNOBS->ENABLE_ENCRYPTION) { + if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) { newEKPWorker = findNewProcessForSingleton(self, ProcessClass::EncryptKeyProxy, id_used); } @@ -633,7 +633,7 @@ void checkBetterSingletons(ClusterControllerData* self) { } ProcessClass::Fitness bestFitnessForEKP; - if (SERVER_KNOBS->ENABLE_ENCRYPTION) { + if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) { bestFitnessForEKP = findBestFitnessForSingleton(self, newEKPWorker, ProcessClass::EncryptKeyProxy); } @@ -658,7 +658,7 @@ void checkBetterSingletons(ClusterControllerData* self) { } bool ekpHealthy = true; - if (SERVER_KNOBS->ENABLE_ENCRYPTION) { + if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) { ekpHealthy = isHealthySingleton( self, newEKPWorker, ekpSingleton, 
bestFitnessForEKP, self->recruitingEncryptKeyProxyID); } @@ -682,7 +682,7 @@ void checkBetterSingletons(ClusterControllerData* self) { } Optional> currEKPProcessId, newEKPProcessId; - if (SERVER_KNOBS->ENABLE_ENCRYPTION) { + if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) { currEKPProcessId = ekpSingleton.interface.get().locality.processId(); newEKPProcessId = newEKPWorker.interf.locality.processId(); } @@ -694,7 +694,7 @@ void checkBetterSingletons(ClusterControllerData* self) { newPids.emplace_back(newBMProcessId); } - if (SERVER_KNOBS->ENABLE_ENCRYPTION) { + if (SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) { currPids.emplace_back(currEKPProcessId); newPids.emplace_back(newEKPProcessId); } @@ -709,7 +709,7 @@ void checkBetterSingletons(ClusterControllerData* self) { } // if the knob is disabled, the EKP coloc counts should have no affect on the coloc counts check below - if (!SERVER_KNOBS->ENABLE_ENCRYPTION) { + if (!SERVER_KNOBS->ENABLE_ENCRYPTION && !g_network->isSimulated()) { ASSERT(currColocMap[currEKPProcessId] == 0); ASSERT(newColocMap[newEKPProcessId] == 0); } @@ -1266,7 +1266,7 @@ ACTOR Future registerWorker(RegisterWorkerRequest req, self, w, currSingleton, registeringSingleton, self->recruitingBlobManagerID); } - if (SERVER_KNOBS->ENABLE_ENCRYPTION && req.encryptKeyProxyInterf.present()) { + if ((SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) && req.encryptKeyProxyInterf.present()) { auto currSingleton = EncryptKeyProxySingleton(self->db.serverInfo->get().encryptKeyProxy); auto registeringSingleton = EncryptKeyProxySingleton(req.encryptKeyProxyInterf); haltRegisteringOrCurrentSingleton( @@ -2519,7 +2519,7 @@ ACTOR Future clusterControllerCore(ClusterControllerFullInterface interf, state Future> error = errorOr(actorCollection(self.addActor.getFuture())); // EncryptKeyProxy is necessary for TLog recovery, recruit it as the first process - if (SERVER_KNOBS->ENABLE_ENCRYPTION) { + if 
(SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) { self.addActor.send(monitorEncryptKeyProxy(&self)); } self.addActor.send(clusterWatchDatabase(&self, &self.db, coordinators, leaderFail)); // Start the master database diff --git a/fdbserver/GetEncryptCipherKeys.actor.cpp b/fdbserver/GetEncryptCipherKeys.actor.cpp new file mode 100644 index 0000000000..83fac1e28e --- /dev/null +++ b/fdbserver/GetEncryptCipherKeys.actor.cpp @@ -0,0 +1,256 @@ +/* + * GetEncryptCipherKeys.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "fdbserver/GetEncryptCipherKeys.h" + +#include + +namespace { + +Optional getEncryptKeyProxyId(const Reference const>& db) { + return db->get().encryptKeyProxy.map([](EncryptKeyProxyInterface proxy) { return proxy.id(); }); +} + +ACTOR Future onEncryptKeyProxyChange(Reference const> db) { + state Optional previousProxyId = getEncryptKeyProxyId(db); + state Optional currentProxyId; + loop { + wait(db->onChange()); + currentProxyId = getEncryptKeyProxyId(db); + if (currentProxyId != previousProxyId) { + break; + } + } + TraceEvent("GetCipherKeys_EncryptKeyProxyChanged") + .detail("PreviousProxyId", previousProxyId.orDefault(UID())) + .detail("CurrentProxyId", currentProxyId.orDefault(UID())); + return Void(); +} + +ACTOR Future getUncachedLatestEncryptCipherKeys( + Reference const> db, + EKPGetLatestBaseCipherKeysRequest request) { + Optional proxy = db->get().encryptKeyProxy; + if (!proxy.present()) { + // Wait for onEncryptKeyProxyChange. + TraceEvent("GetLatestCipherKeys_EncryptKeyProxyNotPresent"); + return Never(); + } + request.reply.reset(); + try { + EKPGetLatestBaseCipherKeysReply reply = wait(proxy.get().getLatestBaseCipherKeys.getReply(request)); + if (reply.error.present()) { + TraceEvent("GetLatestCipherKeys_RequestFailed").error(reply.error.get()); + throw encrypt_keys_fetch_failed(); + } + return reply; + } catch (Error& e) { + TraceEvent("GetLatestCipherKeys_CaughtError").error(e); + if (e.code() == error_code_broken_promise) { + // Wait for onEncryptKeyProxyChange. 
+ return Never(); + } + throw; + } +} + +} // anonymous namespace + +ACTOR Future>> getLatestEncryptCipherKeys( + Reference const> db, + std::unordered_map domains) { + state Reference cipherKeyCache = BlobCipherKeyCache::getInstance(); + state std::unordered_map> cipherKeys; + state EKPGetLatestBaseCipherKeysRequest request; + + if (!db.isValid()) { + TraceEvent(SevError, "GetLatestCipherKeys_ServerDBInfoNotAvailable"); + throw encrypt_ops_error(); + } + + // Collect cached cipher keys. + for (auto& domain : domains) { + Reference cachedCipherKey = cipherKeyCache->getLatestCipherKey(domain.first /*domainId*/); + if (cachedCipherKey.isValid()) { + cipherKeys[domain.first] = cachedCipherKey; + } else { + request.encryptDomainInfos.emplace_back( + domain.first /*domainId*/, domain.second /*domainName*/, request.arena); + } + } + if (request.encryptDomainInfos.empty()) { + return cipherKeys; + } + // Fetch any uncached cipher keys. + loop choose { + when(EKPGetLatestBaseCipherKeysReply reply = wait(getUncachedLatestEncryptCipherKeys(db, request))) { + // Insert base cipher keys into cache and construct result. + for (const EKPBaseCipherDetails& details : reply.baseCipherDetails) { + EncryptCipherDomainId domainId = details.encryptDomainId; + if (domains.count(domainId) > 0 && cipherKeys.count(domainId) == 0) { + Reference cipherKey = cipherKeyCache->insertCipherKey( + domainId, details.baseCipherId, details.baseCipherKey.begin(), details.baseCipherKey.size()); + ASSERT(cipherKey.isValid()); + cipherKeys[domainId] = cipherKey; + } + } + // Check for any missing cipher keys. + for (auto& domain : request.encryptDomainInfos) { + if (cipherKeys.count(domain.domainId) == 0) { + TraceEvent(SevWarn, "GetLatestCipherKeys_KeyMissing").detail("DomainId", domain.domainId); + throw encrypt_key_not_found(); + } + } + break; + } + // In case encryptKeyProxy has changed, retry the request. 
+ when(wait(onEncryptKeyProxyChange(db))) {} + } + return cipherKeys; +} + +namespace { + +ACTOR Future getUncachedEncryptCipherKeys(Reference const> db, + EKPGetBaseCipherKeysByIdsRequest request) { + Optional proxy = db->get().encryptKeyProxy; + if (!proxy.present()) { + // Wait for onEncryptKeyProxyChange. + TraceEvent("GetCipherKeys_EncryptKeyProxyNotPresent"); + return Never(); + } + request.reply.reset(); + try { + EKPGetBaseCipherKeysByIdsReply reply = wait(proxy.get().getBaseCipherKeysByIds.getReply(request)); + if (reply.error.present()) { + TraceEvent(SevWarn, "GetCipherKeys_RequestFailed").error(reply.error.get()); + throw encrypt_keys_fetch_failed(); + } + return reply; + } catch (Error& e) { + TraceEvent("GetCipherKeys_CaughtError").error(e); + if (e.code() == error_code_broken_promise) { + // Wait for onEncryptKeyProxyChange. + return Never(); + } + throw; + } +} + +using BaseCipherIndex = std::pair; + +} // anonymous namespace + +ACTOR Future>> getEncryptCipherKeys( + Reference const> db, + std::unordered_set cipherDetails) { + state Reference cipherKeyCache = BlobCipherKeyCache::getInstance(); + state std::unordered_map> cipherKeys; + state std::unordered_set> uncachedBaseCipherIds; + state EKPGetBaseCipherKeysByIdsRequest request; + + if (!db.isValid()) { + TraceEvent(SevError, "GetCipherKeys_ServerDBInfoNotAvailable"); + throw encrypt_ops_error(); + } + + // Collect cached cipher keys. 
+ for (const BlobCipherDetails& details : cipherDetails) { + Reference cachedCipherKey = + cipherKeyCache->getCipherKey(details.encryptDomainId, details.baseCipherId, details.salt); + if (cachedCipherKey.isValid()) { + cipherKeys.emplace(details, cachedCipherKey); + } else { + uncachedBaseCipherIds.insert(std::make_pair(details.encryptDomainId, details.baseCipherId)); + } + } + if (uncachedBaseCipherIds.empty()) { + return cipherKeys; + } + for (const BaseCipherIndex& id : uncachedBaseCipherIds) { + request.baseCipherInfos.emplace_back( + id.first /*domainId*/, id.second /*baseCipherId*/, StringRef() /*domainName*/, request.arena); + } + // Fetch any uncached cipher keys. + loop choose { + when(EKPGetBaseCipherKeysByIdsReply reply = wait(getUncachedEncryptCipherKeys(db, request))) { + std::unordered_map> baseCipherKeys; + for (const EKPBaseCipherDetails& baseDetails : reply.baseCipherDetails) { + BaseCipherIndex baseIdx = std::make_pair(baseDetails.encryptDomainId, baseDetails.baseCipherId); + baseCipherKeys[baseIdx] = baseDetails.baseCipherKey; + } + // Insert base cipher keys into cache and construct result. + for (const BlobCipherDetails& details : cipherDetails) { + if (cipherKeys.count(details) > 0) { + continue; + } + BaseCipherIndex baseIdx = std::make_pair(details.encryptDomainId, details.baseCipherId); + const auto& itr = baseCipherKeys.find(baseIdx); + if (itr == baseCipherKeys.end()) { + TraceEvent(SevError, "GetCipherKeys_KeyMissing") + .detail("DomainId", details.encryptDomainId) + .detail("BaseCipherId", details.baseCipherId); + throw encrypt_key_not_found(); + } + Reference cipherKey = cipherKeyCache->insertCipherKey(details.encryptDomainId, + details.baseCipherId, + itr->second.begin(), + itr->second.size(), + details.salt); + ASSERT(cipherKey.isValid()); + cipherKeys[details] = cipherKey; + } + break; + } + // In case encryptKeyProxy has changed, retry the request. 
+ when(wait(onEncryptKeyProxyChange(db))) {} + } + return cipherKeys; +} + +ACTOR Future getLatestSystemEncryptCipherKeys(Reference const> db) { + static std::unordered_map domains = { + { SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME }, + { ENCRYPT_HEADER_DOMAIN_ID, FDB_DEFAULT_ENCRYPT_DOMAIN_NAME } + }; + std::unordered_map> cipherKeys = + wait(getLatestEncryptCipherKeys(db, domains)); + ASSERT(cipherKeys.count(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID) > 0); + ASSERT(cipherKeys.count(ENCRYPT_HEADER_DOMAIN_ID) > 0); + TextAndHeaderCipherKeys result{ cipherKeys.at(SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID), + cipherKeys.at(ENCRYPT_HEADER_DOMAIN_ID) }; + ASSERT(result.cipherTextKey.isValid()); + ASSERT(result.cipherHeaderKey.isValid()); + return result; +} + +ACTOR Future getEncryptCipherKeys(Reference const> db, + BlobCipherEncryptHeader header) { + std::unordered_set cipherDetails{ header.cipherTextDetails, header.cipherHeaderDetails }; + std::unordered_map> cipherKeys = + wait(getEncryptCipherKeys(db, cipherDetails)); + ASSERT(cipherKeys.count(header.cipherTextDetails) > 0); + ASSERT(cipherKeys.count(header.cipherHeaderDetails) > 0); + TextAndHeaderCipherKeys result{ cipherKeys.at(header.cipherTextDetails), + cipherKeys.at(header.cipherHeaderDetails) }; + ASSERT(result.cipherTextKey.isValid()); + ASSERT(result.cipherHeaderKey.isValid()); + return result; +} diff --git a/fdbserver/GetEncryptCipherKeys.h b/fdbserver/GetEncryptCipherKeys.h new file mode 100644 index 0000000000..6af7ccc71c --- /dev/null +++ b/fdbserver/GetEncryptCipherKeys.h @@ -0,0 +1,58 @@ +/* + * GetEncryptCipherKeys.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#ifndef FDBSERVER_GETCIPHERKEYS_H +#define FDBSERVER_GETCIPHERKEYS_H + +#include "fdbserver/ServerDBInfo.h" +#include "flow/BlobCipher.h" + +#include +#include + +// Get latest cipher keys for given encryption domains. It tries to get the cipher keys from local cache. +// In case of cache miss, it fetches the cipher keys from EncryptKeyProxy and puts the result in the local cache +// before returning. +Future>> getLatestEncryptCipherKeys( + const Reference const>& db, + const std::unordered_map& domains); + +// Get cipher keys specified by the list of cipher details. It tries to get the cipher keys from local cache. +// In case of cache miss, it fetches the cipher keys from EncryptKeyProxy and puts the result in the local cache +// before returning. +Future>> getEncryptCipherKeys( + const Reference const>& db, + const std::unordered_set& cipherDetails); + +struct TextAndHeaderCipherKeys { + Reference cipherTextKey; + Reference cipherHeaderKey; +}; + +// Helper method to get latest cipher text key and cipher header key for system domain, +// used for encrypting system data. +Future getLatestSystemEncryptCipherKeys(const Reference const>& db); + +// Helper method to get both text cipher key and header cipher key for the given encryption header, +// used for decrypting given encrypted data with encryption header.
+Future getEncryptCipherKeys(const Reference const>& db, + const BlobCipherEncryptHeader& header); +#endif \ No newline at end of file diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 1e807098a2..f307f46db9 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -815,7 +815,7 @@ ACTOR static Future processStatusFetcher( roles.addRole("blob_manager", db->get().blobManager.get()); } - if (SERVER_KNOBS->ENABLE_ENCRYPTION && db->get().encryptKeyProxy.present()) { + if ((SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) && db->get().encryptKeyProxy.present()) { roles.addRole("encrypt_key_proxy", db->get().encryptKeyProxy.get()); } diff --git a/fdbserver/workloads/ConsistencyCheck.actor.cpp b/fdbserver/workloads/ConsistencyCheck.actor.cpp index f55d8f975b..4cf52624f1 100644 --- a/fdbserver/workloads/ConsistencyCheck.actor.cpp +++ b/fdbserver/workloads/ConsistencyCheck.actor.cpp @@ -2378,7 +2378,7 @@ struct ConsistencyCheckWorkload : TestWorkload { } // Check EncryptKeyProxy - if (SERVER_KNOBS->ENABLE_ENCRYPTION && db.encryptKeyProxy.present() && + if ((SERVER_KNOBS->ENABLE_ENCRYPTION || g_network->isSimulated()) && db.encryptKeyProxy.present() && (!nonExcludedWorkerProcessMap.count(db.encryptKeyProxy.get().address()) || nonExcludedWorkerProcessMap[db.encryptKeyProxy.get().address()].processClass.machineClassFitness( ProcessClass::EncryptKeyProxy) > fitnessLowerBound)) { diff --git a/fdbserver/workloads/EncryptionOps.actor.cpp b/fdbserver/workloads/EncryptionOps.actor.cpp index 6c47611f76..eb86ec12fc 100644 --- a/fdbserver/workloads/EncryptionOps.actor.cpp +++ b/fdbserver/workloads/EncryptionOps.actor.cpp @@ -280,7 +280,7 @@ struct EncryptionOpsWorkload : TestWorkload { ASSERT(cipherKey.isValid()); ASSERT(cipherKey->isEqual(orgCipherKey)); - DecryptBlobCipherAes256Ctr decryptor(cipherKey, headerCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(cipherKey, headerCipherKey, header.iv); 
const bool validateHeaderAuthToken = deterministicRandom()->randomInt(0, 100) < 65; auto start = std::chrono::high_resolution_clock::now(); diff --git a/flow/BlobCipher.cpp b/flow/BlobCipher.cpp index 1b7e8d19e7..4b36623f60 100644 --- a/flow/BlobCipher.cpp +++ b/flow/BlobCipher.cpp @@ -184,10 +184,10 @@ Reference BlobCipherKeyIdCache::insertBaseCipherKey(const Encrypt return cipherKey; } -void BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId, - const uint8_t* baseCipher, - int baseCipherLen, - const EncryptCipherRandomSalt& salt) { +Reference BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId, + const uint8_t* baseCipher, + int baseCipherLen, + const EncryptCipherRandomSalt& salt) { ASSERT_NE(baseCipherId, ENCRYPT_INVALID_CIPHER_KEY_ID); ASSERT_NE(salt, ENCRYPT_INVALID_RANDOM_SALT); @@ -201,7 +201,7 @@ void BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& bas .detail("BaseCipherKeyId", baseCipherId) .detail("DomainId", domainId); // Key is already present; nothing more to do. 
- return; + return itr->second; } else { TraceEvent("InsertBaseCipherKey_UpdateCipher") .detail("BaseCipherKeyId", baseCipherId) @@ -213,6 +213,7 @@ void BlobCipherKeyIdCache::insertBaseCipherKey(const EncryptCipherBaseKeyId& bas Reference cipherKey = makeReference(domainId, baseCipherId, baseCipher, baseCipherLen, salt); keyIdCache.emplace(cacheKey, cipherKey); + return cipherKey; } void BlobCipherKeyIdCache::cleanup() { @@ -263,27 +264,28 @@ Reference BlobCipherKeyCache::insertCipherKey(const EncryptCipher } } -void BlobCipherKeyCache::insertCipherKey(const EncryptCipherDomainId& domainId, - const EncryptCipherBaseKeyId& baseCipherId, - const uint8_t* baseCipher, - int baseCipherLen, - const EncryptCipherRandomSalt& salt) { +Reference BlobCipherKeyCache::insertCipherKey(const EncryptCipherDomainId& domainId, + const EncryptCipherBaseKeyId& baseCipherId, + const uint8_t* baseCipher, + int baseCipherLen, + const EncryptCipherRandomSalt& salt) { if (domainId == ENCRYPT_INVALID_DOMAIN_ID || baseCipherId == ENCRYPT_INVALID_CIPHER_KEY_ID || salt == ENCRYPT_INVALID_RANDOM_SALT) { throw encrypt_invalid_id(); } + Reference cipherKey; try { auto domainItr = domainCacheMap.find(domainId); if (domainItr == domainCacheMap.end()) { // Add mapping to track new encryption domain Reference keyIdCache = makeReference(domainId); - keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt); + cipherKey = keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt); domainCacheMap.emplace(domainId, keyIdCache); } else { // Track new baseCipher keys Reference keyIdCache = domainItr->second; - keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt); + cipherKey = keyIdCache->insertBaseCipherKey(baseCipherId, baseCipher, baseCipherLen, salt); } TraceEvent("InsertCipherKey") @@ -297,6 +299,8 @@ void BlobCipherKeyCache::insertCipherKey(const EncryptCipherDomainId& domainId, .detail("Salt", salt); throw; } + + return cipherKey; } 
Reference BlobCipherKeyCache::getLatestCipherKey(const EncryptCipherDomainId& domainId) { @@ -376,16 +380,27 @@ EncryptBlobCipherAes265Ctr::EncryptBlobCipherAes265Ctr(Reference : ctx(EVP_CIPHER_CTX_new()), textCipherKey(tCipherKey), headerCipherKey(hCipherKey), authTokenMode(mode) { ASSERT(isEncryptHeaderAuthTokenModeValid(mode)); ASSERT_EQ(ivLen, AES_256_IV_LENGTH); - memcpy(&iv[0], cipherIV, ivLen); + init(); +} +EncryptBlobCipherAes265Ctr::EncryptBlobCipherAes265Ctr(Reference tCipherKey, + Reference hCipherKey, + const EncryptAuthTokenMode mode) + : ctx(EVP_CIPHER_CTX_new()), textCipherKey(tCipherKey), headerCipherKey(hCipherKey), authTokenMode(mode) { + ASSERT(isEncryptHeaderAuthTokenModeValid(mode)); + generateRandomData(iv, AES_256_IV_LENGTH); + init(); +} + +void EncryptBlobCipherAes265Ctr::init() { if (ctx == nullptr) { throw encrypt_ops_error(); } if (EVP_EncryptInit_ex(ctx, EVP_aes_256_ctr(), nullptr, nullptr, nullptr) != 1) { throw encrypt_ops_error(); } - if (EVP_EncryptInit_ex(ctx, nullptr, nullptr, textCipherKey.getPtr()->data(), cipherIV) != 1) { + if (EVP_EncryptInit_ex(ctx, nullptr, nullptr, textCipherKey.getPtr()->data(), iv) != 1) { throw encrypt_ops_error(); } } @@ -439,7 +454,7 @@ Reference EncryptBlobCipherAes265Ctr::encrypt(const uint8_t* plainte header->cipherTextDetails.baseCipherId = textCipherKey->getBaseCipherId(); header->cipherTextDetails.encryptDomainId = textCipherKey->getDomainId(); header->cipherTextDetails.salt = textCipherKey->getSalt(); - memcpy(&header->cipherTextDetails.iv[0], &iv[0], AES_256_IV_LENGTH); + memcpy(&header->iv[0], &iv[0], AES_256_IV_LENGTH); if (authTokenMode == ENCRYPT_HEADER_AUTH_TOKEN_MODE_NONE) { // No header 'authToken' generation needed. 
@@ -887,8 +902,7 @@ TEST_CASE("flow/BlobCipher") { header.cipherTextDetails.baseCipherId, header.cipherTextDetails.salt); ASSERT(tCipherKeyKey->isEqual(cipherKey)); - DecryptBlobCipherAes256Ctr decryptor( - tCipherKeyKey, Reference(), &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, Reference(), &header.iv[0]); Reference decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena); ASSERT_EQ(decrypted->getLogicalSize(), bufLen); @@ -903,8 +917,7 @@ TEST_CASE("flow/BlobCipher") { headerCopy.flags.headerVersion += 1; try { encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena); - DecryptBlobCipherAes256Ctr decryptor( - tCipherKeyKey, Reference(), &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, Reference(), header.iv); decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -920,8 +933,7 @@ TEST_CASE("flow/BlobCipher") { headerCopy.flags.encryptMode += 1; try { encrypted = encryptor.encrypt(&orgData[0], bufLen, &header, arena); - DecryptBlobCipherAes256Ctr decryptor( - tCipherKeyKey, Reference(), &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, Reference(), header.iv); decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -937,8 +949,7 @@ TEST_CASE("flow/BlobCipher") { memcpy(encrypted->begin(), &temp[0], bufLen); int tIdx = deterministicRandom()->randomInt(0, bufLen - 1); temp[tIdx] += 1; - DecryptBlobCipherAes256Ctr decryptor( - tCipherKeyKey, Reference(), &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, Reference(), header.iv); decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena); } catch (Error& e) { // No authToken, hence, no corruption detection supported @@ -978,7 +989,7 @@ TEST_CASE("flow/BlobCipher") { 
header.cipherHeaderDetails.baseCipherId, header.cipherHeaderDetails.salt); ASSERT(tCipherKeyKey->isEqual(cipherKey)); - DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv); Reference decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena); ASSERT_EQ(decrypted->getLogicalSize(), bufLen); @@ -993,7 +1004,7 @@ TEST_CASE("flow/BlobCipher") { sizeof(BlobCipherEncryptHeader)); headerCopy.flags.headerVersion += 1; try { - DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv); decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -1009,7 +1020,7 @@ TEST_CASE("flow/BlobCipher") { sizeof(BlobCipherEncryptHeader)); headerCopy.flags.encryptMode += 1; try { - DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv); decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -1026,7 +1037,7 @@ TEST_CASE("flow/BlobCipher") { int hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_SIZE - 1); headerCopy.singleAuthToken.authToken[hIdx] += 1; try { - DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv); decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -1042,7 +1053,7 @@ TEST_CASE("flow/BlobCipher") { memcpy(encrypted->begin(), &temp[0], bufLen); int tIdx = deterministicRandom()->randomInt(0, bufLen - 1); temp[tIdx] += 1; - DecryptBlobCipherAes256Ctr 
decryptor(tCipherKeyKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKeyKey, hCipherKey, header.iv); decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena); } catch (Error& e) { if (e.code() != error_code_encrypt_header_authtoken_mismatch) { @@ -1084,7 +1095,7 @@ TEST_CASE("flow/BlobCipher") { header.cipherHeaderDetails.salt); ASSERT(tCipherKey->isEqual(cipherKey)); - DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv); Reference decrypted = decryptor.decrypt(encrypted->begin(), bufLen, header, arena); ASSERT_EQ(decrypted->getLogicalSize(), bufLen); @@ -1099,7 +1110,7 @@ TEST_CASE("flow/BlobCipher") { sizeof(BlobCipherEncryptHeader)); headerCopy.flags.headerVersion += 1; try { - DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv); decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -1115,7 +1126,7 @@ TEST_CASE("flow/BlobCipher") { sizeof(BlobCipherEncryptHeader)); headerCopy.flags.encryptMode += 1; try { - DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv); decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -1132,7 +1143,7 @@ TEST_CASE("flow/BlobCipher") { int hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_SIZE - 1); headerCopy.multiAuthTokens.cipherTextAuthToken[hIdx] += 1; try { - DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv); decrypted = 
decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -1149,7 +1160,7 @@ TEST_CASE("flow/BlobCipher") { hIdx = deterministicRandom()->randomInt(0, AUTH_TOKEN_SIZE - 1); headerCopy.multiAuthTokens.headerAuthToken[hIdx] += 1; try { - DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv); decrypted = decryptor.decrypt(encrypted->begin(), bufLen, headerCopy, arena); ASSERT(false); // error expected } catch (Error& e) { @@ -1164,7 +1175,7 @@ TEST_CASE("flow/BlobCipher") { memcpy(encrypted->begin(), &temp[0], bufLen); int tIdx = deterministicRandom()->randomInt(0, bufLen - 1); temp[tIdx] += 1; - DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, &header.cipherTextDetails.iv[0]); + DecryptBlobCipherAes256Ctr decryptor(tCipherKey, hCipherKey, header.iv); decrypted = decryptor.decrypt(&temp[0], bufLen, header, arena); } catch (Error& e) { if (e.code() != error_code_encrypt_header_authtoken_mismatch) { diff --git a/flow/BlobCipher.h b/flow/BlobCipher.h index e868f45aff..d11bfe7fc0 100644 --- a/flow/BlobCipher.h +++ b/flow/BlobCipher.h @@ -20,10 +20,12 @@ #pragma once #include "flow/network.h" +#include #include #include #include #include +#include #include "flow/Arena.h" #include "flow/EncryptUtils.h" @@ -67,12 +69,42 @@ public: } uint8_t* begin() { return buffer; } + StringRef toStringRef() { return StringRef(buffer, logicalSize); } + private: int allocSize; int logicalSize; uint8_t* buffer; }; +#pragma pack(push, 1) // exact fit - no padding +struct BlobCipherDetails { + // Encryption domain boundary identifier. 
+ EncryptCipherDomainId encryptDomainId = ENCRYPT_INVALID_DOMAIN_ID; + // BaseCipher encryption key identifier + EncryptCipherBaseKeyId baseCipherId = ENCRYPT_INVALID_CIPHER_KEY_ID; + // Random salt + EncryptCipherRandomSalt salt{}; + + bool operator==(const BlobCipherDetails& o) const { + return encryptDomainId == o.encryptDomainId && baseCipherId == o.baseCipherId && salt == o.salt; + } +}; +#pragma pack(pop) + +namespace std { +template <> +struct hash { + std::size_t operator()(BlobCipherDetails const& details) const { + std::size_t seed = 0; + boost::hash_combine(seed, std::hash{}(details.encryptDomainId)); + boost::hash_combine(seed, std::hash{}(details.baseCipherId)); + boost::hash_combine(seed, std::hash{}(details.salt)); + return seed; + } +}; +} // namespace std + // BlobCipher Encryption header format // This header is persisted along with encrypted buffer, it contains information necessary // to assist decrypting the buffers to serve read requests. @@ -95,25 +127,11 @@ typedef struct BlobCipherEncryptHeader { }; // Cipher text encryption information - struct { - // Encryption domain boundary identifier. - EncryptCipherDomainId encryptDomainId{}; - // BaseCipher encryption key identifier - EncryptCipherBaseKeyId baseCipherId{}; - // Random salt - EncryptCipherRandomSalt salt{}; - // Initialization vector used to encrypt the payload. - uint8_t iv[AES_256_IV_LENGTH]; - } cipherTextDetails; - - struct { - // Encryption domainId for the header - EncryptCipherDomainId encryptDomainId{}; - // BaseCipher encryption key identifier. - EncryptCipherBaseKeyId baseCipherId{}; - // Random salt - EncryptCipherRandomSalt salt{}; - } cipherHeaderDetails; + BlobCipherDetails cipherTextDetails; + // Cipher header encryption information + BlobCipherDetails cipherHeaderDetails; + // Initialization vector used to encrypt the payload. 
+ uint8_t iv[AES_256_IV_LENGTH]; // Encryption header is stored as plaintext on a persistent storage to assist reconstruction of cipher-key(s) for // reads. FIPS compliance recommendation is to leverage cryptographic digest mechanism to generate 'authentication @@ -144,6 +162,17 @@ typedef struct BlobCipherEncryptHeader { }; BlobCipherEncryptHeader() {} + + template + void serialize(Ar& ar) { + ar.serializeBytes(this, headerSize); + } + + std::string toString() const { + return format("domain id: %" PRId64 ", cipher id: %" PRIu64, + cipherTextDetails.encryptDomainId, + cipherTextDetails.baseCipherId); + } } BlobCipherEncryptHeader; #pragma pack(pop) @@ -276,10 +305,10 @@ public: // 'baseCipherId' & 'salt'. The caller needs to fetch 'baseCipherKey' detail and re-populate KeyCache. // Also, the invocation will NOT update the latest cipher-key details. - void insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId, - const uint8_t* baseCipher, - int baseCipherLen, - const EncryptCipherRandomSalt& salt); + Reference insertBaseCipherKey(const EncryptCipherBaseKeyId& baseCipherId, + const uint8_t* baseCipher, + int baseCipherLen, + const EncryptCipherRandomSalt& salt); // API cleanup the cache by dropping all cached cipherKeys void cleanup(); @@ -328,11 +357,11 @@ public: // 'baseCipherId' & 'salt'. The caller needs to fetch 'baseCipherKey' detail and re-populate KeyCache. // Also, the invocation will NOT update the latest cipher-key details. - void insertCipherKey(const EncryptCipherDomainId& domainId, - const EncryptCipherBaseKeyId& baseCipherId, - const uint8_t* baseCipher, - int baseCipherLen, - const EncryptCipherRandomSalt& salt); + Reference insertCipherKey(const EncryptCipherDomainId& domainId, + const EncryptCipherBaseKeyId& baseCipherId, + const uint8_t* baseCipher, + int baseCipherLen, + const EncryptCipherRandomSalt& salt); // API returns the last insert cipherKey for a given encryption domain Id. 
// If domain Id is invalid, it would throw 'encrypt_invalid_id' exception, @@ -389,6 +418,9 @@ public: const uint8_t* iv, const int ivLen, const EncryptAuthTokenMode mode); + EncryptBlobCipherAes265Ctr(Reference tCipherKey, + Reference hCipherKey, + const EncryptAuthTokenMode mode); ~EncryptBlobCipherAes265Ctr(); Reference encrypt(const uint8_t* plaintext, @@ -402,6 +434,8 @@ private: Reference headerCipherKey; EncryptAuthTokenMode authTokenMode; uint8_t iv[AES_256_IV_LENGTH]; + + void init(); }; // This interface enable data block decryption. An invocation to decrypt() would generate @@ -464,4 +498,4 @@ StringRef computeAuthToken(const uint8_t* payload, const int payloadLen, const uint8_t* key, const int keyLen, - Arena& arena); + Arena& arena); \ No newline at end of file diff --git a/flow/EncryptUtils.h b/flow/EncryptUtils.h index 9a6232a483..67c81aa0be 100644 --- a/flow/EncryptUtils.h +++ b/flow/EncryptUtils.h @@ -37,7 +37,8 @@ #define SYSTEM_KEYSPACE_ENCRYPT_DOMAIN_ID -1 #define ENCRYPT_HEADER_DOMAIN_ID -2 -#define FDB_DEFAULT_ENCRYPT_DOMAIN_NAME "FdbDefaultEncryptDomain" + +const std::string FDB_DEFAULT_ENCRYPT_DOMAIN_NAME = "FdbDefaultEncryptDomain"; using EncryptCipherDomainId = int64_t; using EncryptCipherDomainName = StringRef; From 21b041317cfc2ff832ecf27d12c44ec741192329 Mon Sep 17 00:00:00 2001 From: Marian Dvorsky Date: Wed, 8 Jun 2022 16:45:29 +0200 Subject: [PATCH 44/49] Address review comments --- .../TesterBlobGranuleCorrectnessWorkload.cpp | 4 +- .../TesterCancelTransactionWorkload.cpp | 2 +- .../apitester/TesterCorrectnessWorkload.cpp | 4 +- .../apitester/TesterTransactionExecutor.cpp | 2 +- bindings/c/test/apitester/TesterUtil.cpp | 41 +++++ bindings/c/test/apitester/TesterUtil.h | 13 ++ bindings/c/test/fdb_api.hpp | 158 +++++++----------- bindings/c/test/mako/operations.cpp | 12 +- 8 files changed, 122 insertions(+), 114 deletions(-) diff --git a/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp 
b/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp index 5174940401..52d8ddc651 100644 --- a/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp +++ b/bindings/c/test/apitester/TesterBlobGranuleCorrectnessWorkload.cpp @@ -114,7 +114,7 @@ private: fdb::Result res = ctx->tx().readBlobGranules( begin, end, 0 /* beginVersion */, -2 /* latest read version */, granuleContext); - auto out = fdb::Result::NativeKeyValueArray{}; + auto out = fdb::Result::KeyValueRefArray{}; fdb::Error err = res.getKeyValueArrayNothrow(out); if (err.code() == error_code_blob_granule_transaction_too_old) { info("BlobGranuleCorrectness::randomReadOp bg too old\n"); @@ -182,7 +182,7 @@ private: ctx->continueAfter( f, [ctx, f, results]() { - *results = f.get(); + *results = copyKeyRangeArray(f.get()); ctx->done(); }, true); diff --git a/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp b/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp index 01aa968444..b569cdb35f 100644 --- a/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp +++ b/bindings/c/test/apitester/TesterCancelTransactionWorkload.cpp @@ -65,7 +65,7 @@ private: fdb::Future f = futures[i]; auto expectedVal = store.get((*keys)[i]); ctx->continueAfter(f, [expectedVal, f, this, ctx]() { - auto val = f.get(); + auto val = f.get(); if (expectedVal != val) { error(fmt::format("cancelAfterFirstResTx mismatch. 
expected: {:.80} actual: {:.80}", fdb::toCharsRef(expectedVal.value()), diff --git a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp index 0af25f979e..5ce643a99f 100644 --- a/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp +++ b/bindings/c/test/apitester/TesterCorrectnessWorkload.cpp @@ -63,7 +63,7 @@ private: ctx->continueAfterAll(*futures, [ctx, futures, results]() { results->clear(); for (auto& f : *futures) { - results->push_back(f.get()); + results->push_back(copyValueRef(f.get())); } ASSERT(results->size() == futures->size()); ctx->done(); @@ -104,7 +104,7 @@ private: ctx->continueAfterAll(*futures, [ctx, futures, results]() { results->clear(); for (auto& f : *futures) { - results->push_back(f.get()); + results->push_back(copyValueRef(f.get())); } ASSERT(results->size() == futures->size()); ctx->done(); diff --git a/bindings/c/test/apitester/TesterTransactionExecutor.cpp b/bindings/c/test/apitester/TesterTransactionExecutor.cpp index 663cccee43..221774854d 100644 --- a/bindings/c/test/apitester/TesterTransactionExecutor.cpp +++ b/bindings/c/test/apitester/TesterTransactionExecutor.cpp @@ -490,7 +490,7 @@ protected: }; // Map for keeping track of future waits and holding necessary object references - std::unordered_map callbackMap; + std::unordered_map callbackMap; // Holding reference to this for onError future C callback std::shared_ptr onErrorThisRef; diff --git a/bindings/c/test/apitester/TesterUtil.cpp b/bindings/c/test/apitester/TesterUtil.cpp index 3062fcd5ed..0e19081180 100644 --- a/bindings/c/test/apitester/TesterUtil.cpp +++ b/bindings/c/test/apitester/TesterUtil.cpp @@ -65,4 +65,45 @@ void print_internal_error(const char* msg, const char* file, int line) { fflush(stderr); } +std::optional copyValueRef(fdb::future_var::ValueRef::Type value) { + if (value) { + return std::make_optional(fdb::Value(value.value())); + } else { + return std::nullopt; + } +} + +KeyValueArray 
copyKeyValueArray(fdb::future_var::KeyValueRefArray::Type array) { + auto& [in_kvs, in_count, in_more] = array; + + KeyValueArray out; + auto& [out_kv, out_more] = out; + + out_more = in_more; + out_kv.clear(); + for (int i = 0; i < in_count; ++i) { + fdb::native::FDBKeyValue nativeKv = *in_kvs++; + fdb::KeyValue kv; + kv.key = fdb::Key(nativeKv.key, nativeKv.key_length); + kv.value = fdb::Value(nativeKv.value, nativeKv.value_length); + out_kv.push_back(kv); + } + return out; +}; + +KeyRangeArray copyKeyRangeArray(fdb::future_var::KeyRangeRefArray::Type array) { + auto& [in_ranges, in_count] = array; + + KeyRangeArray out; + + for (int i = 0; i < in_count; ++i) { + fdb::native::FDBKeyRange nativeKr = *in_ranges++; + fdb::KeyRange range; + range.beginKey = fdb::Key(nativeKr.begin_key, nativeKr.begin_key_length); + range.endKey = fdb::Key(nativeKr.end_key, nativeKr.end_key_length); + out.push_back(range); + } + return out; +}; + } // namespace FdbApiTester \ No newline at end of file diff --git a/bindings/c/test/apitester/TesterUtil.h b/bindings/c/test/apitester/TesterUtil.h index 21c083b905..092e0d7f1f 100644 --- a/bindings/c/test/apitester/TesterUtil.h +++ b/bindings/c/test/apitester/TesterUtil.h @@ -31,6 +31,11 @@ #include "test/fdb_api.hpp" +#undef ERROR +#define ERROR(name, number, description) enum { error_code_##name = number }; + +#include "flow/error_definitions.h" + namespace fmt { // fmt::format formatting for std::optional @@ -107,6 +112,14 @@ static inline double microsecToSec(TimeDuration timeUs) { return timeUs / 1000000.0; } +std::optional copyValueRef(fdb::future_var::ValueRef::Type value); + +using KeyValueArray = std::pair, bool>; +KeyValueArray copyKeyValueArray(fdb::future_var::KeyValueRefArray::Type array); + +using KeyRangeArray = std::vector; +KeyRangeArray copyKeyRangeArray(fdb::future_var::KeyRangeRefArray::Type array); + } // namespace FdbApiTester #endif diff --git a/bindings/c/test/fdb_api.hpp b/bindings/c/test/fdb_api.hpp index 
0365f3a9e2..9a08d83e52 100644 --- a/bindings/c/test/fdb_api.hpp +++ b/bindings/c/test/fdb_api.hpp @@ -39,11 +39,6 @@ // introduce the option enums #include -#undef ERROR -#define ERROR(name, number, description) enum { error_code_##name = number }; - -#include "flow/error_definitions.h" - namespace fdb { // hide C API to discourage mixing C/C++ API @@ -133,40 +128,24 @@ struct Int64 { return Error(native::fdb_future_get_int64(f, &out)); } }; -struct NativeKey { - using Type = std::pair; +struct KeyRef { + using Type = fdb::KeyRef; static Error extract(native::FDBFuture* f, Type& out) noexcept { - auto& [out_key, out_key_length] = out; - return Error(native::fdb_future_get_key(f, &out_key, &out_key_length)); - } -}; -struct Key { - using Type = fdb::Key; - static Error extract(native::FDBFuture* f, Type& out) noexcept { - NativeKey::Type native_out{}; - auto err = NativeKey::extract(f, native_out); - auto& [out_key, out_key_length] = native_out; - out = fdb::Key(out_key, out_key_length); + uint8_t const* out_key = nullptr; + int out_key_length = 0; + auto err = Error(native::fdb_future_get_key(f, &out_key, &out_key_length)); + out = fdb::KeyRef(out_key, out_key_length); return Error(err); } }; -struct NativeValue { - using Type = std::tuple; +struct ValueRef { + using Type = std::optional; static Error extract(native::FDBFuture* f, Type& out) noexcept { - auto& [out_present, out_value, out_value_length] = out; - auto out_present_native = native::fdb_bool_t{}; - auto err = native::fdb_future_get_value(f, &out_present_native, &out_value, &out_value_length); - out_present = (out_present_native != 0); - return Error(err); - } -}; -struct OptionalValue { - using Type = std::optional; - static Error extract(native::FDBFuture* f, Type& out) noexcept { - NativeValue::Type native_out{}; - auto err = NativeValue::extract(f, native_out); - auto& [out_present, out_value, out_value_length] = native_out; - out = out_present ? 
std::make_optional(fdb::Value(out_value, out_value_length)) : std::nullopt; + auto out_present = native::fdb_bool_t{}; + uint8_t const* out_value = nullptr; + int out_value_length = 0; + auto err = native::fdb_future_get_value(f, &out_present, &out_value, &out_value_length); + out = out_present != 0 ? std::make_optional(fdb::ValueRef(out_value, out_value_length)) : std::nullopt; return Error(err); } }; @@ -177,58 +156,31 @@ struct StringArray { return Error(native::fdb_future_get_string_array(f, &out_strings, &out_count)); } }; -struct NativeKeyValueArray { - using Type = std::tuple; +struct KeyValueRef : native::FDBKeyValue { + fdb::KeyRef key() const noexcept { return fdb::KeyRef(native::FDBKeyValue::key, key_length); } + fdb::ValueRef value() const noexcept { return fdb::ValueRef(native::FDBKeyValue::value, value_length); } +}; +struct KeyValueRefArray { + using Type = std::tuple; static Error extract(native::FDBFuture* f, Type& out) noexcept { auto& [out_kv, out_count, out_more] = out; auto out_more_native = native::fdb_bool_t{}; - auto err = native::fdb_future_get_keyvalue_array(f, &out_kv, &out_count, &out_more_native); + auto err = native::fdb_future_get_keyvalue_array( + f, reinterpret_cast(&out_kv), &out_count, &out_more_native); out_more = (out_more_native != 0); return Error(err); } }; -struct KeyValueArray { - using Type = std::pair, bool>; - static Error extract(native::FDBFuture* f, Type& out) noexcept { - NativeKeyValueArray::Type native_out{}; - auto err = NativeKeyValueArray::extract(f, native_out); - auto [kvs, count, more] = native_out; - - auto& [out_kv, out_more] = out; - out_more = more; - out_kv.clear(); - for (int i = 0; i < count; ++i) { - fdb::native::FDBKeyValue nativeKv = *kvs++; - KeyValue kv; - kv.key = fdb::Key(nativeKv.key, nativeKv.key_length); - kv.value = fdb::Value(nativeKv.value, nativeKv.value_length); - out_kv.push_back(kv); - } - return Error(err); - } +struct KeyRangeRef : native::FDBKeyRange { + fdb::KeyRef beginKey() const 
noexcept { return fdb::KeyRef(native::FDBKeyRange::begin_key, begin_key_length); } + fdb::KeyRef endKey() const noexcept { return fdb::KeyRef(native::FDBKeyRange::end_key, end_key_length); } }; -struct NativeKeyRangeArray { - using Type = std::tuple; +struct KeyRangeRefArray { + using Type = std::tuple; static Error extract(native::FDBFuture* f, Type& out) noexcept { - auto& [out_kv, out_count] = out; - auto err = native::fdb_future_get_keyrange_array(f, &out_kv, &out_count); - return Error(err); - } -}; -struct KeyRangeArray { - using Type = std::vector; - static Error extract(native::FDBFuture* f, Type& out) noexcept { - NativeKeyRangeArray::Type native_out{}; - auto err = NativeKeyRangeArray::extract(f, native_out); - auto [ranges, count] = native_out; - out.clear(); - for (int i = 0; i < count; ++i) { - fdb::native::FDBKeyRange nativeKr = *ranges++; - KeyRange range; - range.beginKey = fdb::Key(nativeKr.begin_key, nativeKr.begin_key_length); - range.endKey = fdb::Key(nativeKr.end_key, nativeKr.end_key_length); - out.push_back(range); - } + auto& [out_ranges, out_count] = out; + auto err = native::fdb_future_get_keyrange_array( + f, reinterpret_cast(&out_ranges), &out_count); return Error(err); } }; @@ -331,18 +283,19 @@ class Result { } public: - using NativeKeyValueArray = future_var::NativeKeyValueArray::Type; + using KeyValueRefArray = future_var::KeyValueRefArray::Type; - Error getKeyValueArrayNothrow(NativeKeyValueArray& out) const noexcept { + Error getKeyValueArrayNothrow(KeyValueRefArray& out) const noexcept { auto out_more_native = native::fdb_bool_t{}; auto& [out_kv, out_count, out_more] = out; - auto err_raw = native::fdb_result_get_keyvalue_array(r.get(), &out_kv, &out_count, &out_more_native); + auto err_raw = native::fdb_result_get_keyvalue_array( + r.get(), reinterpret_cast(&out_kv), &out_count, &out_more_native); out_more = out_more_native != 0; return Error(err_raw); } - NativeKeyValueArray getKeyValueArray() const { - auto ret = 
NativeKeyValueArray{}; + KeyValueRefArray getKeyValueArray() const { + auto ret = KeyValueRefArray{}; if (auto err = getKeyValueArrayNothrow(ret)) throwError("ERROR: result_get_keyvalue_array(): ", err); return ret; @@ -352,8 +305,7 @@ public: class Future { protected: friend class Transaction; - friend struct FutureHash; - friend struct FutureEquals; + friend std::hash; std::shared_ptr f; Future(native::FDBFuture* future) { @@ -361,6 +313,8 @@ protected: f = std::shared_ptr(future, &native::fdb_future_destroy); } + native::FDBFuture* nativeHandle() const noexcept { return f.get(); } + // wrap any capturing lambda as callback passable to fdb_future_set_callback(). // destroy after invocation. template @@ -434,14 +388,9 @@ public: void then(UserFunc&& fn) { then(std::forward(fn)); } -}; -struct FutureHash { - size_t operator()(const Future& f) const { return std::hash{}(f.f.get()); } -}; - -struct FutureEquals { - bool operator()(const Future& a, const Future& b) const { return a.f.get() == b.f.get(); } + bool operator==(const Future& other) const { return nativeHandle() == other.nativeHandle(); } + bool operator!=(const Future& other) const { return !(*this == other); } }; template @@ -566,24 +515,24 @@ public: return out; } - TypedFuture getKey(KeySelector sel, bool snapshot) { + TypedFuture getKey(KeySelector sel, bool snapshot) { return native::fdb_transaction_get_key(tr.get(), sel.key, sel.keyLength, sel.orEqual, sel.offset, snapshot); } - TypedFuture get(KeyRef key, bool snapshot) { + TypedFuture get(KeyRef key, bool snapshot) { return native::fdb_transaction_get(tr.get(), key.data(), intSize(key), snapshot); } // Usage: tx.getRange(key_select::firstGreaterOrEqual(firstKey), key_select::lastLessThan(lastKey), ...) 
// gets key-value pairs in key range [begin, end) - TypedFuture getRange(KeySelector first, - KeySelector last, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - bool snapshot, - bool reverse) { + TypedFuture getRange(KeySelector first, + KeySelector last, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + bool snapshot, + bool reverse) { return native::fdb_transaction_get_range(tr.get(), first.key, first.keyLength, @@ -601,7 +550,7 @@ public: reverse); } - TypedFuture getBlobGranuleRanges(KeyRef begin, KeyRef end) { + TypedFuture getBlobGranuleRanges(KeyRef begin, KeyRef end) { return native::fdb_transaction_get_blob_granule_ranges( tr.get(), begin.data(), intSize(begin), end.data(), intSize(end)); } @@ -687,4 +636,9 @@ public: } // namespace fdb +template <> +struct std::hash { + size_t operator()(const fdb::Future& f) const { return std::hash{}(f.nativeHandle()); } +}; + #endif /*FDB_API_HPP*/ diff --git a/bindings/c/test/mako/operations.cpp b/bindings/c/test/mako/operations.cpp index 3cd93448f1..aebb1ff4a5 100644 --- a/bindings/c/test/mako/operations.cpp +++ b/bindings/c/test/mako/operations.cpp @@ -51,7 +51,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } } }, 1, @@ -72,7 +72,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } } }, 1, @@ -84,7 +84,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } } }, 1, @@ -107,7 +107,7 @@ const std::array opTable{ }, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } } }, 1, @@ -119,7 +119,7 @@ const std::array opTable{ 
}, [](Future& f, Transaction&, Arguments const&, ByteString&, ByteString&, ByteString& val) { if (f && !f.error()) { - f.get(); + f.get(); } } }, { StepKind::IMM, @@ -257,7 +257,7 @@ const std::array opTable{ user_context.clear(); - auto out = Result::NativeKeyValueArray{}; + auto out = Result::KeyValueRefArray{}; err = r.getKeyValueArrayNothrow(out); if (!err || err.is(2037 /*blob_granule_not_materialized*/)) return Future(); From e0eedc6a3758133393e574d7dabc69295944f7ed Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Wed, 8 Jun 2022 09:35:30 -0700 Subject: [PATCH 45/49] Fix asan message detection (#7338) * Fix asan message detection * Fix heap-buffer-overflow Call to strlen on a not necessarily null terminated string * Fix gcc build --- bindings/c/test/unit/unit_tests.cpp | 2 +- tests/TestRunner/tmp_cluster.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index 113759d23a..9d5986144c 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -2212,7 +2212,7 @@ TEST_CASE("special-key-space custom transaction ID") { fdb_check(f1.get(&out_present, (const uint8_t**)&val, &vallen)); REQUIRE(out_present); - UID transaction_id = UID::fromString(val); + UID transaction_id = UID::fromString(std::string(val, vallen)); CHECK(transaction_id == randomTransactionID); break; } diff --git a/tests/TestRunner/tmp_cluster.py b/tests/TestRunner/tmp_cluster.py index 99fa698ce2..ebe789ab6b 100755 --- a/tests/TestRunner/tmp_cluster.py +++ b/tests/TestRunner/tmp_cluster.py @@ -177,9 +177,9 @@ if __name__ == "__main__": for line in sev40s: # When running ASAN we expect to see this message. Boost coroutine should be using the correct asan # annotations so that it shouldn't produce any false positives. - if line.endswith( - "WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false " - "positives in some cases! 
" + if ( + "WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false positives in some cases!" + in line ): continue print(">>>>>>>>>>>>>>>>>>>> Found severity 40 events - the test fails") From 07f49392ac01fefa1711852b4ccfbcaca6af79ee Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Wed, 8 Jun 2022 09:36:18 -0700 Subject: [PATCH 46/49] Avoid using structured bindings in doctest assertions (#7335) * Avoid using structured bindings in doctest assertions clang doesn't allow this with the latest releases of doctest This will unblock #7319 * Add constructor to MappedKV --- bindings/c/test/unit/unit_tests.cpp | 79 ++++++++++++++++------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index 9d5986144c..83e91775cb 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -177,13 +177,24 @@ struct GetRangeResult { }; struct GetMappedRangeResult { - std::vector>, // range results - fdb_bool_t>> - mkvs; + struct MappedKV { + MappedKV(const std::string& key, + const std::string& value, + const std::string& begin, + const std::string& end, + const std::vector>& range_results, + fdb_bool_t boundaryAndExist) + : key(key), value(value), begin(begin), end(end), range_results(range_results), + boundaryAndExist(boundaryAndExist) {} + + std::string key; + std::string value; + std::string begin; + std::string end; + std::vector> range_results; + fdb_bool_t boundaryAndExist; + }; + std::vector mkvs; // True if values remain in the key range requested. bool more; // Set to a non-zero value if an error occurred during the transaction. 
@@ -1093,24 +1104,24 @@ TEST_CASE("fdb_transaction_get_mapped_range") { bool boundary; for (int i = 0; i < expectSize; i++, id++) { boundary = i == 0 || i == expectSize - 1; - const auto& [key, value, begin, end, range_results, boundaryAndExist] = result.mkvs[i]; + const auto& mkv = result.mkvs[i]; if (matchIndex == MATCH_INDEX_ALL || i == 0 || i == expectSize - 1) { - CHECK(indexEntryKey(id).compare(key) == 0); + CHECK(indexEntryKey(id).compare(mkv.key) == 0); } else if (matchIndex == MATCH_INDEX_MATCHED_ONLY) { - CHECK(indexEntryKey(id).compare(key) == 0); + CHECK(indexEntryKey(id).compare(mkv.key) == 0); } else if (matchIndex == MATCH_INDEX_UNMATCHED_ONLY) { - CHECK(EMPTY.compare(key) == 0); + CHECK(EMPTY.compare(mkv.key) == 0); } else { - CHECK(EMPTY.compare(key) == 0); + CHECK(EMPTY.compare(mkv.key) == 0); } - bool empty = range_results.empty(); - CHECK(boundaryAndExist == (boundary && !empty)); - CHECK(EMPTY.compare(value) == 0); - CHECK(range_results.size() == SPLIT_SIZE); + bool empty = mkv.range_results.empty(); + CHECK(mkv.boundaryAndExist == (boundary && !empty)); + CHECK(EMPTY.compare(mkv.value) == 0); + CHECK(mkv.range_results.size() == SPLIT_SIZE); for (int split = 0; split < SPLIT_SIZE; split++) { - auto& [k, v] = range_results[split]; - CHECK(recordKey(id, split).compare(k) == 0); - CHECK(recordValue(id, split).compare(v) == 0); + auto& kv = mkv.range_results[split]; + CHECK(recordKey(id, split).compare(kv.first) == 0); + CHECK(recordValue(id, split).compare(kv.second) == 0); } } break; @@ -1151,19 +1162,19 @@ TEST_CASE("fdb_transaction_get_mapped_range_missing_all_secondary") { bool boundary; for (int i = 0; i < expectSize; i++, id++) { boundary = i == 0 || i == expectSize - 1; - const auto& [key, value, begin, end, range_results, boundaryAndExist] = result.mkvs[i]; + const auto& mkv = result.mkvs[i]; if (matchIndex == MATCH_INDEX_ALL || i == 0 || i == expectSize - 1) { - CHECK(indexEntryKey(id).compare(key) == 0); + 
CHECK(indexEntryKey(id).compare(mkv.key) == 0); } else if (matchIndex == MATCH_INDEX_MATCHED_ONLY) { - CHECK(EMPTY.compare(key) == 0); + CHECK(EMPTY.compare(mkv.key) == 0); } else if (matchIndex == MATCH_INDEX_UNMATCHED_ONLY) { - CHECK(indexEntryKey(id).compare(key) == 0); + CHECK(indexEntryKey(id).compare(mkv.key) == 0); } else { - CHECK(EMPTY.compare(key) == 0); + CHECK(EMPTY.compare(mkv.key) == 0); } - bool empty = range_results.empty(); - CHECK(boundaryAndExist == (boundary && !empty)); - CHECK(EMPTY.compare(value) == 0); + bool empty = mkv.range_results.empty(); + CHECK(mkv.boundaryAndExist == (boundary && !empty)); + CHECK(EMPTY.compare(mkv.value) == 0); } break; } @@ -1269,10 +1280,8 @@ TEST_CASE("fdb_transaction_get_range reverse") { std::string data_key = it->first; std::string data_value = it->second; - auto [key, value] = *results_it; - - CHECK(data_key.compare(key) == 0); - CHECK(data[data_key].compare(value) == 0); + CHECK(data_key.compare(results_it->first /*key*/) == 0); + CHECK(data[data_key].compare(results_it->second /*value*/) == 0); } break; } @@ -1306,8 +1315,8 @@ TEST_CASE("fdb_transaction_get_range limit") { CHECK(result.more); } - for (const auto& [key, value] : result.kvs) { - CHECK(data[key].compare(value) == 0); + for (const auto& kv : result.kvs) { + CHECK(data[kv.first].compare(kv.second) == 0); } break; } @@ -1338,8 +1347,8 @@ TEST_CASE("fdb_transaction_get_range FDB_STREAMING_MODE_EXACT") { CHECK(result.kvs.size() == 3); CHECK(result.more); - for (const auto& [key, value] : result.kvs) { - CHECK(data[key].compare(value) == 0); + for (const auto& kv : result.kvs) { + CHECK(data[kv.first].compare(kv.second) == 0); } break; } From b9ff6bc1296d5fb4fc148b1ec86ce4e66a784f98 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 8 Jun 2022 09:38:32 -0700 Subject: [PATCH 47/49] Address AJ's comments --- fdbclient/Schemas.cpp | 3 ++- fdbserver/Status.actor.cpp | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git 
a/fdbclient/Schemas.cpp b/fdbclient/Schemas.cpp index 5d750e6683..babb4a1e50 100644 --- a/fdbclient/Schemas.cpp +++ b/fdbclient/Schemas.cpp @@ -301,7 +301,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema( "available_bytes":0, "limit_bytes":0, "unused_allocated_memory":0, - "used_bytes":0 + "used_bytes":0, + "rss_bytes":0 }, "messages":[ { diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 7870a735c8..d869dc4d1e 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -422,10 +422,11 @@ static JsonBuilderObject getBounceImpactInfo(int recoveryStatusCode) { } struct MachineMemoryInfo { - double memoryUsage; + double memoryUsage; // virtual memory usage + double rssUsage; // RSS memory usage double aggregateLimit; - MachineMemoryInfo() : memoryUsage(0), aggregateLimit(0) {} + MachineMemoryInfo() : memoryUsage(0), rssUsage(0), aggregateLimit(0) {} bool valid() { return memoryUsage >= 0; } void invalidate() { memoryUsage = -1; } @@ -789,6 +790,7 @@ ACTOR static Future processStatusFetcher( if (memInfo->second.valid()) { if (processMetrics.size() > 0 && programStart.size() > 0) { memInfo->second.memoryUsage += processMetrics.getDouble("Memory"); + memInfo->second.rssUsage += processMetrics.getDouble("ResidentMemory"); memInfo->second.aggregateLimit += programStart.getDouble("MemoryLimit"); } else memInfo->second.invalidate(); @@ -1011,7 +1013,8 @@ ACTOR static Future processStatusFetcher( auto machineMemInfo = machineMemoryUsage[workerItr->interf.locality.machineId()]; if (machineMemInfo.valid() && memoryLimit > 0) { ASSERT(machineMemInfo.aggregateLimit > 0); - int64_t memory = availableMemory * memoryLimit / machineMemInfo.aggregateLimit; + int64_t memory = + (availableMemory + machineMemInfo.rssUsage) * memoryLimit / machineMemInfo.aggregateLimit; memoryObj["available_bytes"] = std::min(std::max(memory, 0), memoryLimit); } } From 8606923da235df2e44be8a183e4924360c673c2c Mon Sep 17 00:00:00 2001 
From: Robert Barabas Date: Wed, 8 Jun 2022 14:20:27 -0400 Subject: [PATCH 48/49] Arm64 related build fixes (#7319) * Add missing include * Fix open call on arm64 * Bump up doctest to 2.4.8 --- bindings/c/test/unit/third_party/CMakeLists.txt | 2 +- fdbbackup/FileDecoder.actor.cpp | 2 +- flow/WriteOnlySet.actor.cpp | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bindings/c/test/unit/third_party/CMakeLists.txt b/bindings/c/test/unit/third_party/CMakeLists.txt index 014157553c..5f26630744 100644 --- a/bindings/c/test/unit/third_party/CMakeLists.txt +++ b/bindings/c/test/unit/third_party/CMakeLists.txt @@ -6,7 +6,7 @@ ExternalProject_Add( doctest PREFIX ${CMAKE_BINARY_DIR}/doctest GIT_REPOSITORY https://github.com/onqtam/doctest.git - GIT_TAG 8424be522357e68d8c6178375546bb0cf9d5f6b3 # v2.4.1 + GIT_TAG 7b9885133108ae301ddd16e2651320f54cafeba7 # v2.4.8 TIMEOUT 10 CONFIGURE_COMMAND "" BUILD_COMMAND "" diff --git a/fdbbackup/FileDecoder.actor.cpp b/fdbbackup/FileDecoder.actor.cpp index 2ad7a55df2..248c7d4adf 100644 --- a/fdbbackup/FileDecoder.actor.cpp +++ b/fdbbackup/FileDecoder.actor.cpp @@ -428,7 +428,7 @@ public: platform::createDirectory(path); } } - self->lfd = open(self->file.fileName.c_str(), O_WRONLY | O_CREAT | O_TRUNC); + self->lfd = open(self->file.fileName.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0600); if (self->lfd == -1) { TraceEvent(SevError, "OpenLocalFileFailed").detail("File", self->file.fileName); throw platform_error(); diff --git a/flow/WriteOnlySet.actor.cpp b/flow/WriteOnlySet.actor.cpp index 46077e4aa2..96d7792209 100644 --- a/flow/WriteOnlySet.actor.cpp +++ b/flow/WriteOnlySet.actor.cpp @@ -25,6 +25,7 @@ #include #include +#include #include "flow/actorcompiler.h" // has to be last include #ifdef ENABLE_SAMPLING From 0bb02f6415dc87f050164bba9b02726aec4be011 Mon Sep 17 00:00:00 2001 From: Yao Xiao <87789492+yao-xiao-github@users.noreply.github.com> Date: Thu, 9 Jun 2022 10:50:39 -0700 Subject: [PATCH 49/49] [Sharded RocksDB] 3/N 
Implement functions for range clear. (#7310) --- .../KeyValueStoreShardedRocksDB.actor.cpp | 149 ++++++++++++++++-- 1 file changed, 132 insertions(+), 17 deletions(-) diff --git a/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp b/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp index f829733e8b..db49699dea 100644 --- a/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp @@ -553,10 +553,31 @@ public: TraceEvent(SevError, "ShardedRocksDB").detail("Error", "write to non-exist shard").detail("WriteKey", key); return; } - writeBatch->Put(it->value()->physicalShard->cf, toSlice(key), toSlice(value)); + writeBatch->Put(it.value()->physicalShard->cf, toSlice(key), toSlice(value)); dirtyShards->insert(it.value()->physicalShard); } + void clear(KeyRef key) { + auto it = dataShardMap.rangeContaining(key); + if (!it.value()) { + return; + } + writeBatch->Delete(it.value()->physicalShard->cf, toSlice(key)); + dirtyShards->insert(it.value()->physicalShard); + } + + void clearRange(KeyRangeRef range) { + auto rangeIterator = dataShardMap.intersectingRanges(range); + + for (auto it = rangeIterator.begin(); it != rangeIterator.end(); ++it) { + if (it.value() == nullptr) { + continue; + } + writeBatch->DeleteRange(it.value()->physicalShard->cf, toSlice(range.begin), toSlice(range.end)); + dirtyShards->insert(it.value()->physicalShard); + } + } + std::unique_ptr getWriteBatch() { std::unique_ptr existingWriteBatch = std::move(writeBatch); writeBatch = std::make_unique(); @@ -595,12 +616,17 @@ public: } rocksdb::DB* getDb() { return db; } + std::unordered_map>* getAllShards() { return &physicalShards; } + std::unordered_map* getColumnFamilyMap() { return &columnFamilyMap; } + private: std::string path; rocksdb::DB* db = nullptr; std::unordered_map> physicalShards; + // Stores mapping between cf id and cf handle, used during compaction. 
+ std::unordered_map columnFamilyMap; std::unique_ptr writeBatch; std::unique_ptr> dirtyShards; KeyRangeMap dataShardMap; @@ -1216,11 +1242,14 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { struct Writer : IThreadPoolReceiver { int threadIndex; + std::unordered_map* columnFamilyMap; std::shared_ptr rocksDBMetrics; std::shared_ptr rateLimiter; - explicit Writer(int threadIndex, std::shared_ptr rocksDBMetrics) - : threadIndex(threadIndex), rocksDBMetrics(rocksDBMetrics), + explicit Writer(int threadIndex, + std::unordered_map* columnFamilyMap, + std::shared_ptr rocksDBMetrics) + : threadIndex(threadIndex), columnFamilyMap(columnFamilyMap), rocksDBMetrics(rocksDBMetrics), rateLimiter(SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC > 0 ? rocksdb::NewGenericRateLimiter( SERVER_KNOBS->ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC, // rate_bytes_per_sec @@ -1278,7 +1307,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { PhysicalShard* shard; ThreadReturnPromise done; - AddShardAction(PhysicalShard* shard) : shard(shard) {} + AddShardAction(PhysicalShard* shard) : shard(shard) { ASSERT(shard); } double getTimeEstimate() const override { return SERVER_KNOBS->COMMIT_TIME_ESTIMATE; } }; @@ -1287,6 +1316,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { if (!s.ok()) { a.done.sendError(statusToError(s)); } + (*columnFamilyMap)[a.shard->cf->GetID()] = a.shard->cf; a.done.send(Void()); } @@ -1319,12 +1349,59 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { } }; - rocksdb::Status doCommit(rocksdb::WriteBatch* batch, rocksdb::DB* db, bool sample) { + struct DeleteVisitor : public rocksdb::WriteBatch::Handler { + std::vector>* deletes; + + DeleteVisitor(std::vector>* deletes) : deletes(deletes) { ASSERT(deletes); } + + rocksdb::Status DeleteRangeCF(uint32_t column_family_id, + const rocksdb::Slice& begin, + const rocksdb::Slice& end) override { + deletes->push_back( + std::make_pair(column_family_id, KeyRange(KeyRangeRef(toStringRef(begin), 
toStringRef(end))))); + return rocksdb::Status::OK(); + } + + rocksdb::Status PutCF(uint32_t column_family_id, + const rocksdb::Slice& key, + const rocksdb::Slice& value) override { + return rocksdb::Status::OK(); + } + + rocksdb::Status DeleteCF(uint32_t column_family_id, const rocksdb::Slice& key) override { + return rocksdb::Status::OK(); + } + + rocksdb::Status SingleDeleteCF(uint32_t column_family_id, const rocksdb::Slice& key) override { + return rocksdb::Status::OK(); + } + + rocksdb::Status MergeCF(uint32_t column_family_id, + const rocksdb::Slice& key, + const rocksdb::Slice& value) override { + return rocksdb::Status::OK(); + } + }; + + rocksdb::Status doCommit(rocksdb::WriteBatch* batch, + rocksdb::DB* db, + std::vector>* deletes, + bool sample) { + DeleteVisitor dv(deletes); + rocksdb::Status s = batch->Iterate(&dv); + if (!s.ok()) { + logRocksDBError(s, "CommitDeleteVisitor"); + return s; + } + + // If there are any range deletes, we should have added them to be deleted. + ASSERT(!deletes->empty() || !batch->HasDeleteRange()); + rocksdb::WriteOptions options; options.sync = !SERVER_KNOBS->ROCKSDB_UNSAFE_AUTO_FSYNC; double writeBeginTime = sample ? timer_monotonic() : 0; - auto s = db->Write(options, batch); + s = db->Write(options, batch); if (sample) { rocksDBMetrics->getWriteHistogram()->sampleSeconds(timer_monotonic() - writeBeginTime); } @@ -1333,7 +1410,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { return s; } - // TODO: Add cf id <-> cf handle mapping and suggest compact range. 
return s; } @@ -1346,15 +1422,25 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { commitBeginTime = timer_monotonic(); rocksDBMetrics->getCommitQueueWaitHistogram()->sampleSeconds(commitBeginTime - a.startTime); } - - auto s = doCommit(a.writeBatch.get(), a.db, a.getHistograms); + std::vector> deletes; + auto s = doCommit(a.writeBatch.get(), a.db, &deletes, a.getHistograms); + if (!s.ok()) { + a.done.sendError(statusToError(s)); + return; + } for (auto shard : *(a.dirtyShards)) { shard->readIterPool->update(); } - if (!s.ok()) { - a.done.sendError(statusToError(s)); - return; + + a.done.send(Void()); + + for (const auto& [id, range] : deletes) { + auto cf = columnFamilyMap->find(id); + ASSERT(cf != columnFamilyMap->end()); + auto begin = toSlice(range.begin); + auto end = toSlice(range.end); + ASSERT(a.db->SuggestCompactRange(cf->second, &begin, &end).ok()); } if (a.getHistograms) { @@ -1366,7 +1452,6 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { if (a.getPerfContext) { rocksDBMetrics->setPerfContext(threadIndex); } - a.done.send(Void()); } struct CloseAction : TypedAction { @@ -1712,7 +1797,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { writeThread = createGenericThreadPool(); readThreads = createGenericThreadPool(); } - writeThread->addThread(new Writer(0, rocksDBMetrics), "fdb-rocksdb-wr"); + writeThread->addThread(new Writer(0, shardManager.getColumnFamilyMap(), rocksDBMetrics), "fdb-rocksdb-wr"); TraceEvent("RocksDBReadThreads").detail("KnobRocksDBReadParallelism", SERVER_KNOBS->ROCKSDB_READ_PARALLELISM); for (unsigned i = 0; i < SERVER_KNOBS->ROCKSDB_READ_PARALLELISM; ++i) { readThreads->addThread(new Reader(i, rocksDBMetrics), "fdb-rocksdb-re"); @@ -1773,8 +1858,11 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore { void set(KeyValueRef kv, const Arena*) override { shardManager.put(kv.key, kv.value); } void clear(KeyRangeRef range, const Arena*) override { - // TODO: clear ranges. 
- return; + if (range.singleKeyRange()) { + shardManager.clear(range.begin); + } else { + shardManager.clearRange(range); + } } Future commit(bool) override { @@ -1985,7 +2073,7 @@ TEST_CASE("noSim/ShardedRocksDB/SingleShardRead") { return Void(); } -TEST_CASE("noSim/ShardedRocksDB/ReadRange") { +TEST_CASE("noSim/ShardedRocksDB/RangeOps") { state std::string rocksDBTestDir = "sharded-rocksdb-kvs-test-db"; platform::eraseDirectoryRecursive(rocksDBTestDir); @@ -2078,6 +2166,33 @@ TEST_CASE("noSim/ShardedRocksDB/ReadRange") { ASSERT(result[i] == expectedRows[40 + i]); } + // Clear a range on a single shard. + kvStore->clear(KeyRangeRef("40"_sr, "45"_sr)); + wait(kvStore->commit(false)); + + RangeResult result = + wait(kvStore->readRange(KeyRangeRef("4"_sr, "5"_sr), 20, 10000, IKeyValueStore::ReadType::NORMAL)); + ASSERT_EQ(result.size(), 5); + + // Clear a single value. + kvStore->clear(KeyRangeRef("01"_sr, keyAfter("01"_sr))); + wait(kvStore->commit(false)); + + Optional val = wait(kvStore->readValue("01"_sr)); + ASSERT(!val.present()); + + // Clear a range spanning on multiple shards. + kvStore->clear(KeyRangeRef("1"_sr, "8"_sr)); + wait(kvStore->commit(false)); + + RangeResult result = + wait(kvStore->readRange(KeyRangeRef("1"_sr, "8"_sr), 1000, 10000, IKeyValueStore::ReadType::NORMAL)); + ASSERT_EQ(result.size(), 0); + + RangeResult result = + wait(kvStore->readRange(KeyRangeRef("0"_sr, ":"_sr), 1000, 10000, IKeyValueStore::ReadType::NORMAL)); + ASSERT_EQ(result.size(), 19); + Future closed = kvStore->onClosed(); kvStore->dispose(); wait(closed);