From f21fcf67ceca82693d8dd2252d458b3326de1d0e Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Tue, 25 Oct 2022 10:59:21 -0700 Subject: [PATCH 01/57] initial commit to allow tenant list filtering in metacluster --- .../sphinx/source/command-line-interface.rst | 4 ++- fdbcli/MetaclusterCommands.actor.cpp | 2 +- fdbcli/TenantCommands.actor.cpp | 23 ++++++++++--- fdbclient/Tenant.cpp | 2 ++ .../fdbclient/MetaclusterManagement.actor.h | 33 +++++++++++++------ 5 files changed, 47 insertions(+), 17 deletions(-) diff --git a/documentation/sphinx/source/command-line-interface.rst b/documentation/sphinx/source/command-line-interface.rst index c561379100..a6c60d3f4f 100644 --- a/documentation/sphinx/source/command-line-interface.rst +++ b/documentation/sphinx/source/command-line-interface.rst @@ -475,7 +475,7 @@ Deletes a tenant from the cluster. The tenant must be empty. list ^^^^ -``tenant list [BEGIN] [END] [LIMIT]`` +``tenant list [BEGIN] [END] [LIMIT] [state=,,...]`` Lists the tenants present in the cluster. @@ -485,6 +485,8 @@ Lists the tenants present in the cluster. ``LIMIT`` - the number of tenants to list. Defaults to 100. +``STATE``` - TenantState(s) to filter the list with. Defaults to no filters. 
+ get ^^^ diff --git a/fdbcli/MetaclusterCommands.actor.cpp b/fdbcli/MetaclusterCommands.actor.cpp index edb25ace2c..652df4645b 100644 --- a/fdbcli/MetaclusterCommands.actor.cpp +++ b/fdbcli/MetaclusterCommands.actor.cpp @@ -398,7 +398,7 @@ std::vector metaclusterHintGenerator(std::vector const& }; return std::vector(opts.begin() + std::min(1, tokens.size() - 2), opts.end()); } else if (tokencmp(tokens[1], "list") && tokens.size() < 5) { - static std::vector opts = { "[BEGIN]", "[END]", "[LIMIT]" }; + static std::vector opts = { "[BEGIN]", "[END]", "[LIMIT]", "[state=,,...]" }; return std::vector(opts.begin() + tokens.size() - 2, opts.end()); } else if (tokencmp(tokens[1], "get") && tokens.size() < 4) { static std::vector opts = { "", "[JSON]" }; diff --git a/fdbcli/TenantCommands.actor.cpp b/fdbcli/TenantCommands.actor.cpp index e2be6fac56..73c4e79adb 100644 --- a/fdbcli/TenantCommands.actor.cpp +++ b/fdbcli/TenantCommands.actor.cpp @@ -225,11 +225,12 @@ ACTOR Future tenantDeleteCommand(Reference db, std::vector tenantListCommand(Reference db, std::vector tokens) { - if (tokens.size() > 5) { - fmt::print("Usage: tenant list [BEGIN] [END] [LIMIT]\n\n"); + if (tokens.size() > 6) { + fmt::print("Usage: tenant list [BEGIN] [END] [LIMIT] [state=,,...]\n\n"); fmt::print("Lists the tenants in a cluster.\n"); fmt::print("Only tenants in the range BEGIN - END will be printed.\n"); fmt::print("An optional LIMIT can be specified to limit the number of results (default 100).\n"); + fmt::print("Optional comma-separated state(s) can be provided to filter the list.\n"); return false; } @@ -243,11 +244,11 @@ ACTOR Future tenantListCommand(Reference db, std::vector= 4) { endTenant = tokens[3]; if (endTenant <= beginTenant) { - fmt::print(stderr, "ERROR: end must be larger than begin"); + fmt::print(stderr, "ERROR: end must be larger than begin\n"); return false; } } - if (tokens.size() == 5) { + if (tokens.size() >= 5) { int n = 0; if (sscanf(tokens[4].toString().c_str(), "%d%n", 
&limit, &n) != 1 || n != tokens[4].size() || limit <= 0) { fmt::print(stderr, "ERROR: invalid limit `{}'\n", tokens[4].toString().c_str()); @@ -255,6 +256,18 @@ ACTOR Future tenantListCommand(Reference db, std::vector filters; + if (tokens.size() == 6) { // state=ready,registering + if (!tokens[5].startsWith("state="_sr)) { + fmt::print(stderr, "ERROR: state filter must begin with `state='\n"); + return false; + } + auto filterStrings = tokens[5].removePrefix("state="_sr).splitAny(","_sr); + for (auto sref : filterStrings) { + filters.push_back(TenantMapEntry::stringToTenantState(sref.toString())); + } + } + state Key beginTenantKey = tenantMapSpecialKeyRange.begin.withSuffix(beginTenant); state Key endTenantKey = tenantMapSpecialKeyRange.begin.withSuffix(endTenant); state Reference tr = db->createTransaction(); @@ -266,7 +279,7 @@ ACTOR Future tenantListCommand(Reference db, std::vector tenantNames; if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) { std::vector> tenants = - wait(MetaclusterAPI::listTenantsTransaction(tr, beginTenant, endTenant, limit)); + wait(MetaclusterAPI::listTenantsTransaction(tr, beginTenant, endTenant, limit, filters)); for (auto tenant : tenants) { tenantNames.push_back(tenant.first); } diff --git a/fdbclient/Tenant.cpp b/fdbclient/Tenant.cpp index 2ad1989fd0..e4d27a8be0 100644 --- a/fdbclient/Tenant.cpp +++ b/fdbclient/Tenant.cpp @@ -70,6 +70,7 @@ std::string TenantMapEntry::tenantStateToString(TenantState tenantState) { } TenantState TenantMapEntry::stringToTenantState(std::string stateStr) { + std::transform(stateStr.begin(), stateStr.end(), stateStr.begin(), [](unsigned char c) { return std::tolower(c); }); if (stateStr == "registering") { return TenantState::REGISTERING; } else if (stateStr == "ready") { @@ -103,6 +104,7 @@ std::string TenantMapEntry::tenantLockStateToString(TenantLockState tenantState) } TenantLockState TenantMapEntry::stringToTenantLockState(std::string stateStr) { + std::transform(stateStr.begin(), 
stateStr.end(), stateStr.begin(), [](unsigned char c) { return std::tolower(c); }); if (stateStr == "unlocked") { return TenantLockState::UNLOCKED; } else if (stateStr == "read only") { diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 91a17a8b88..7d930a8d9d 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1555,23 +1555,36 @@ Future deleteTenant(Reference db, TenantName name) { } ACTOR template -Future>> listTenantsTransaction(Transaction tr, - TenantNameRef begin, - TenantNameRef end, - int limit) { +Future>> listTenantsTransaction( + Transaction tr, + TenantNameRef begin, + TenantNameRef end, + int limit, + std::vector filters = std::vector()) { tr->setOption(FDBTransactionOptions::RAW_ACCESS); KeyBackedRangeResult> results = wait(ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(tr, begin, end, limit)); - return results.results; + if (filters.empty()) { + return results.results; + } + std::vector> filterResults; + for (auto pair : results.results) { + if (std::count(filters.begin(), filters.end(), pair.second.tenantState)) { + filterResults.push_back(pair); + } + } + return filterResults; } ACTOR template -Future>> listTenants(Reference db, - TenantName begin, - TenantName end, - int limit) { +Future>> listTenants( + Reference db, + TenantName begin, + TenantName end, + int limit, + std::vector filters = std::vector()) { state Reference tr = db->createTransaction(); loop { @@ -1579,7 +1592,7 @@ Future>> listTenants(Reference tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE); std::vector> tenants = - wait(listTenantsTransaction(tr, begin, end, limit)); + wait(listTenantsTransaction(tr, begin, end, limit, filters)); return tenants; } catch (Error& e) { wait(safeThreadFutureToFuture(tr->onError(e))); From 
b17c3fecbbd9a41865ddf3fb270447e6eea9682f Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 26 Oct 2022 14:37:00 -0700 Subject: [PATCH 02/57] add invalid tenant state and assertion in metacluster consistency --- fdbcli/MetaclusterCommands.actor.cpp | 2 +- fdbclient/Tenant.cpp | 4 +++- fdbclient/include/fdbclient/Tenant.h | 12 +++++++++- .../workloads/MetaclusterConsistency.actor.h | 22 +++++++++++++++++++ 4 files changed, 37 insertions(+), 3 deletions(-) diff --git a/fdbcli/MetaclusterCommands.actor.cpp b/fdbcli/MetaclusterCommands.actor.cpp index 652df4645b..068fe75de5 100644 --- a/fdbcli/MetaclusterCommands.actor.cpp +++ b/fdbcli/MetaclusterCommands.actor.cpp @@ -397,7 +397,7 @@ std::vector metaclusterHintGenerator(std::vector const& "", "|connection_string=>" }; return std::vector(opts.begin() + std::min(1, tokens.size() - 2), opts.end()); - } else if (tokencmp(tokens[1], "list") && tokens.size() < 5) { + } else if (tokencmp(tokens[1], "list") && tokens.size() < 6) { static std::vector opts = { "[BEGIN]", "[END]", "[LIMIT]", "[state=,,...]" }; return std::vector(opts.begin() + tokens.size() - 2, opts.end()); } else if (tokencmp(tokens[1], "get") && tokens.size() < 4) { diff --git a/fdbclient/Tenant.cpp b/fdbclient/Tenant.cpp index e4d27a8be0..8ef4a8b9e3 100644 --- a/fdbclient/Tenant.cpp +++ b/fdbclient/Tenant.cpp @@ -64,6 +64,8 @@ std::string TenantMapEntry::tenantStateToString(TenantState tenantState) { return "renaming to"; case TenantState::ERROR: return "error"; + case TenantState::INVALID: + return "invalid"; default: UNREACHABLE(); } @@ -87,7 +89,7 @@ TenantState TenantMapEntry::stringToTenantState(std::string stateStr) { return TenantState::ERROR; } - UNREACHABLE(); + return TenantState::INVALID; } std::string TenantMapEntry::tenantLockStateToString(TenantLockState tenantState) { diff --git a/fdbclient/include/fdbclient/Tenant.h b/fdbclient/include/fdbclient/Tenant.h index 87e1731e90..0781ed08a2 100644 --- a/fdbclient/include/fdbclient/Tenant.h +++ 
b/fdbclient/include/fdbclient/Tenant.h @@ -49,6 +49,7 @@ typedef Standalone TenantGroupName; // RENAMING_TO - the tenant is being created as a rename from an existing tenant and is awaiting the rename to complete // on the data cluster // ERROR - the tenant is in an error state +// INVALID - Unrecognized state - likely the result of a failed parsing // // A tenant in any configuration is allowed to be removed. Only tenants in the READY or UPDATING_CONFIGURATION phases // can have their configuration updated. A tenant must not exist or be in the REGISTERING phase to be created. To be @@ -57,7 +58,16 @@ typedef Standalone TenantGroupName; // // If an operation fails and the tenant is left in a non-ready state, re-running the same operation is legal. If // successful, the tenant will return to the READY state. -enum class TenantState { REGISTERING, READY, REMOVING, UPDATING_CONFIGURATION, RENAMING_FROM, RENAMING_TO, ERROR }; +enum class TenantState { + REGISTERING, + READY, + REMOVING, + UPDATING_CONFIGURATION, + RENAMING_FROM, + RENAMING_TO, + ERROR, + INVALID +}; // Represents the lock state the tenant could be in. // Can be used in conjunction with the other tenant states above. 
diff --git a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h index 25f3fcae19..55b6aa863a 100644 --- a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h @@ -71,6 +71,20 @@ private: ACTOR static Future loadManagementClusterMetadata(MetaclusterConsistencyCheck* self) { state Reference managementTr = self->managementDb->createTransaction(); state std::vector> tenantList; + state std::vector> tenantListReady; + state std::vector> tenantListOther; + + state std::vector readyFilter; + state std::vector otherFilter; + + readyFilter.push_back(TenantState::READY); + otherFilter.push_back(TenantState::REGISTERING); + otherFilter.push_back(TenantState::REMOVING); + otherFilter.push_back(TenantState::UPDATING_CONFIGURATION); + otherFilter.push_back(TenantState::RENAMING_FROM); + otherFilter.push_back(TenantState::RENAMING_TO); + otherFilter.push_back(TenantState::ERROR); + otherFilter.push_back(TenantState::INVALID); loop { try { @@ -101,6 +115,12 @@ private: store(tenantList, MetaclusterAPI::listTenantsTransaction( managementTr, ""_sr, "\xff\xff"_sr, metaclusterMaxTenants)) && + store(tenantListReady, + MetaclusterAPI::listTenantsTransaction( + managementTr, ""_sr, "\xff\xff"_sr, metaclusterMaxTenants, readyFilter)) && + store(tenantListOther, + MetaclusterAPI::listTenantsTransaction( + managementTr, ""_sr, "\xff\xff"_sr, metaclusterMaxTenants, otherFilter)) && store(self->managementMetadata.tenantGroups, MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantGroupMap.getRange( managementTr, {}, {}, metaclusterMaxTenants)) && @@ -113,6 +133,8 @@ private: } } + ASSERT(tenantListReady.size() + tenantListOther.size() == tenantList.size()); + self->managementMetadata.tenantMap = std::map(tenantList.begin(), tenantList.end()); for (auto t : 
self->managementMetadata.clusterTenantTuples.results) { From 098793893e96a35eeddb1b1f9f309ab049d3b545 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 26 Oct 2022 14:50:59 -0700 Subject: [PATCH 03/57] move hints to correct generator --- fdbcli/MetaclusterCommands.actor.cpp | 4 ++-- fdbcli/TenantCommands.actor.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fdbcli/MetaclusterCommands.actor.cpp b/fdbcli/MetaclusterCommands.actor.cpp index 068fe75de5..edb25ace2c 100644 --- a/fdbcli/MetaclusterCommands.actor.cpp +++ b/fdbcli/MetaclusterCommands.actor.cpp @@ -397,8 +397,8 @@ std::vector metaclusterHintGenerator(std::vector const& "", "|connection_string=>" }; return std::vector(opts.begin() + std::min(1, tokens.size() - 2), opts.end()); - } else if (tokencmp(tokens[1], "list") && tokens.size() < 6) { - static std::vector opts = { "[BEGIN]", "[END]", "[LIMIT]", "[state=,,...]" }; + } else if (tokencmp(tokens[1], "list") && tokens.size() < 5) { + static std::vector opts = { "[BEGIN]", "[END]", "[LIMIT]" }; return std::vector(opts.begin() + tokens.size() - 2, opts.end()); } else if (tokencmp(tokens[1], "get") && tokens.size() < 4) { static std::vector opts = { "", "[JSON]" }; diff --git a/fdbcli/TenantCommands.actor.cpp b/fdbcli/TenantCommands.actor.cpp index 73c4e79adb..00bd0e8309 100644 --- a/fdbcli/TenantCommands.actor.cpp +++ b/fdbcli/TenantCommands.actor.cpp @@ -626,8 +626,8 @@ std::vector tenantHintGenerator(std::vector const& token } else if (tokencmp(tokens[1], "delete") && tokens.size() < 3) { static std::vector opts = { "" }; return std::vector(opts.begin() + tokens.size() - 2, opts.end()); - } else if (tokencmp(tokens[1], "list") && tokens.size() < 5) { - static std::vector opts = { "[BEGIN]", "[END]", "[LIMIT]" }; + } else if (tokencmp(tokens[1], "list") && tokens.size() < 6) { + static std::vector opts = { "[BEGIN]", "[END]", "[LIMIT]", "[state=,,...]" }; return std::vector(opts.begin() + tokens.size() - 2, opts.end()); } else if 
(tokencmp(tokens[1], "get") && tokens.size() < 4) { static std::vector opts = { "", "[JSON]" }; From 8a59bc276d52c3299b8e5a1ff7b95403a014de95 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 26 Oct 2022 11:44:42 -0700 Subject: [PATCH 04/57] data operation api (not finished) --- fdbserver/MockGlobalState.actor.cpp | 62 +++++++++++++++++++ fdbserver/include/fdbserver/MockGlobalState.h | 22 +++++++ .../include/fdbserver/StorageMetrics.actor.h | 4 ++ fdbserver/storageserver.actor.cpp | 3 +- 4 files changed, 89 insertions(+), 2 deletions(-) diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index 8de995fd2b..f95b2a33f6 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -250,6 +250,68 @@ Future MockStorageServer::run() { return serveStorageMetricsRequests(this, ssi); } +void MockStorageServer::set(KeyRef key, int64_t bytes, int64_t oldBytes) { + notifyMvccStorageCost(key, bytes); +} + +void MockStorageServer::insert(KeyRef key, int64_t bytes) { + notifyMvccStorageCost(key, bytes); +} + +void MockStorageServer::clear(KeyRef key, int64_t bytes) { + notifyMvccStorageCost(key, bytes); +} + +void MockStorageServer::clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { + notifyMvccStorageCost(range.begin, range.begin.size() + range.end.size()); + + auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); +} + +void MockStorageServer::get(KeyRef key, int64_t bytes) { + // If the read yields no value, randomly sample the empty read. 
+ int64_t bytesReadPerKSecond = std::max(bytes, SERVER_KNOBS->EMPTY_READ_PENALTY); + metrics.notifyBytesReadPerKSecond(key, bytesReadPerKSecond); +} + +void MockStorageServer::getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { + auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); + // For performance concerns, the cost of a range read is billed to the start key and end key of the + // range. + if (totalByteSize > 0) { + int64_t bytesReadPerKSecond = std::max(totalByteSize, SERVER_KNOBS->EMPTY_READ_PENALTY) / 2; + metrics.notifyBytesReadPerKSecond(range.begin, bytesReadPerKSecond); + metrics.notifyBytesReadPerKSecond(range.end, bytesReadPerKSecond); + } +} + +int64_t MockStorageServer::estimateRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { + int64_t totalByteSize = 0; + auto ranges = serverKeys.intersectingRanges(range); + + // use the beginShardBytes as partial size + if (ranges.begin().begin() < range.begin) { + ranges.pop_front(); + totalByteSize += beginShardBytes; + } + // use the endShardBytes as partial size + if (ranges.end().begin() < range.end) { + totalByteSize += endShardBytes; + } + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + totalByteSize += it->cvalue().shardSize; + } + return totalByteSize; +} + +void MockStorageServer::notifyMvccStorageCost(KeyRef key, int64_t size) { + // update write bandwidth and iops as mock the cost of writing mvcc storage + StorageMetrics s; + s.bytesPerKSecond = mvccStorageBytes(size) / 2; + s.iosPerKSecond = 1; + metrics.notify(key, s); +} + void MockGlobalState::initializeAsEmptyDatabaseMGS(const DatabaseConfiguration& conf, uint64_t defaultDiskSpace) { ASSERT(conf.storageTeamSize > 0); configuration = conf; diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index ac984e9069..260478c57e 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ 
b/fdbserver/include/fdbserver/MockGlobalState.h @@ -133,6 +133,23 @@ public: Future run(); + // data operation APIs - change the metrics + + // Set key with a new value, the total bytes change from oldBytes to bytes + void set(KeyRef key, int64_t bytes, int64_t oldBytes); + // Insert key with a new value, the total bytes is `bytes` + void insert(KeyRef key, int64_t bytes); + // Clear key and its value of which the size is bytes + void clear(KeyRef key, int64_t bytes); + // Clear range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` + void clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + + // modify the metrics as like doing an n-bytes read op + // Read key and cause bytes read overhead + void get(KeyRef key, int64_t bytes); + // Read range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` + void getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + protected: void threeWayShardSplitting(KeyRangeRef outerRange, KeyRangeRef innerRange, @@ -140,6 +157,11 @@ protected: bool restrictSize); void twoWayShardSplitting(KeyRangeRef range, KeyRef splitPoint, uint64_t rangeSize, bool restrictSize); + + // Assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` + int64_t estimateRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + // Update the storage metrics as if we write the MVCC storage with a mutation of `size` bytes. 
+ void notifyMvccStorageCost(KeyRef key, int64_t size); }; class MockGlobalStateImpl; diff --git a/fdbserver/include/fdbserver/StorageMetrics.actor.h b/fdbserver/include/fdbserver/StorageMetrics.actor.h index dc518cf318..34a2d27dd5 100644 --- a/fdbserver/include/fdbserver/StorageMetrics.actor.h +++ b/fdbserver/include/fdbserver/StorageMetrics.actor.h @@ -228,5 +228,9 @@ Future serveStorageMetricsRequests(ServiceType* self, StorageServerInterfa } } +// For both the mutation log and the versioned map. +inline int mvccStorageBytes(int64_t size) { + return VersionedMap::overheadPerItem * 2 + (MutationRef::OVERHEAD_BYTES + size) * 2; +} #include "flow/unactorcompiler.h" #endif // FDBSERVER_STORAGEMETRICS_H \ No newline at end of file diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 1e337bc4fd..41cc8e701d 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -535,8 +535,7 @@ const int VERSION_OVERHEAD = // overhead for map // For both the mutation log and the versioned map. 
static int mvccStorageBytes(MutationRef const& m) { - return VersionedMap::overheadPerItem * 2 + - (MutationRef::OVERHEAD_BYTES + m.param1.size() + m.param2.size()) * 2; + return mvccStorageBytes(m.param1.size() + m.param2.size()); } struct FetchInjectionInfo { From 334fced5723747bbcbe5b0f7bf0a377710f9d8eb Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Thu, 27 Oct 2022 10:59:19 -0700 Subject: [PATCH 05/57] add data api implementations; add more realistic fetchKey implementation; finish randomKeyBetween implementation --- fdbclient/FDBTypes.cpp | 46 +++++++++++ fdbclient/include/fdbclient/FDBTypes.h | 3 + fdbserver/DDTxnProcessor.actor.cpp | 30 ++++++- fdbserver/MockGlobalState.actor.cpp | 82 +++++++++++++++++-- fdbserver/include/fdbserver/MockGlobalState.h | 21 ++++- .../include/fdbserver/StorageMetrics.actor.h | 5 +- fdbserver/storageserver.actor.cpp | 6 +- 7 files changed, 179 insertions(+), 14 deletions(-) diff --git a/fdbclient/FDBTypes.cpp b/fdbclient/FDBTypes.cpp index edd016d391..056fc3430d 100644 --- a/fdbclient/FDBTypes.cpp +++ b/fdbclient/FDBTypes.cpp @@ -50,6 +50,52 @@ KeyRef keyBetween(const KeyRangeRef& keys) { return keys.end; } +Key randomKeyBetween(const KeyRangeRef& keys) { + KeyRef begin = keys.begin; + KeyRef end = keys.end; + ASSERT(begin < end); + if (begin.size() < end.size()) { + // randomly append a char + uint8_t newChar = deterministicRandom()->randomInt(0, end[begin.size()] + 1); + return begin.withSuffix(StringRef(&newChar, 1)); + } + + int pos = 0; // will be the position of the first difference between keys.begin and keys.end + for (; pos < end.size() && pos < CLIENT_KNOBS->KEY_SIZE_LIMIT; pos++) { + if (keys.begin[pos] != keys.end[pos]) { + break; + } + } + ASSERT(pos < end.size()); // otherwise, begin >= end + + // find the lowest char in range begin[pos+1, begin.size()) that is not \xff (255) + int lowest = begin.size() - 1; + for (; lowest > pos; lowest--) { + if (begin[lowest] < 255) { + Key res = begin; + uint8_t* ptr = 
mutateString(res); + *(ptr + lowest) = (uint8_t)deterministicRandom()->randomInt(begin[lowest] + 1, 256); + return res; + } + } + + if (begin[pos] + 1 < end[pos]) { + Key res = begin; + uint8_t* ptr = mutateString(res); + *(ptr + pos) = (uint8_t)deterministicRandom()->randomInt(begin[pos] + 1, end[pos]); + return res; + } + + if (begin.size() + 1 < CLIENT_KNOBS->KEY_SIZE_LIMIT) { + // randomly append a char + uint8_t newChar = deterministicRandom()->randomInt(1, 255); + return begin.withSuffix(StringRef(&newChar, 1)); + } + + // no possible result + return end; +} + void KeySelectorRef::setKey(KeyRef const& key) { // There are no keys in the database with size greater than the max key size, so if this key selector has a key // which is large, then we can translate it to an equivalent key selector with a smaller key diff --git a/fdbclient/include/fdbclient/FDBTypes.h b/fdbclient/include/fdbclient/FDBTypes.h index ddb6404bb8..a7ed1040ec 100644 --- a/fdbclient/include/fdbclient/FDBTypes.h +++ b/fdbclient/include/fdbclient/FDBTypes.h @@ -590,6 +590,9 @@ inline KeyRange prefixRange(KeyRef prefix) { // The returned reference is valid as long as keys is valid. KeyRef keyBetween(const KeyRangeRef& keys); +// Returns a randomKey between keys. If it's impossible, return keys.end. 
+Key randomKeyBetween(const KeyRangeRef& keys); + KeyRangeRef toPrefixRelativeRange(KeyRangeRef range, KeyRef prefix); struct KeySelectorRef { diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 693e06e949..6a6b7d78dc 100644 --- a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -680,14 +680,32 @@ Future DDTxnProcessor::rawFinishMovement(MoveKeysParams& params, } struct DDMockTxnProcessorImpl { + // return when all status become TRANSFERRED + ACTOR static Future checkFetchingState(DDMockTxnProcessor* self, std::vector ids, KeyRangeRef range) { + loop { + wait(delayJittered(1.0)); + DDMockTxnProcessor* selfP = self; + KeyRangeRef cloneRef; + if (std::all_of(ids.begin(), ids.end(), [selfP, cloneRef](const UID& id) { + auto& server = selfP->mgs->allServers.at(id); + return server.allShardStatusEqual(cloneRef, MockShardStatus::TRANSFERRED) || + server.allShardStatusEqual(cloneRef, MockShardStatus::COMPLETED); + })) { + break; + } + } + if (BUGGIFY_WITH_PROB(0.5)) { + wait(delayJittered(5.0)); + } + return Void(); + } + ACTOR static Future moveKeys(DDMockTxnProcessor* self, MoveKeysParams params) { state std::map tssMapping; self->rawStartMovement(params, tssMapping); ASSERT(tssMapping.empty()); - if (BUGGIFY_WITH_PROB(0.5)) { - wait(delayJittered(5.0)); - } + wait(checkFetchingState(self, params.destinationTeam, params.keys)); self->rawFinishMovement(params, tssMapping); if (!params.dataMovementComplete.isSet()) @@ -877,8 +895,12 @@ void DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, std::mapshardMapping->moveShard(params.keys, destTeams); + auto randomRangeSize = + deterministicRandom()->randomInt64(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES); for (auto& id : params.destinationTeam) { - mgs->allServers.at(id).setShardStatus(params.keys, MockShardStatus::INFLIGHT, mgs->restrictSize); + auto& server = mgs->allServers.at(id); + server.setShardStatus(params.keys, 
MockShardStatus::INFLIGHT, mgs->restrictSize); + server.signalFetchKeys(params.keys, randomRangeSize); } } diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index f95b2a33f6..cbb0a53bf4 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -114,6 +114,43 @@ public: } return Void(); } + + ACTOR static Future serveMockStorageServer(MockStorageServer* self) { + state ActorCollection actors; + loop choose { + when(MockStorageServer::FetchKeysParams params = waitNext(self->fetchKeysRequests.getFuture())) { + if (!self->allShardStatusEqual(params.keys, MockShardStatus::COMPLETED)) { + actors.add(waitFetchKeysFinish(self, params)); + } + } + when(wait(actors.getResult())) { ASSERT(false); } + } + } + ACTOR static Future waitFetchKeysFinish(MockStorageServer* self, MockStorageServer::FetchKeysParams params) { + // between each chunk delay for random time, and finally set the fetchComplete signal. + ASSERT(params.totalRangeBytes > 0); + state int chunkCount = std::ceil(params.totalRangeBytes * 1.0 / SERVER_KNOBS->FETCH_BLOCK_BYTES); + state Key lastKey = params.keys.begin; + + state int i = 0; + for (; i < chunkCount; ++i) { + wait(delayJittered(0.01)); + int remainBytes = (chunkCount == 1 ? 
params.totalRangeBytes : SERVER_KNOBS->FETCH_BLOCK_BYTES); + + while (remainBytes >= lastKey.size()) { + int maxSize = std::min(remainBytes, 130000) + 1; + int randomSize = deterministicRandom()->randomInt(lastKey.size(), maxSize); + + self->availableDiskSpace -= randomSize; + self->byteSampleApplySet(lastKey, randomSize); + remainBytes -= randomSize; + lastKey = randomKeyBetween(KeyRangeRef(lastKey, params.keys.end)); + } + } + + self->setShardStatus(params.keys, MockShardStatus::TRANSFERRED, true); + return Void(); + } }; bool MockStorageServer::allShardStatusEqual(KeyRangeRef range, MockShardStatus status) { @@ -133,7 +170,6 @@ void MockStorageServer::setShardStatus(KeyRangeRef range, MockShardStatus status if (ranges.begin().range().contains(range)) { CODE_PROBE(true, "Implicitly split single shard to 3 pieces"); threeWayShardSplitting(ranges.begin().range(), range, ranges.begin().cvalue().shardSize, restrictSize); - return; } if (ranges.begin().begin() < range.begin) { CODE_PROBE(true, "Implicitly split begin range to 2 pieces"); @@ -155,7 +191,8 @@ void MockStorageServer::setShardStatus(KeyRangeRef range, MockShardStatus status auto oldStatus = it.value().status; if (isStatusTransitionValid(oldStatus, status)) { it.value() = ShardInfo{ status, newSize }; - } else if (oldStatus == MockShardStatus::COMPLETED && status == MockShardStatus::INFLIGHT) { + } else if (oldStatus == MockShardStatus::COMPLETED && + (status == MockShardStatus::INFLIGHT || status == MockShardStatus::TRANSFERRED)) { CODE_PROBE(true, "Shard already on server"); } else { TraceEvent(SevError, "MockShardStatusTransitionError") @@ -176,6 +213,9 @@ void MockStorageServer::threeWayShardSplitting(KeyRangeRef outerRange, uint64_t outerRangeSize, bool restrictSize) { ASSERT(outerRange.contains(innerRange)); + if (outerRange == innerRange) { + return; + } Key left = outerRange.begin; // random generate 3 shard sizes, the caller guarantee that the min, max parameters are always valid. 
@@ -216,6 +256,7 @@ void MockStorageServer::removeShard(KeyRangeRef range) { auto ranges = serverKeys.containedRanges(range); ASSERT(ranges.begin().range() == range); serverKeys.rawErase(range); + metrics.notifyNotReadable(range); } uint64_t MockStorageServer::sumRangeSize(KeyRangeRef range) const { @@ -247,7 +288,9 @@ Future MockStorageServer::run() { ssi.initEndpoints(); ssi.startAcceptingRequests(); TraceEvent("MockStorageServerStart").detail("Address", ssi.address()); - return serveStorageMetricsRequests(this, ssi); + addActor(serveStorageMetricsRequests(this, ssi)); + addActor(MockStorageServerImpl::serveMockStorageServer(this)); + return actors.getResult(); } void MockStorageServer::set(KeyRef key, int64_t bytes, int64_t oldBytes) { @@ -258,14 +301,14 @@ void MockStorageServer::insert(KeyRef key, int64_t bytes) { notifyMvccStorageCost(key, bytes); } +// TODO: finish clear implementation. Currently the clear operations are not used. void MockStorageServer::clear(KeyRef key, int64_t bytes) { notifyMvccStorageCost(key, bytes); } void MockStorageServer::clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { notifyMvccStorageCost(range.begin, range.begin.size() + range.end.size()); - - auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); + // auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); } void MockStorageServer::get(KeyRef key, int64_t bytes) { @@ -312,6 +355,35 @@ void MockStorageServer::notifyMvccStorageCost(KeyRef key, int64_t size) { metrics.notify(key, s); } +void MockStorageServer::signalFetchKeys(KeyRangeRef range, int64_t rangeTotalBytes) { + fetchKeysRequests.send({ KeyRange(range), rangeTotalBytes }); +} + +Future MockStorageServer::fetchKeys(const MockStorageServer::FetchKeysParams& param) { + return MockStorageServerImpl::waitFetchKeysFinish(this, param); +} + +void MockStorageServer::byteSampleApplySet(KeyRef key, int64_t kvSize) { + // Update byteSample 
in memory and notify waiting metrics + ByteSampleInfo sampleInfo = isKeyValueInSample(key, kvSize); + auto& byteSample = metrics.byteSample.sample; + + int64_t delta = 0; + auto old = byteSample.find(key); + if (old != byteSample.end()) + delta = -byteSample.getMetric(old); + + if (sampleInfo.inSample) { + delta += sampleInfo.sampledSize; + byteSample.insert(key, sampleInfo.sampledSize); + } else if (old != byteSample.end()) { + byteSample.erase(old); + } + + if (delta) + metrics.notifyBytes(key, delta); +} + void MockGlobalState::initializeAsEmptyDatabaseMGS(const DatabaseConfiguration& conf, uint64_t defaultDiskSpace) { ASSERT(conf.storageTeamSize > 0); configuration = conf; diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index 260478c57e..16e93ce664 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -35,6 +35,7 @@ enum class MockShardStatus { EMPTY = 0, // data loss COMPLETED, INFLIGHT, + TRANSFERRED, // finish fetch Keys but not change the serverKey mapping. Only can be set by MSS itself. 
UNSET }; @@ -42,8 +43,11 @@ inline bool isStatusTransitionValid(MockShardStatus from, MockShardStatus to) { switch (from) { case MockShardStatus::UNSET: case MockShardStatus::EMPTY: - case MockShardStatus::INFLIGHT: return to == MockShardStatus::COMPLETED || to == MockShardStatus::INFLIGHT || to == MockShardStatus::EMPTY; + case MockShardStatus::INFLIGHT: + return to == MockShardStatus::TRANSFERRED || to == MockShardStatus::INFLIGHT || to == MockShardStatus::EMPTY; + case MockShardStatus::TRANSFERRED: + return to == MockShardStatus::COMPLETED; case MockShardStatus::COMPLETED: return to == MockShardStatus::EMPTY; default: @@ -52,8 +56,10 @@ inline bool isStatusTransitionValid(MockShardStatus from, MockShardStatus to) { return false; } +class MockStorageServerImpl; class MockStorageServer : public IStorageMetricsService { friend struct MockGlobalStateTester; + friend class MockStorageServerImpl; ActorCollection actors; @@ -66,6 +72,11 @@ public: bool operator!=(const ShardInfo& a) const { return !(a == *this); } }; + struct FetchKeysParams { + KeyRange keys; + int64_t totalRangeBytes; + }; + static constexpr uint64_t DEFAULT_DISK_SPACE = 1000LL * 1024 * 1024 * 1024; // control plane statistics associated with a real storage server @@ -150,7 +161,11 @@ public: // Read range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` void getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + void signalFetchKeys(KeyRangeRef range, int64_t rangeTotalBytes); + protected: + PromiseStream fetchKeysRequests; + void threeWayShardSplitting(KeyRangeRef outerRange, KeyRangeRef innerRange, uint64_t outerRangeSize, @@ -162,6 +177,10 @@ protected: int64_t estimateRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); // Update the storage metrics as if we write the MVCC storage with a mutation of `size` bytes. 
void notifyMvccStorageCost(KeyRef key, int64_t size); + + Future fetchKeys(const FetchKeysParams&); + + void byteSampleApplySet(KeyRef key, int64_t kvSize); }; class MockGlobalStateImpl; diff --git a/fdbserver/include/fdbserver/StorageMetrics.actor.h b/fdbserver/include/fdbserver/StorageMetrics.actor.h index 34a2d27dd5..db7524d5f9 100644 --- a/fdbserver/include/fdbserver/StorageMetrics.actor.h +++ b/fdbserver/include/fdbserver/StorageMetrics.actor.h @@ -156,7 +156,10 @@ struct ByteSampleInfo { // Determines whether a key-value pair should be included in a byte sample // Also returns size information about the sample -ByteSampleInfo isKeyValueInSample(KeyValueRef keyValue); +ByteSampleInfo isKeyValueInSample(KeyRef key, int64_t totalKvSize); +inline ByteSampleInfo isKeyValueInSample(KeyValueRef keyValue) { + return isKeyValueInSample(keyValue.key, keyValue.key.size() + keyValue.value.size()); +} class IStorageMetricsService { public: diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 41cc8e701d..35101586f7 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -9931,11 +9931,11 @@ Future StorageServerDisk::restoreDurableState() { // Determines whether a key-value pair should be included in a byte sample // Also returns size information about the sample -ByteSampleInfo isKeyValueInSample(KeyValueRef keyValue) { +ByteSampleInfo isKeyValueInSample(const KeyRef key, int64_t totalKvSize) { + ASSERT(totalKvSize >= key.size()); ByteSampleInfo info; - const KeyRef key = keyValue.key; - info.size = key.size() + keyValue.value.size(); + info.size = totalKvSize; uint32_t a = 0; uint32_t b = 0; From cc61ea6a01005f9a78e90e942bb9ac74038342a8 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Thu, 27 Oct 2022 12:50:49 -0700 Subject: [PATCH 06/57] finish the clearrange ops --- fdbserver/MockGlobalState.actor.cpp | 67 +++++++++++++++++-- fdbserver/include/fdbserver/MockGlobalState.h | 7 +- 2 files changed, 66 
insertions(+), 8 deletions(-) diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index cbb0a53bf4..6230fa2c99 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -255,7 +255,10 @@ void MockStorageServer::twoWayShardSplitting(KeyRangeRef range, void MockStorageServer::removeShard(KeyRangeRef range) { auto ranges = serverKeys.containedRanges(range); ASSERT(ranges.begin().range() == range); + auto rangeSize = sumRangeSize(range); + availableDiskSpace += rangeSize; serverKeys.rawErase(range); + byteSampleApplyClear(range); metrics.notifyNotReadable(range); } @@ -295,20 +298,26 @@ Future MockStorageServer::run() { void MockStorageServer::set(KeyRef key, int64_t bytes, int64_t oldBytes) { notifyMvccStorageCost(key, bytes); + byteSampleApplySet(key, bytes); + auto delta = oldBytes - bytes; + availableDiskSpace += delta; + serverKeys[key].shardSize += delta; } -void MockStorageServer::insert(KeyRef key, int64_t bytes) { - notifyMvccStorageCost(key, bytes); -} - -// TODO: finish clear implementation. Currently the clear operations are not used. 
void MockStorageServer::clear(KeyRef key, int64_t bytes) { notifyMvccStorageCost(key, bytes); + KeyRange sr = singleKeyRange(key); + byteSampleApplyClear(sr); + availableDiskSpace += bytes; + serverKeys[key].shardSize -= bytes; } void MockStorageServer::clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { notifyMvccStorageCost(range.begin, range.begin.size() + range.end.size()); - // auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); + byteSampleApplyClear(range); + auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); + availableDiskSpace += totalByteSize; + clearRangeTotalBytes(range, beginShardBytes, endShardBytes); } void MockStorageServer::get(KeyRef key, int64_t bytes) { @@ -347,6 +356,25 @@ int64_t MockStorageServer::estimateRangeTotalBytes(KeyRangeRef range, int64_t be return totalByteSize; } +void MockStorageServer::clearRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { + auto ranges = serverKeys.intersectingRanges(range); + + // use the beginShardBytes as partial size + if (ranges.begin().begin() < range.begin) { + auto delta = std::min(ranges.begin().value().shardSize, (uint64_t)beginShardBytes); + ranges.begin().value().shardSize -= delta; + ranges.pop_front(); + } + // use the endShardBytes as partial size + if (ranges.end().begin() < range.end) { + auto delta = std::min(ranges.end().value().shardSize, (uint64_t)endShardBytes); + ranges.end().value().shardSize -= delta; + } + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + it->value().shardSize = 0; + } +} + void MockStorageServer::notifyMvccStorageCost(KeyRef key, int64_t size) { // update write bandwidth and iops as mock the cost of writing mvcc storage StorageMetrics s; @@ -384,6 +412,33 @@ void MockStorageServer::byteSampleApplySet(KeyRef key, int64_t kvSize) { metrics.notifyBytes(key, delta); } +void MockStorageServer::byteSampleApplyClear(KeyRangeRef range) { 
+ // Update byteSample in memory and (eventually) on disk via the mutationLog and notify waiting metrics + + auto& byteSample = metrics.byteSample.sample; + bool any = false; + + if (range.begin < allKeys.end) { + // NotifyBytes should not be called for keys past allKeys.end + KeyRangeRef searchRange = KeyRangeRef(range.begin, std::min(range.end, allKeys.end)); + + auto r = metrics.waitMetricsMap.intersectingRanges(searchRange); + for (auto shard = r.begin(); shard != r.end(); ++shard) { + KeyRangeRef intersectingRange = shard.range() & range; + int64_t bytes = byteSample.sumRange(intersectingRange.begin, intersectingRange.end); + metrics.notifyBytes(shard, -bytes); + any = any || bytes > 0; + } + } + + if (range.end > allKeys.end && byteSample.sumRange(std::max(allKeys.end, range.begin), range.end) > 0) + any = true; + + if (any) { + byteSample.eraseAsync(range.begin, range.end); + } +} + void MockGlobalState::initializeAsEmptyDatabaseMGS(const DatabaseConfiguration& conf, uint64_t defaultDiskSpace) { ASSERT(conf.storageTeamSize > 0); configuration = conf; diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index 16e93ce664..ed01f26ee0 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -148,8 +148,6 @@ public: // Set key with a new value, the total bytes change from oldBytes to bytes void set(KeyRef key, int64_t bytes, int64_t oldBytes); - // Insert key with a new value, the total bytes is `bytes` - void insert(KeyRef key, int64_t bytes); // Clear key and its value of which the size is bytes void clear(KeyRef key, int64_t bytes); // Clear range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` @@ -175,12 +173,17 @@ protected: // Assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` int64_t estimateRangeTotalBytes(KeyRangeRef range, int64_t 
beginShardBytes, int64_t endShardBytes); + // Decrease the intersecting shard bytes as if delete the data + void clearRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + // Update the storage metrics as if we write the MVCC storage with a mutation of `size` bytes. void notifyMvccStorageCost(KeyRef key, int64_t size); Future fetchKeys(const FetchKeysParams&); void byteSampleApplySet(KeyRef key, int64_t kvSize); + + void byteSampleApplyClear(KeyRangeRef range); }; class MockGlobalStateImpl; From 0cbd1dfccaa268e18e18878314f51cc0287b0f5b Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Thu, 27 Oct 2022 15:46:48 -0700 Subject: [PATCH 07/57] add comment and MockDDTest base class file --- fdbserver/DDTxnProcessor.actor.cpp | 5 +-- fdbserver/MockGlobalState.actor.cpp | 20 ++++++++-- fdbserver/include/fdbserver/MockGlobalState.h | 22 ++++++---- fdbserver/workloads/MockDDTest.actor.cpp | 40 +++++++++++++++++++ 4 files changed, 73 insertions(+), 14 deletions(-) create mode 100644 fdbserver/workloads/MockDDTest.actor.cpp diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 6a6b7d78dc..7b124bea06 100644 --- a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -680,7 +680,7 @@ Future DDTxnProcessor::rawFinishMovement(MoveKeysParams& params, } struct DDMockTxnProcessorImpl { - // return when all status become TRANSFERRED + // return when all status become FETCHED ACTOR static Future checkFetchingState(DDMockTxnProcessor* self, std::vector ids, KeyRangeRef range) { loop { wait(delayJittered(1.0)); @@ -688,8 +688,7 @@ struct DDMockTxnProcessorImpl { KeyRangeRef cloneRef; if (std::all_of(ids.begin(), ids.end(), [selfP, cloneRef](const UID& id) { auto& server = selfP->mgs->allServers.at(id); - return server.allShardStatusEqual(cloneRef, MockShardStatus::TRANSFERRED) || - server.allShardStatusEqual(cloneRef, MockShardStatus::COMPLETED); + return server.allShardStatusIn(cloneRef, { 
MockShardStatus::FETCHED, MockShardStatus::COMPLETED }); })) { break; } diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index 6230fa2c99..a862072d51 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -148,7 +148,7 @@ public: } } - self->setShardStatus(params.keys, MockShardStatus::TRANSFERRED, true); + self->setShardStatus(params.keys, MockShardStatus::FETCHED, true); return Void(); } }; @@ -164,6 +164,17 @@ bool MockStorageServer::allShardStatusEqual(KeyRangeRef range, MockShardStatus s return true; } +bool MockStorageServer::allShardStatusIn(KeyRangeRef range, const std::set& status) { + auto ranges = serverKeys.intersectingRanges(range); + ASSERT(!ranges.empty()); // at least the range is allKeys + + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + if (!status.count(it->cvalue().status)) + return false; + } + return true; +} + void MockStorageServer::setShardStatus(KeyRangeRef range, MockShardStatus status, bool restrictSize) { auto ranges = serverKeys.intersectingRanges(range); ASSERT(!ranges.empty()); @@ -192,7 +203,7 @@ void MockStorageServer::setShardStatus(KeyRangeRef range, MockShardStatus status if (isStatusTransitionValid(oldStatus, status)) { it.value() = ShardInfo{ status, newSize }; } else if (oldStatus == MockShardStatus::COMPLETED && - (status == MockShardStatus::INFLIGHT || status == MockShardStatus::TRANSFERRED)) { + (status == MockShardStatus::INFLIGHT || status == MockShardStatus::FETCHED)) { CODE_PROBE(true, "Shard already on server"); } else { TraceEvent(SevError, "MockShardStatusTransitionError") @@ -413,7 +424,7 @@ void MockStorageServer::byteSampleApplySet(KeyRef key, int64_t kvSize) { } void MockStorageServer::byteSampleApplyClear(KeyRangeRef range) { - // Update byteSample in memory and (eventually) on disk via the mutationLog and notify waiting metrics + // Update byteSample and notify waiting metrics auto& byteSample = 
metrics.byteSample.sample; bool any = false; @@ -483,7 +494,8 @@ bool MockGlobalState::serverIsDestForShard(const UID& serverId, KeyRangeRef shar // check serverKeys auto& mss = allServers.at(serverId); - if (!mss.allShardStatusEqual(shard, MockShardStatus::INFLIGHT)) { + if (!mss.allShardStatusIn(shard, + { MockShardStatus::INFLIGHT, MockShardStatus::COMPLETED, MockShardStatus::FETCHED })) { return false; } diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index ed01f26ee0..a86d8aeb44 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -35,7 +35,7 @@ enum class MockShardStatus { EMPTY = 0, // data loss COMPLETED, INFLIGHT, - TRANSFERRED, // finish fetch Keys but not change the serverKey mapping. Only can be set by MSS itself. + FETCHED, // finish fetch but not change the serverKey mapping. Only can be set by MSS itself. UNSET }; @@ -45,8 +45,8 @@ inline bool isStatusTransitionValid(MockShardStatus from, MockShardStatus to) { case MockShardStatus::EMPTY: return to == MockShardStatus::COMPLETED || to == MockShardStatus::INFLIGHT || to == MockShardStatus::EMPTY; case MockShardStatus::INFLIGHT: - return to == MockShardStatus::TRANSFERRED || to == MockShardStatus::INFLIGHT || to == MockShardStatus::EMPTY; - case MockShardStatus::TRANSFERRED: + return to == MockShardStatus::FETCHED || to == MockShardStatus::INFLIGHT || to == MockShardStatus::EMPTY; + case MockShardStatus::FETCHED: return to == MockShardStatus::COMPLETED; case MockShardStatus::COMPLETED: return to == MockShardStatus::EMPTY; @@ -80,7 +80,7 @@ public: static constexpr uint64_t DEFAULT_DISK_SPACE = 1000LL * 1024 * 1024 * 1024; // control plane statistics associated with a real storage server - uint64_t usedDiskSpace = 0, availableDiskSpace = DEFAULT_DISK_SPACE; + uint64_t totalDiskSpace = DEFAULT_DISK_SPACE, availableDiskSpace = DEFAULT_DISK_SPACE; // In-memory counterpart of the 
`serverKeys` in system keyspace // the value ShardStatus is [InFlight, Completed, Empty] and metrics uint64_t is the shard size, the caveat is the @@ -96,7 +96,8 @@ public: MockStorageServer() = default; MockStorageServer(StorageServerInterface ssi, uint64_t availableDiskSpace, uint64_t usedDiskSpace = 0) - : usedDiskSpace(usedDiskSpace), availableDiskSpace(availableDiskSpace), ssi(ssi), id(ssi.id()) {} + : totalDiskSpace(usedDiskSpace + availableDiskSpace), availableDiskSpace(availableDiskSpace), ssi(ssi), + id(ssi.id()) {} MockStorageServer(const UID& id, uint64_t availableDiskSpace, uint64_t usedDiskSpace = 0) : MockStorageServer(StorageServerInterface(id), availableDiskSpace, usedDiskSpace) {} @@ -104,6 +105,7 @@ public: decltype(serverKeys)::Ranges getAllRanges() { return serverKeys.ranges(); } bool allShardStatusEqual(KeyRangeRef range, MockShardStatus status); + bool allShardStatusIn(KeyRangeRef range, const std::set& status); // change the status of range. This function may result in split to make the shard boundary align with range.begin // and range.end. 
In this case, if restrictSize==true, the sum of the split shard size is strictly equal to the old @@ -113,6 +115,7 @@ public: // this function removed an aligned range from server void removeShard(KeyRangeRef range); + // intersecting range size uint64_t sumRangeSize(KeyRangeRef range) const; void addActor(Future future) override; @@ -144,7 +147,7 @@ public: Future run(); - // data operation APIs - change the metrics + // data operation APIs - change the metrics sample, disk space and shard size // Set key with a new value, the total bytes change from oldBytes to bytes void set(KeyRef key, int64_t bytes, int64_t oldBytes); @@ -159,6 +162,7 @@ public: // Read range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` void getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + // trigger the asynchronous fetch keys operation void signalFetchKeys(KeyRangeRef range, int64_t rangeTotalBytes); protected: @@ -179,10 +183,14 @@ protected: // Update the storage metrics as if we write the MVCC storage with a mutation of `size` bytes. void notifyMvccStorageCost(KeyRef key, int64_t size); + // Randomly generate keys and kv size between the fetch range, updating the byte sample. + // Once the fetchKeys return, the shard status will become FETCHED. Future fetchKeys(const FetchKeysParams&); + // Update byte sample as if set a key value pair of which the size is kvSize void byteSampleApplySet(KeyRef key, int64_t kvSize); + // Update byte sample as if clear a whole range void byteSampleApplyClear(KeyRangeRef range); }; @@ -223,7 +231,7 @@ public: * Shard is in-flight. 
* * In mgs.shardMapping,the destination teams is non-empty for a given shard; * * For each MSS belonging to the source teams, mss.serverKeys[shard] = Completed - * * For each MSS belonging to the destination teams, mss.serverKeys[shard] = InFlight|Completed + * * For each MSS belonging to the destination teams, mss.serverKeys[shard] = InFlight | Fetched | Completed * Shard is lost. * * In mgs.shardMapping, the destination teams is empty for the given shard; * * For each MSS belonging to the source teams, mss.serverKeys[shard] = Empty diff --git a/fdbserver/workloads/MockDDTest.actor.cpp b/fdbserver/workloads/MockDDTest.actor.cpp new file mode 100644 index 0000000000..209df66a5e --- /dev/null +++ b/fdbserver/workloads/MockDDTest.actor.cpp @@ -0,0 +1,40 @@ +/* + * MockDDTest.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbserver/workloads/workloads.actor.h" +#include "fdbclient/FDBOptions.g.h" +#include "fdbclient/ManagementAPI.actor.h" +#include "fdbserver/DDSharedContext.h" +#include "fdbserver/DDTxnProcessor.h" +#include "fdbserver/MoveKeys.actor.h" +#include "fdbclient/StorageServerInterface.h" +#include "fdbserver/workloads/workloads.actor.h" +#include "flow/actorcompiler.h" // This must be the last #include. 
+ +struct MockDDTestWorkload { + bool enabled; + double testDuration; + double meanDelay = 0.05; + double maxKeyspace = 0.1; + DDSharedContext ddContext; + + std::shared_ptr mgs; + std::shared_ptr mock; +}; \ No newline at end of file From 11b2c035c0acb19f49c529e712a9d7f6a1bcfb62 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Fri, 28 Oct 2022 00:21:54 -0700 Subject: [PATCH 08/57] add unit test for randomKeyBetween --- fdbclient/FDBTypes.cpp | 25 +++++++++++++++++++ .../fdbclient/StorageServerInterface.h | 4 +-- .../include/fdbserver/StorageMetrics.actor.h | 6 ++--- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/fdbclient/FDBTypes.cpp b/fdbclient/FDBTypes.cpp index 056fc3430d..c104aea5e8 100644 --- a/fdbclient/FDBTypes.cpp +++ b/fdbclient/FDBTypes.cpp @@ -96,6 +96,31 @@ Key randomKeyBetween(const KeyRangeRef& keys) { return end; } +TEST_CASE("/KeyRangeUtil/randomKeyBetween") { + Key begin = "qwert"_sr; + Key end = "qwertyu"_sr; + Key res; + for(int i = 0; i < 10; ++ i) { + res = randomKeyBetween(KeyRangeRef(begin, end)); + ASSERT(res > begin); + ASSERT(res < end); + } + + begin = "q"_sr; + end = "q\x00"_sr; + res = randomKeyBetween(KeyRangeRef(begin, end)); + ASSERT(res == end); + + begin = "aaaaaaa"_sr; + end = "b"_sr; + for(int i = 0; i < 10; ++ i) { + res = randomKeyBetween(KeyRangeRef(begin, end)); + ASSERT(res > begin); + ASSERT(res < end); + } + return Void(); +} + void KeySelectorRef::setKey(KeyRef const& key) { // There are no keys in the database with size greater than the max key size, so if this key selector has a key // which is large, then we can translate it to an equivalent key selector with a smaller key diff --git a/fdbclient/include/fdbclient/StorageServerInterface.h b/fdbclient/include/fdbclient/StorageServerInterface.h index 2358312a4a..cdc79c05c5 100644 --- a/fdbclient/include/fdbclient/StorageServerInterface.h +++ b/fdbclient/include/fdbclient/StorageServerInterface.h @@ -634,9 +634,9 @@ struct GetShardStateRequest { struct 
StorageMetrics { constexpr static FileIdentifier file_identifier = 13622226; int64_t bytes = 0; // total storage - // FIXME: currently, neither of bytesPerKSecond or iosPerKSecond are actually used in DataDistribution calculations. - // This may change in the future, but this comment is left here to avoid any confusion for the time being. int64_t bytesPerKSecond = 0; // network bandwidth (average over 10s) + + // FIXME: currently, iosPerKSecond is not used in DataDistribution calculations. int64_t iosPerKSecond = 0; int64_t bytesReadPerKSecond = 0; diff --git a/fdbserver/include/fdbserver/StorageMetrics.actor.h b/fdbserver/include/fdbserver/StorageMetrics.actor.h index db7524d5f9..553dcaa4b9 100644 --- a/fdbserver/include/fdbserver/StorageMetrics.actor.h +++ b/fdbserver/include/fdbserver/StorageMetrics.actor.h @@ -77,9 +77,9 @@ private: struct StorageServerMetrics { KeyRangeMap>> waitMetricsMap; StorageMetricSample byteSample; - TransientStorageMetricSample iopsSample, - bandwidthSample; // FIXME: iops and bandwidth calculations are not effectively tested, since they aren't - // currently used by data distribution + + // FIXME: iops is not effectively tested, and is not used by data distribution + TransientStorageMetricSample iopsSample, bandwidthSample; TransientStorageMetricSample bytesReadSample; StorageServerMetrics() From 55a3db82b540e1c1f7630557a47bcb6674660aea Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Fri, 28 Oct 2022 11:13:21 -0700 Subject: [PATCH 09/57] update the name, comment and discription of write byte sampling; update the calculation of write bandwidth metrics --- fdbclient/ServerKnobs.cpp | 4 +- fdbclient/include/fdbclient/ServerKnobs.h | 2 +- .../fdbclient/StorageServerInterface.h | 20 +++++----- fdbserver/BlobManager.actor.cpp | 10 ++--- fdbserver/BlobWorker.actor.cpp | 2 +- fdbserver/DDShardTracker.actor.cpp | 40 +++++++++---------- fdbserver/DataDistribution.actor.cpp | 6 +-- fdbserver/MockGlobalState.actor.cpp | 12 +++--- 
fdbserver/StorageMetrics.actor.cpp | 30 +++++++------- fdbserver/include/fdbserver/MockGlobalState.h | 4 +- .../include/fdbserver/StorageMetrics.actor.h | 9 +---- fdbserver/storageserver.actor.cpp | 18 +++++---- 12 files changed, 78 insertions(+), 79 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index d729384ebf..c153fb3cc3 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -223,7 +223,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi shards. The bandwidth sample maintained by the storage server needs to be accurate enough to reliably measure this minimum bandwidth. See - BANDWIDTH_UNITS_PER_SAMPLE. If this number is too low, the storage server needs to spend more memory and time on sampling. + BYTES_WRITE_UNITS_PER_SAMPLE. If this number is too low, the storage server needs to spend more memory and time on sampling. */ init( SHARD_SPLIT_BYTES_PER_KSEC, 250 * 1000 * 1000 ); if( buggifySmallBandwidthSplit ) SHARD_SPLIT_BYTES_PER_KSEC = 50 * 1000 * 1000; @@ -743,7 +743,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS, 1000.0 / STORAGE_METRICS_AVERAGE_INTERVAL ); // milliHz! 
init( SPLIT_JITTER_AMOUNT, 0.05 ); if( randomize && BUGGIFY ) SPLIT_JITTER_AMOUNT = 0.2; init( IOPS_UNITS_PER_SAMPLE, 10000 * 1000 / STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS / 100 ); - init( BANDWIDTH_UNITS_PER_SAMPLE, SHARD_MIN_BYTES_PER_KSEC / STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS / 25 ); + init( BYTES_WRITE_UNITS_PER_SAMPLE, SHARD_MIN_BYTES_PER_KSEC / STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS / 25 ); init( BYTES_READ_UNITS_PER_SAMPLE, 100000 ); // 100K bytes init( READ_HOT_SUB_RANGE_CHUNK_SIZE, 10000000); // 10MB init( EMPTY_READ_PENALTY, 20 ); // 20 bytes diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h index d4ba08d518..f782d63e69 100644 --- a/fdbclient/include/fdbclient/ServerKnobs.h +++ b/fdbclient/include/fdbclient/ServerKnobs.h @@ -697,7 +697,7 @@ public: double STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; double SPLIT_JITTER_AMOUNT; int64_t IOPS_UNITS_PER_SAMPLE; - int64_t BANDWIDTH_UNITS_PER_SAMPLE; + int64_t BYTES_WRITE_UNITS_PER_SAMPLE; int64_t BYTES_READ_UNITS_PER_SAMPLE; int64_t READ_HOT_SUB_RANGE_CHUNK_SIZE; int64_t EMPTY_READ_PENALTY; diff --git a/fdbclient/include/fdbclient/StorageServerInterface.h b/fdbclient/include/fdbclient/StorageServerInterface.h index cdc79c05c5..2a2442c94b 100644 --- a/fdbclient/include/fdbclient/StorageServerInterface.h +++ b/fdbclient/include/fdbclient/StorageServerInterface.h @@ -634,7 +634,7 @@ struct GetShardStateRequest { struct StorageMetrics { constexpr static FileIdentifier file_identifier = 13622226; int64_t bytes = 0; // total storage - int64_t bytesPerKSecond = 0; // network bandwidth (average over 10s) + int64_t writeBytesPerKSecond = 0; // network bandwidth (average over 10s) == write bandwidth through any IO devices // FIXME: currently, iosPerKSecond is not used in DataDistribution calculations. 
int64_t iosPerKSecond = 0; @@ -643,33 +643,33 @@ struct StorageMetrics { static const int64_t infinity = 1LL << 60; bool allLessOrEqual(const StorageMetrics& rhs) const { - return bytes <= rhs.bytes && bytesPerKSecond <= rhs.bytesPerKSecond && iosPerKSecond <= rhs.iosPerKSecond && + return bytes <= rhs.bytes && writeBytesPerKSecond <= rhs.writeBytesPerKSecond && iosPerKSecond <= rhs.iosPerKSecond && bytesReadPerKSecond <= rhs.bytesReadPerKSecond; } void operator+=(const StorageMetrics& rhs) { bytes += rhs.bytes; - bytesPerKSecond += rhs.bytesPerKSecond; + writeBytesPerKSecond += rhs.writeBytesPerKSecond; iosPerKSecond += rhs.iosPerKSecond; bytesReadPerKSecond += rhs.bytesReadPerKSecond; } void operator-=(const StorageMetrics& rhs) { bytes -= rhs.bytes; - bytesPerKSecond -= rhs.bytesPerKSecond; + writeBytesPerKSecond -= rhs.writeBytesPerKSecond; iosPerKSecond -= rhs.iosPerKSecond; bytesReadPerKSecond -= rhs.bytesReadPerKSecond; } template void operator*=(F f) { bytes *= f; - bytesPerKSecond *= f; + writeBytesPerKSecond *= f; iosPerKSecond *= f; bytesReadPerKSecond *= f; } - bool allZero() const { return !bytes && !bytesPerKSecond && !iosPerKSecond && !bytesReadPerKSecond; } + bool allZero() const { return !bytes && !writeBytesPerKSecond && !iosPerKSecond && !bytesReadPerKSecond; } template void serialize(Ar& ar) { - serializer(ar, bytes, bytesPerKSecond, iosPerKSecond, bytesReadPerKSecond); + serializer(ar, bytes, writeBytesPerKSecond, iosPerKSecond, bytesReadPerKSecond); } void negate() { operator*=(-1.0); } @@ -697,14 +697,14 @@ struct StorageMetrics { } bool operator==(StorageMetrics const& rhs) const { - return bytes == rhs.bytes && bytesPerKSecond == rhs.bytesPerKSecond && iosPerKSecond == rhs.iosPerKSecond && + return bytes == rhs.bytes && writeBytesPerKSecond == rhs.writeBytesPerKSecond && iosPerKSecond == rhs.iosPerKSecond && bytesReadPerKSecond == rhs.bytesReadPerKSecond; } std::string toString() const { - return format("Bytes: %lld, BPerKSec: %lld, 
iosPerKSec: %lld, BReadPerKSec: %lld", + return format("Bytes: %lld, BWritePerKSec: %lld, iosPerKSec: %lld, BReadPerKSec: %lld", bytes, - bytesPerKSecond, + writeBytesPerKSecond, iosPerKSecond, bytesReadPerKSecond); } diff --git a/fdbserver/BlobManager.actor.cpp b/fdbserver/BlobManager.actor.cpp index 84b482cfcf..50c68f328e 100644 --- a/fdbserver/BlobManager.actor.cpp +++ b/fdbserver/BlobManager.actor.cpp @@ -636,11 +636,11 @@ ACTOR Future splitRange(Reference bmDat // only split on bytes and write rate state StorageMetrics splitMetrics; splitMetrics.bytes = SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES; - splitMetrics.bytesPerKSecond = SERVER_KNOBS->SHARD_SPLIT_BYTES_PER_KSEC; + splitMetrics.writeBytesPerKSecond = SERVER_KNOBS->SHARD_SPLIT_BYTES_PER_KSEC; if (writeHot) { - splitMetrics.bytesPerKSecond = std::min(splitMetrics.bytesPerKSecond, estimated.bytesPerKSecond / 2); - splitMetrics.bytesPerKSecond = - std::max(splitMetrics.bytesPerKSecond, SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC); + splitMetrics.writeBytesPerKSecond = std::min(splitMetrics.writeBytesPerKSecond, estimated.writeBytesPerKSecond / 2); + splitMetrics.writeBytesPerKSecond = + std::max(splitMetrics.writeBytesPerKSecond, SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC); } splitMetrics.iosPerKSecond = splitMetrics.infinity; splitMetrics.bytesReadPerKSecond = splitMetrics.infinity; @@ -2616,7 +2616,7 @@ ACTOR Future attemptMerges(Reference bmData, wait(bmData->db->getStorageMetrics(std::get<1>(candidates[i]), CLIENT_KNOBS->TOO_MANY)); if (metrics.bytes >= SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES || - metrics.bytesPerKSecond >= SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) { + metrics.writeBytesPerKSecond >= SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) { // This granule cannot be merged with any neighbors. 
// If current candidates up to here can be merged, merge them and skip over this one attemptStartMerge(bmData, currentCandidates); diff --git a/fdbserver/BlobWorker.actor.cpp b/fdbserver/BlobWorker.actor.cpp index fc51c8fae0..f5cb4e2c13 100644 --- a/fdbserver/BlobWorker.actor.cpp +++ b/fdbserver/BlobWorker.actor.cpp @@ -1595,7 +1595,7 @@ ACTOR Future granuleCheckMergeCandidate(Reference bwData, // FIXME: maybe separate knob and/or value for write rate? if (currentMetrics.bytes >= SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES / 2 || - currentMetrics.bytesPerKSecond >= SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) { + currentMetrics.writeBytesPerKSecond >= SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) { wait(delayJittered(SERVER_KNOBS->BG_MERGE_CANDIDATE_THRESHOLD_SECONDS / 2.0)); CODE_PROBE(true, "wait and check later to see if granule got smaller or colder"); continue; diff --git a/fdbserver/DDShardTracker.actor.cpp b/fdbserver/DDShardTracker.actor.cpp index 7964915217..b436be9965 100644 --- a/fdbserver/DDShardTracker.actor.cpp +++ b/fdbserver/DDShardTracker.actor.cpp @@ -41,9 +41,9 @@ enum BandwidthStatus { BandwidthStatusLow, BandwidthStatusNormal, BandwidthStatu enum ReadBandwidthStatus { ReadBandwidthStatusNormal, ReadBandwidthStatusHigh }; BandwidthStatus getBandwidthStatus(StorageMetrics const& metrics) { - if (metrics.bytesPerKSecond > SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC) + if (metrics.writeBytesPerKSecond > SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC) return BandwidthStatusHigh; - else if (metrics.bytesPerKSecond < SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) + else if (metrics.writeBytesPerKSecond < SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) return BandwidthStatusLow; return BandwidthStatusNormal; @@ -176,7 +176,7 @@ ShardSizeBounds getShardSizeBounds(KeyRangeRef shard, int64_t maxShardSize) { bounds.max.bytes = maxShardSize; } - bounds.max.bytesPerKSecond = bounds.max.infinity; + bounds.max.writeBytesPerKSecond = bounds.max.infinity; bounds.max.iosPerKSecond = 
bounds.max.infinity; bounds.max.bytesReadPerKSecond = bounds.max.infinity; @@ -187,14 +187,14 @@ ShardSizeBounds getShardSizeBounds(KeyRangeRef shard, int64_t maxShardSize) { bounds.min.bytes = maxShardSize / SERVER_KNOBS->SHARD_BYTES_RATIO; } - bounds.min.bytesPerKSecond = 0; + bounds.min.writeBytesPerKSecond = 0; bounds.min.iosPerKSecond = 0; bounds.min.bytesReadPerKSecond = 0; // The permitted error is 1/3 of the general-case minimum bytes (even in the special case where this is the last // shard) bounds.permittedError.bytes = bounds.max.bytes / SERVER_KNOBS->SHARD_BYTES_RATIO / 3; - bounds.permittedError.bytesPerKSecond = bounds.permittedError.infinity; + bounds.permittedError.writeBytesPerKSecond = bounds.permittedError.infinity; bounds.permittedError.iosPerKSecond = bounds.permittedError.infinity; bounds.permittedError.bytesReadPerKSecond = bounds.permittedError.infinity; @@ -222,18 +222,18 @@ ShardSizeBounds calculateShardSizeBounds(const KeyRange& keys, std::max(int64_t(bytes - (SERVER_KNOBS->MIN_SHARD_BYTES * 0.1)), (int64_t)0)); bounds.permittedError.bytes = bytes * 0.1; if (bandwidthStatus == BandwidthStatusNormal) { // Not high or low - bounds.max.bytesPerKSecond = SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC; - bounds.min.bytesPerKSecond = SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC; - bounds.permittedError.bytesPerKSecond = bounds.min.bytesPerKSecond / 4; + bounds.max.writeBytesPerKSecond = SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC; + bounds.min.writeBytesPerKSecond = SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC; + bounds.permittedError.writeBytesPerKSecond = bounds.min.writeBytesPerKSecond / 4; } else if (bandwidthStatus == BandwidthStatusHigh) { // > 10MB/sec for 100MB shard, proportionally lower // for smaller shard, > 200KB/sec no matter what - bounds.max.bytesPerKSecond = bounds.max.infinity; - bounds.min.bytesPerKSecond = SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC; - bounds.permittedError.bytesPerKSecond = bounds.min.bytesPerKSecond / 4; + 
bounds.max.writeBytesPerKSecond = bounds.max.infinity; + bounds.min.writeBytesPerKSecond = SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC; + bounds.permittedError.writeBytesPerKSecond = bounds.min.writeBytesPerKSecond / 4; } else if (bandwidthStatus == BandwidthStatusLow) { // < 10KB/sec - bounds.max.bytesPerKSecond = SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC; - bounds.min.bytesPerKSecond = 0; - bounds.permittedError.bytesPerKSecond = bounds.max.bytesPerKSecond / 4; + bounds.max.writeBytesPerKSecond = SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC; + bounds.min.writeBytesPerKSecond = 0; + bounds.permittedError.writeBytesPerKSecond = bounds.max.writeBytesPerKSecond / 4; } else { ASSERT(false); } @@ -309,12 +309,12 @@ ACTOR Future trackShardMetrics(DataDistributionTracker::SafeAccessor self, /*TraceEvent("ShardSizeUpdate") .detail("Keys", keys) .detail("UpdatedSize", metrics.metrics.bytes) - .detail("Bandwidth", metrics.metrics.bytesPerKSecond) + .detail("Bandwidth", metrics.metrics.writeBytesPerKSecond) .detail("BandwidthStatus", getBandwidthStatus(metrics)) .detail("BytesLower", bounds.min.bytes) .detail("BytesUpper", bounds.max.bytes) - .detail("BandwidthLower", bounds.min.bytesPerKSecond) - .detail("BandwidthUpper", bounds.max.bytesPerKSecond) + .detail("BandwidthLower", bounds.min.writeBytesPerKSecond) + .detail("BandwidthUpper", bounds.max.writeBytesPerKSecond) .detail("ShardSizePresent", shardSize->get().present()) .detail("OldShardSize", shardSize->get().present() ? shardSize->get().get().metrics.bytes : 0) .detail("TrackerID", trackerID);*/ @@ -882,7 +882,7 @@ ACTOR Future shardSplitter(DataDistributionTracker* self, StorageMetrics splitMetrics; splitMetrics.bytes = shardBounds.max.bytes / 2; - splitMetrics.bytesPerKSecond = + splitMetrics.writeBytesPerKSecond = keys.begin >= keyServersKeys.begin ? 
splitMetrics.infinity : SERVER_KNOBS->SHARD_SPLIT_BYTES_PER_KSEC; splitMetrics.iosPerKSecond = splitMetrics.infinity; splitMetrics.bytesReadPerKSecond = splitMetrics.infinity; // Don't split by readBandwidth @@ -905,7 +905,7 @@ ACTOR Future shardSplitter(DataDistributionTracker* self, bandwidthStatus == BandwidthStatusHigh ? "High" : bandwidthStatus == BandwidthStatusNormal ? "Normal" : "Low") - .detail("BytesPerKSec", metrics.bytesPerKSecond) + .detail("BytesPerKSec", metrics.writeBytesPerKSecond) .detail("NumShards", numShards); if (numShards > 1) { @@ -1206,7 +1206,7 @@ ACTOR Future shardTracker(DataDistributionTracker::SafeAccessor self, .detail("TrackerID", trackerID) .detail("MaxBytes", self()->maxShardSize->get().get()) .detail("ShardSize", shardSize->get().get().bytes) - .detail("BytesPerKSec", shardSize->get().get().bytesPerKSecond);*/ + .detail("BytesPerKSec", shardSize->get().get().writeBytesPerKSecond);*/ try { loop { diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 1ce76e642c..e71cc51ef5 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -56,12 +56,12 @@ ShardSizeBounds ShardSizeBounds::shardSizeBoundsBeforeTrack() { return ShardSizeBounds{ .max = StorageMetrics{ .bytes = -1, - .bytesPerKSecond = StorageMetrics::infinity, + .writeBytesPerKSecond = StorageMetrics::infinity, .iosPerKSecond = StorageMetrics::infinity, .bytesReadPerKSecond = StorageMetrics::infinity }, - .min = StorageMetrics{ .bytes = -1, .bytesPerKSecond = 0, .iosPerKSecond = 0, .bytesReadPerKSecond = 0 }, + .min = StorageMetrics{ .bytes = -1, .writeBytesPerKSecond = 0, .iosPerKSecond = 0, .bytesReadPerKSecond = 0 }, .permittedError = StorageMetrics{ .bytes = -1, - .bytesPerKSecond = StorageMetrics::infinity, + .writeBytesPerKSecond = StorageMetrics::infinity, .iosPerKSecond = StorageMetrics::infinity, .bytesReadPerKSecond = StorageMetrics::infinity } }; diff --git 
a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index a862072d51..24d32dfb34 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -308,7 +308,7 @@ Future MockStorageServer::run() { } void MockStorageServer::set(KeyRef key, int64_t bytes, int64_t oldBytes) { - notifyMvccStorageCost(key, bytes); + notifyWriteMetrics(key, bytes); byteSampleApplySet(key, bytes); auto delta = oldBytes - bytes; availableDiskSpace += delta; @@ -316,7 +316,7 @@ void MockStorageServer::set(KeyRef key, int64_t bytes, int64_t oldBytes) { } void MockStorageServer::clear(KeyRef key, int64_t bytes) { - notifyMvccStorageCost(key, bytes); + notifyWriteMetrics(key, bytes); KeyRange sr = singleKeyRange(key); byteSampleApplyClear(sr); availableDiskSpace += bytes; @@ -324,7 +324,7 @@ void MockStorageServer::clear(KeyRef key, int64_t bytes) { } void MockStorageServer::clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { - notifyMvccStorageCost(range.begin, range.begin.size() + range.end.size()); + notifyWriteMetrics(range.begin, range.begin.size() + range.end.size()); byteSampleApplyClear(range); auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); availableDiskSpace += totalByteSize; @@ -386,10 +386,10 @@ void MockStorageServer::clearRangeTotalBytes(KeyRangeRef range, int64_t beginSha } } -void MockStorageServer::notifyMvccStorageCost(KeyRef key, int64_t size) { - // update write bandwidth and iops as mock the cost of writing mvcc storage +void MockStorageServer::notifyWriteMetrics(KeyRef key, int64_t size) { + // update write bandwidth and iops as mock the cost of writing a mutation StorageMetrics s; - s.bytesPerKSecond = mvccStorageBytes(size) / 2; + s.writeBytesPerKSecond = size + MutationRef::OVERHEAD_BYTES; s.iosPerKSecond = 1; metrics.notify(key, s); } diff --git a/fdbserver/StorageMetrics.actor.cpp b/fdbserver/StorageMetrics.actor.cpp index ada2301483..89305a968f 
100644 --- a/fdbserver/StorageMetrics.actor.cpp +++ b/fdbserver/StorageMetrics.actor.cpp @@ -75,8 +75,8 @@ KeyRef StorageMetricSample::splitEstimate(KeyRangeRef range, int64_t offset, boo StorageMetrics StorageServerMetrics::getMetrics(KeyRangeRef const& keys) const { StorageMetrics result; result.bytes = byteSample.getEstimate(keys); - result.bytesPerKSecond = - bandwidthSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; + result.writeBytesPerKSecond = + bytesWriteSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; result.iosPerKSecond = iopsSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; result.bytesReadPerKSecond = bytesReadSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; @@ -88,7 +88,7 @@ StorageMetrics StorageServerMetrics::getMetrics(KeyRangeRef const& keys) const { void StorageServerMetrics::notify(KeyRef key, StorageMetrics& metrics) { ASSERT(metrics.bytes == 0); // ShardNotifyMetrics if (g_network->isSimulated()) { - CODE_PROBE(metrics.bytesPerKSecond != 0, "ShardNotifyMetrics bytes"); + CODE_PROBE(metrics.writeBytesPerKSecond != 0, "ShardNotifyMetrics bytes"); CODE_PROBE(metrics.iosPerKSecond != 0, "ShardNotifyMetrics ios"); CODE_PROBE(metrics.bytesReadPerKSecond != 0, "ShardNotifyMetrics bytesRead", probe::decoration::rare); } @@ -97,8 +97,8 @@ void StorageServerMetrics::notify(KeyRef key, StorageMetrics& metrics) { StorageMetrics notifyMetrics; - if (metrics.bytesPerKSecond) - notifyMetrics.bytesPerKSecond = bandwidthSample.addAndExpire(key, metrics.bytesPerKSecond, expire) * + if (metrics.writeBytesPerKSecond) + notifyMetrics.writeBytesPerKSecond = bytesWriteSample.addAndExpire(key, metrics.writeBytesPerKSecond, expire) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; if (metrics.iosPerKSecond) notifyMetrics.iosPerKSecond = iopsSample.addAndExpire(key, metrics.iosPerKSecond, expire) * @@ 
-177,8 +177,8 @@ void StorageServerMetrics::notifyNotReadable(KeyRangeRef keys) { void StorageServerMetrics::poll() { { StorageMetrics m; - m.bytesPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; - bandwidthSample.poll(waitMetricsMap, m); + m.writeBytesPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; + bytesWriteSample.poll(waitMetricsMap, m); } { StorageMetrics m; @@ -250,7 +250,7 @@ void StorageServerMetrics::splitMetrics(SplitMetricsRequest req) const { if (remaining.bytes < 2 * minSplitBytes) break; KeyRef key = req.keys.end; - bool hasUsed = used.bytes != 0 || used.bytesPerKSecond != 0 || used.iosPerKSecond != 0; + bool hasUsed = used.bytes != 0 || used.writeBytesPerKSecond != 0 || used.iosPerKSecond != 0; key = getSplitKey(remaining.bytes, estimated.bytes, req.limits.bytes, @@ -276,13 +276,13 @@ void StorageServerMetrics::splitMetrics(SplitMetricsRequest req) const { lastKey, key, hasUsed); - key = getSplitKey(remaining.bytesPerKSecond, - estimated.bytesPerKSecond, - req.limits.bytesPerKSecond, - used.bytesPerKSecond, + key = getSplitKey(remaining.writeBytesPerKSecond, + estimated.writeBytesPerKSecond, + req.limits.writeBytesPerKSecond, + used.writeBytesPerKSecond, req.limits.infinity, req.isLastShard, - bandwidthSample, + bytesWriteSample, SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS, lastKey, key, @@ -328,12 +328,12 @@ void StorageServerMetrics::getStorageMetrics(GetStorageMetricsRequest req, rep.available.bytes = sb.available; rep.available.iosPerKSecond = 10e6; - rep.available.bytesPerKSecond = 100e9; + rep.available.writeBytesPerKSecond = 100e9; rep.available.bytesReadPerKSecond = 100e9; rep.capacity.bytes = sb.total; rep.capacity.iosPerKSecond = 10e6; - rep.capacity.bytesPerKSecond = 100e9; + rep.capacity.writeBytesPerKSecond = 100e9; rep.capacity.bytesReadPerKSecond = 100e9; rep.bytesInputRate = bytesInputRate; diff --git a/fdbserver/include/fdbserver/MockGlobalState.h 
b/fdbserver/include/fdbserver/MockGlobalState.h index a86d8aeb44..cf831dccb2 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -180,8 +180,8 @@ protected: // Decrease the intersecting shard bytes as if delete the data void clearRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); - // Update the storage metrics as if we write the MVCC storage with a mutation of `size` bytes. - void notifyMvccStorageCost(KeyRef key, int64_t size); + // Update the storage metrics as if we write a k-v pair of `size` bytes. + void notifyWriteMetrics(KeyRef key, int64_t size); // Randomly generate keys and kv size between the fetch range, updating the byte sample. // Once the fetchKeys return, the shard status will become FETCHED. diff --git a/fdbserver/include/fdbserver/StorageMetrics.actor.h b/fdbserver/include/fdbserver/StorageMetrics.actor.h index 553dcaa4b9..b0985ec52a 100644 --- a/fdbserver/include/fdbserver/StorageMetrics.actor.h +++ b/fdbserver/include/fdbserver/StorageMetrics.actor.h @@ -79,12 +79,12 @@ struct StorageServerMetrics { StorageMetricSample byteSample; // FIXME: iops is not effectively tested, and is not used by data distribution - TransientStorageMetricSample iopsSample, bandwidthSample; + TransientStorageMetricSample iopsSample, bytesWriteSample; TransientStorageMetricSample bytesReadSample; StorageServerMetrics() : byteSample(0), iopsSample(SERVER_KNOBS->IOPS_UNITS_PER_SAMPLE), - bandwidthSample(SERVER_KNOBS->BANDWIDTH_UNITS_PER_SAMPLE), + bytesWriteSample(SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE), bytesReadSample(SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE) {} StorageMetrics getMetrics(KeyRangeRef const& keys) const; @@ -230,10 +230,5 @@ Future serveStorageMetricsRequests(ServiceType* self, StorageServerInterfa } } } - -// For both the mutation log and the versioned map. 
-inline int mvccStorageBytes(int64_t size) { - return VersionedMap::overheadPerItem * 2 + (MutationRef::OVERHEAD_BYTES + size) * 2; -} #include "flow/unactorcompiler.h" #endif // FDBSERVER_STORAGEMETRICS_H \ No newline at end of file diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 35101586f7..af7ddeba69 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -533,9 +533,13 @@ const int VERSION_OVERHEAD = sizeof(Reference::PTreeT>)); // versioned map [ x2 for // createNewVersion(version+1) ], 64b // overhead for map -// For both the mutation log and the versioned map. + +// Memory size for storing mutation in the mutation log and the versioned map. static int mvccStorageBytes(MutationRef const& m) { - return mvccStorageBytes(m.param1.size() + m.param2.size()); + // Why * 2: + // - 1 insertion into version map costs 2 nodes in avg; + // - The mutation will be stored in both mutation log and versioned map; + return VersionedMap::overheadPerItem * 2 + m.totalSize() * 2; } struct FetchInjectionInfo { @@ -1960,7 +1964,7 @@ ACTOR Future getValueQ(StorageServer* data, GetValueRequest req) { /* StorageMetrics m; - m.bytesPerKSecond = req.key.size() + (v.present() ? v.get().size() : 0); + m.writeBytesPerKSecond = req.key.size() + (v.present() ? v.get().size() : 0); m.iosPerKSecond = 1; data->metrics.notify(req.key, m); */ @@ -5610,7 +5614,7 @@ void applyMutation(StorageServer* self, // m is expected to be in arena already // Clear split keys are added to arena StorageMetrics metrics; - metrics.bytesPerKSecond = mvccStorageBytes(m) / 2; + metrics.writeBytesPerKSecond = m.totalSize(); // comparable to counter.mutationBytes metrics.iosPerKSecond = 1; self->metrics.notify(m.param1, metrics); @@ -10070,12 +10074,12 @@ ACTOR Future waitMetrics(StorageServerMetrics* self, WaitMetricsRequest re // all the messages for one clear or set have been dispatched. 
/*StorageMetrics m = getMetrics( data, req.keys ); - bool b = ( m.bytes != metrics.bytes || m.bytesPerKSecond != metrics.bytesPerKSecond || + bool b = ( m.bytes != metrics.bytes || m.writeBytesPerKSecond != metrics.writeBytesPerKSecond || m.iosPerKSecond != metrics.iosPerKSecond ); if (b) { printf("keys: '%s' - '%s' @%p\n", printable(req.keys.begin).c_str(), printable(req.keys.end).c_str(), this); printf("waitMetrics: desync %d (%lld %lld %lld) != (%lld %lld %lld); +(%lld %lld %lld)\n", - b, m.bytes, m.bytesPerKSecond, m.iosPerKSecond, metrics.bytes, metrics.bytesPerKSecond, - metrics.iosPerKSecond, c.bytes, c.bytesPerKSecond, c.iosPerKSecond); + b, m.bytes, m.writeBytesPerKSecond, m.iosPerKSecond, metrics.bytes, metrics.writeBytesPerKSecond, + metrics.iosPerKSecond, c.bytes, c.writeBytesPerKSecond, c.iosPerKSecond); }*/ } From 004a0f8915af106a73b9ab633bacbc7860a7aac1 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Fri, 28 Oct 2022 15:51:08 -0700 Subject: [PATCH 10/57] update data ops definition and comments; add a unit test --- fdbserver/MockGlobalState.actor.cpp | 159 ++++++++++++++++-- fdbserver/ShardsAffectedByTeamFailure.cpp | 10 ++ fdbserver/include/fdbserver/MockGlobalState.h | 33 +++- .../fdbserver/ShardsAffectedByTeamFailure.h | 2 + 4 files changed, 183 insertions(+), 21 deletions(-) diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index 24d32dfb34..ba855affcf 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -141,7 +141,7 @@ public: int maxSize = std::min(remainBytes, 130000) + 1; int randomSize = deterministicRandom()->randomInt(lastKey.size(), maxSize); - self->availableDiskSpace -= randomSize; + self->usedDiskSpace += randomSize; self->byteSampleApplySet(lastKey, randomSize); remainBytes -= randomSize; lastKey = randomKeyBetween(KeyRangeRef(lastKey, params.keys.end)); @@ -267,7 +267,7 @@ void MockStorageServer::removeShard(KeyRangeRef range) { auto ranges = 
serverKeys.containedRanges(range); ASSERT(ranges.begin().range() == range); auto rangeSize = sumRangeSize(range); - availableDiskSpace += rangeSize; + usedDiskSpace -= rangeSize; serverKeys.rawErase(range); byteSampleApplyClear(range); metrics.notifyNotReadable(range); @@ -310,8 +310,8 @@ Future MockStorageServer::run() { void MockStorageServer::set(KeyRef key, int64_t bytes, int64_t oldBytes) { notifyWriteMetrics(key, bytes); byteSampleApplySet(key, bytes); - auto delta = oldBytes - bytes; - availableDiskSpace += delta; + auto delta = bytes - oldBytes; + usedDiskSpace += delta; serverKeys[key].shardSize += delta; } @@ -319,16 +319,17 @@ void MockStorageServer::clear(KeyRef key, int64_t bytes) { notifyWriteMetrics(key, bytes); KeyRange sr = singleKeyRange(key); byteSampleApplyClear(sr); - availableDiskSpace += bytes; + usedDiskSpace -= bytes; serverKeys[key].shardSize -= bytes; } -void MockStorageServer::clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { +int64_t MockStorageServer::clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { notifyWriteMetrics(range.begin, range.begin.size() + range.end.size()); byteSampleApplyClear(range); auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); - availableDiskSpace += totalByteSize; + usedDiskSpace -= totalByteSize; clearRangeTotalBytes(range, beginShardBytes, endShardBytes); + return totalByteSize; } void MockStorageServer::get(KeyRef key, int64_t bytes) { @@ -337,8 +338,8 @@ void MockStorageServer::get(KeyRef key, int64_t bytes) { metrics.notifyBytesReadPerKSecond(key, bytesReadPerKSecond); } -void MockStorageServer::getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { - auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); +int64_t MockStorageServer::getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { + int64_t totalByteSize = 
estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); // For performance concerns, the cost of a range read is billed to the start key and end key of the // range. if (totalByteSize > 0) { @@ -346,6 +347,7 @@ void MockStorageServer::getRange(KeyRangeRef range, int64_t beginShardBytes, int metrics.notifyBytesReadPerKSecond(range.begin, bytesReadPerKSecond); metrics.notifyBytesReadPerKSecond(range.end, bytesReadPerKSecond); } + return totalByteSize; } int64_t MockStorageServer::estimateRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { @@ -609,6 +611,107 @@ Future>> MockGlobalState::splitStorageMetrics(const return MockGlobalStateImpl::splitStorageMetrics(this, keys, limit, estimated, minSplitBytes); } +std::vector> MockGlobalState::runAllMockServers() { + std::vector> futures; + futures.reserve(allServers.size()); + for (auto& [id, _] : allServers) { + futures.emplace_back(runMockServer(id)); + } + return futures; +} +Future MockGlobalState::runMockServer(const UID& id) { + auto& server = allServers.at(id); + IFailureMonitor::failureMonitor().setStatus(server.ssi.address(), FailureStatus(false)); + return server.run(); +} + +int64_t MockGlobalState::get(KeyRef key) { + auto ids = shardMapping->getSourceServerIdsFor(key); + int64_t randomBytes = 0; + if (deterministicRandom()->random01() > emptyProb) { + randomBytes = deterministicRandom()->randomInt64(minByteSize, maxByteSize); + } + // randomly choose 1 server + auto id = deterministicRandom()->randomChoice(ids); + allServers.at(id).get(key, randomBytes); + return randomBytes; +} + +int64_t MockGlobalState::getRange(KeyRangeRef range) { + auto ranges = shardMapping->intersectingRanges(range); + int64_t totalSize = 0; + KeyRef begin, end; + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + auto ids = shardMapping->getSourceServerIdsFor(it->begin()); + if (range.begin > it->begin()) { + begin = range.begin; + } + if (range.end < it->end()) { + end = range.end; + 
} + + // randomly choose 1 server + auto id = deterministicRandom()->randomChoice(ids); + int64_t beginSize = deterministicRandom()->randomInt64(0, SERVER_KNOBS->MIN_SHARD_BYTES), + endSize = deterministicRandom()->randomInt64(0, SERVER_KNOBS->MIN_SHARD_BYTES); + totalSize += allServers.at(id).getRange(KeyRangeRef(begin, end), beginSize, endSize); + } + return totalSize; +} + +int64_t MockGlobalState::set(KeyRef key, int valueSize, bool insert) { + auto ids = shardMapping->getSourceServerIdsFor(key); + int64_t oldKvBytes = 0; + insert |= (deterministicRandom()->random01() < emptyProb); + + if (!insert) { + oldKvBytes = key.size() + deterministicRandom()->randomInt64(minByteSize, maxByteSize); + } + + for (auto& id : ids) { + allServers.at(id).set(key, valueSize + key.size(), oldKvBytes); + } + return oldKvBytes; +} + +int64_t MockGlobalState::clear(KeyRef key) { + auto ids = shardMapping->getSourceServerIdsFor(key); + int64_t randomBytes = 0; + if (deterministicRandom()->random01() > emptyProb) { + randomBytes = deterministicRandom()->randomInt64(minByteSize, maxByteSize) + key.size(); + } + + for (auto& id : ids) { + allServers.at(id).clear(key, randomBytes); + } + return randomBytes; +} + +int64_t MockGlobalState::clearRange(KeyRangeRef range) { + auto ranges = shardMapping->intersectingRanges(range); + int64_t totalSize = 0; + KeyRef begin, end; + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + auto ids = shardMapping->getSourceServerIdsFor(it->begin()); + if (range.begin > it->begin()) { + begin = range.begin; + } + if (range.end < it->end()) { + end = range.end; + } + + int64_t beginSize = deterministicRandom()->randomInt64(0, SERVER_KNOBS->MIN_SHARD_BYTES), + endSize = deterministicRandom()->randomInt64(0, SERVER_KNOBS->MIN_SHARD_BYTES); + int64_t lastSize = -1; + for (auto& id : ids) { + int64_t size = allServers.at(id).clearRange(KeyRangeRef(begin, end), beginSize, endSize); + ASSERT(lastSize == size || lastSize == -1); // every server should 
return the same result + } + totalSize += lastSize; + } + return totalSize; +} + TEST_CASE("/MockGlobalState/initializeAsEmptyDatabaseMGS/SimpleThree") { BasicTestConfig testConfig; testConfig.simpleConfig = true; @@ -803,15 +906,12 @@ TEST_CASE("/MockGlobalState/MockStorageServer/WaitStorageMetricsRequest") { state std::shared_ptr mgs = std::make_shared(); mgs->initializeAsEmptyDatabaseMGS(dbConfig); - state ActorCollection actors; - - ActorCollection* ptr = &actors; // get around ACTOR syntax restriction - std::for_each(mgs->allServers.begin(), mgs->allServers.end(), [ptr](auto& server) { - ptr->add(server.second.run()); - IFailureMonitor::failureMonitor().setStatus(server.second.ssi.address(), FailureStatus(false)); + std::for_each(mgs->allServers.begin(), mgs->allServers.end(), [](auto& server) { server.second.metrics.byteSample.sample.insert("something"_sr, 500000); }); + state Future allServerFutures = waitForAll(mgs->runAllMockServers()); + KeyRange testRange = allKeys; ShardSizeBounds bounds = ShardSizeBounds::shardSizeBoundsBeforeTrack(); std::pair, int> res = @@ -822,3 +922,32 @@ TEST_CASE("/MockGlobalState/MockStorageServer/WaitStorageMetricsRequest") { ASSERT_EQ(res.first.get().bytes, 500000); return Void(); } + +TEST_CASE("/MockGlobalState/MockStorageServer/DataOpsSet") { + BasicTestConfig testConfig; + testConfig.simpleConfig = true; + testConfig.minimumReplication = 1; + testConfig.logAntiQuorum = 0; + DatabaseConfiguration dbConfig = generateNormalDatabaseConfiguration(testConfig); + TraceEvent("DataOpsUnitTestConfig").detail("Config", dbConfig.toString()); + state std::shared_ptr mgs = std::make_shared(); + mgs->initializeAsEmptyDatabaseMGS(dbConfig); + state Future allServerFutures = waitForAll(mgs->runAllMockServers()); + + // use data ops + state int64_t setBytes = 0; + setBytes += mgs->set("a"_sr, 1 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); + setBytes += mgs->set("b"_sr, 2 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); + 
setBytes += mgs->set("c"_sr, 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); + for (auto& server : mgs->allServers) { + ASSERT_EQ(server.second.sumRangeSize(KeyRangeRef("a"_sr, "c"_sr)), + 2 + 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE); + ASSERT_EQ(server.second.usedDiskSpace, 3 + 6 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE); + } + ShardSizeBounds bounds = ShardSizeBounds::shardSizeBoundsBeforeTrack(); + std::pair, int> res = + wait(mgs->waitStorageMetrics(allKeys, bounds.min, bounds.max, bounds.permittedError, 1, 1)); + std::cout << "get result " << res.second << "\n"; + std::cout << "get byte " << res.first.get().bytes << " " << setBytes << "\n"; + return Void(); +} diff --git a/fdbserver/ShardsAffectedByTeamFailure.cpp b/fdbserver/ShardsAffectedByTeamFailure.cpp index cc634689ec..bc1b150656 100644 --- a/fdbserver/ShardsAffectedByTeamFailure.cpp +++ b/fdbserver/ShardsAffectedByTeamFailure.cpp @@ -246,3 +246,13 @@ void ShardsAffectedByTeamFailure::removeFailedServerForRange(KeyRangeRef keys, c auto ShardsAffectedByTeamFailure::intersectingRanges(KeyRangeRef keyRange) const -> decltype(shard_teams)::ConstRanges { return shard_teams.intersectingRanges(keyRange); } + +std::vector ShardsAffectedByTeamFailure::getSourceServerIdsFor(KeyRef key) { + auto teamPair = getTeamsFor(key); + std::set res; + auto& srcTeams = teamPair.second.empty() ? 
teamPair.first : teamPair.second; + for (auto& team : srcTeams) { + res.insert(team.servers.begin(), team.servers.end()); + } + return std::vector(res.begin(), res.end()); +} diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index cf831dccb2..071fcb2609 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -80,7 +80,7 @@ public: static constexpr uint64_t DEFAULT_DISK_SPACE = 1000LL * 1024 * 1024 * 1024; // control plane statistics associated with a real storage server - uint64_t totalDiskSpace = DEFAULT_DISK_SPACE, availableDiskSpace = DEFAULT_DISK_SPACE; + uint64_t totalDiskSpace = DEFAULT_DISK_SPACE, usedDiskSpace = DEFAULT_DISK_SPACE; // In-memory counterpart of the `serverKeys` in system keyspace // the value ShardStatus is [InFlight, Completed, Empty] and metrics uint64_t is the shard size, the caveat is the @@ -96,8 +96,7 @@ public: MockStorageServer() = default; MockStorageServer(StorageServerInterface ssi, uint64_t availableDiskSpace, uint64_t usedDiskSpace = 0) - : totalDiskSpace(usedDiskSpace + availableDiskSpace), availableDiskSpace(availableDiskSpace), ssi(ssi), - id(ssi.id()) {} + : totalDiskSpace(usedDiskSpace + availableDiskSpace), usedDiskSpace(usedDiskSpace), ssi(ssi), id(ssi.id()) {} MockStorageServer(const UID& id, uint64_t availableDiskSpace, uint64_t usedDiskSpace = 0) : MockStorageServer(StorageServerInterface(id), availableDiskSpace, usedDiskSpace) {} @@ -154,13 +153,15 @@ public: // Clear key and its value of which the size is bytes void clear(KeyRef key, int64_t bytes); // Clear range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` - void clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + // return the total range size + int64_t clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); // modify the metrics as like doing 
an n-bytes read op // Read key and cause bytes read overhead void get(KeyRef key, int64_t bytes); - // Read range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` - void getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + // Read range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes`, + // return the total range size; + int64_t getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); // trigger the asynchronous fetch keys operation void signalFetchKeys(KeyRangeRef range, int64_t rangeTotalBytes); @@ -280,6 +281,26 @@ public: Optional debugID, UseProvisionalProxies useProvisionalProxies, Version version) override; + + // data ops + // MGS finds the shard X contains this key, randomly generates a N-bytes read operation on that shard, which may + // change the read sampling stats of shard X. return the random size of value + int64_t get(KeyRef key); + // For the edge shards contains the range boundaries, randomly do N1 byte and N2 byte read operations. For other + // shards fully within the range, mock a full shard read op. + int64_t getRange(KeyRangeRef range); + // MGS finds the shard X contains this key, mock an N-bytes write to shard X, where N = valueSize + key.size(). + // Return a random number representing the old kv size + int64_t set(KeyRef key, int valueSize, bool insert); + // MGS finds the shard X contains this key, randomly generate an N-byte clear operation. + // Return a random number representing the old kv size + int64_t clear(KeyRef key); + // Similar as getRange, but need to change shardTotalBytes because this is a clear operation. 
+ int64_t clearRange(KeyRangeRef range); + + // convenient shortcuts for test + std::vector> runAllMockServers(); + Future runMockServer(const UID& id); }; #endif // FOUNDATIONDB_MOCKGLOBALSTATE_H diff --git a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h index 7b674510d4..0bb9d00d7b 100644 --- a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h +++ b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h @@ -86,6 +86,8 @@ public: std::pair, std::vector> getTeamsFor(KeyRef key); + std::vector getSourceServerIdsFor(KeyRef key); + // Shard boundaries are modified in defineShard and the content of what servers correspond to each shard is a copy // or union of the shards already there void defineShard(KeyRangeRef keys); From 802dce47b6cc5e773c9eb876499c572634e176b5 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Fri, 28 Oct 2022 17:12:46 -0700 Subject: [PATCH 11/57] unit test clean; fix some bugs --- fdbserver/MockGlobalState.actor.cpp | 37 ++++++++++--------- fdbserver/include/fdbserver/MockGlobalState.h | 3 +- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index ba855affcf..a1570bea98 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -251,6 +251,9 @@ void MockStorageServer::twoWayShardSplitting(KeyRangeRef range, KeyRef splitPoint, uint64_t rangeSize, bool restrictSize) { + if (splitPoint == range.begin || !range.contains(splitPoint)) { + return; + } Key left = range.begin; // random generate 3 shard sizes, the caller guarantee that the min, max parameters are always valid. 
int leftSize = deterministicRandom()->randomInt(SERVER_KNOBS->MIN_SHARD_BYTES, @@ -301,6 +304,8 @@ Future MockStorageServer::run() { Optional>()); ssi.initEndpoints(); ssi.startAcceptingRequests(); + IFailureMonitor::failureMonitor().setStatus(ssi.address(), FailureStatus(false)); + TraceEvent("MockStorageServerStart").detail("Address", ssi.address()); addActor(serveStorageMetricsRequests(this, ssi)); addActor(MockStorageServerImpl::serveMockStorageServer(this)); @@ -620,9 +625,7 @@ std::vector> MockGlobalState::runAllMockServers() { return futures; } Future MockGlobalState::runMockServer(const UID& id) { - auto& server = allServers.at(id); - IFailureMonitor::failureMonitor().setStatus(server.ssi.address(), FailureStatus(false)); - return server.run(); + return allServers.at(id).run(); } int64_t MockGlobalState::get(KeyRef key) { @@ -934,20 +937,20 @@ TEST_CASE("/MockGlobalState/MockStorageServer/DataOpsSet") { mgs->initializeAsEmptyDatabaseMGS(dbConfig); state Future allServerFutures = waitForAll(mgs->runAllMockServers()); - // use data ops - state int64_t setBytes = 0; - setBytes += mgs->set("a"_sr, 1 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); - setBytes += mgs->set("b"_sr, 2 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); - setBytes += mgs->set("c"_sr, 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); - for (auto& server : mgs->allServers) { - ASSERT_EQ(server.second.sumRangeSize(KeyRangeRef("a"_sr, "c"_sr)), - 2 + 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE); - ASSERT_EQ(server.second.usedDiskSpace, 3 + 6 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE); + // insert + { + mgs->set("a"_sr, 1 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); + mgs->set("b"_sr, 2 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); + mgs->set("c"_sr, 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); + for (auto& server : mgs->allServers) { + ASSERT_EQ(server.second.usedDiskSpace, 3 + 6 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE); + } + ShardSizeBounds 
bounds = ShardSizeBounds::shardSizeBoundsBeforeTrack(); + std::pair, int> res = wait( + mgs->waitStorageMetrics(KeyRangeRef("a"_sr, "c"_sr), bounds.min, bounds.max, bounds.permittedError, 1, 1)); + + int64_t testSize = 2 + 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE; + ASSERT_EQ(res.first.get().bytes, testSize); } - ShardSizeBounds bounds = ShardSizeBounds::shardSizeBoundsBeforeTrack(); - std::pair, int> res = - wait(mgs->waitStorageMetrics(allKeys, bounds.min, bounds.max, bounds.permittedError, 1, 1)); - std::cout << "get result " << res.second << "\n"; - std::cout << "get byte " << res.first.get().bytes << " " << setBytes << "\n"; return Void(); } diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index 071fcb2609..3aa245c19c 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -282,7 +282,8 @@ public: UseProvisionalProxies useProvisionalProxies, Version version) override; - // data ops + // data ops - the key is not accurate, only the shard the key locate in matters. + // MGS finds the shard X contains this key, randomly generates a N-bytes read operation on that shard, which may // change the read sampling stats of shard X. 
return the random size of value int64_t get(KeyRef key); From d2ecc3cb48c78becb5a947e3e143f2a65f6866e4 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 31 Oct 2022 09:19:33 -0700 Subject: [PATCH 12/57] handling no-sampled scenario in unit test --- fdbserver/MockGlobalState.actor.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index a1570bea98..28d1d9c7c7 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -947,10 +947,16 @@ TEST_CASE("/MockGlobalState/MockStorageServer/DataOpsSet") { } ShardSizeBounds bounds = ShardSizeBounds::shardSizeBoundsBeforeTrack(); std::pair, int> res = wait( - mgs->waitStorageMetrics(KeyRangeRef("a"_sr, "c"_sr), bounds.min, bounds.max, bounds.permittedError, 1, 1)); + mgs->waitStorageMetrics(KeyRangeRef("a"_sr, "bc"_sr), bounds.min, bounds.max, bounds.permittedError, 1, 1)); int64_t testSize = 2 + 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE; - ASSERT_EQ(res.first.get().bytes, testSize); + // SOMEDAY: how to integrate with isKeyValueInSample() better? 
+ if (res.first.get().bytes > 0) { + // If sampled + ASSERT_EQ(res.first.get().bytes, testSize); + ASSERT_LT(res.first.get().writeBytesPerKSecond, 0); + ASSERT_LT(res.first.get().iosPerKSecond, 0); + } } return Void(); } From 7442cfa2cb73673235964ec3849dd560e24a47cb Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 31 Oct 2022 09:45:56 -0700 Subject: [PATCH 13/57] format code --- fdbclient/FDBTypes.cpp | 4 ++-- fdbclient/include/fdbclient/StorageServerInterface.h | 8 ++++---- fdbserver/BlobManager.actor.cpp | 3 ++- fdbserver/StorageMetrics.actor.cpp | 2 +- fdbserver/storageserver.actor.cpp | 9 +++++---- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fdbclient/FDBTypes.cpp b/fdbclient/FDBTypes.cpp index c104aea5e8..e83630596b 100644 --- a/fdbclient/FDBTypes.cpp +++ b/fdbclient/FDBTypes.cpp @@ -100,7 +100,7 @@ TEST_CASE("/KeyRangeUtil/randomKeyBetween") { Key begin = "qwert"_sr; Key end = "qwertyu"_sr; Key res; - for(int i = 0; i < 10; ++ i) { + for (int i = 0; i < 10; ++i) { res = randomKeyBetween(KeyRangeRef(begin, end)); ASSERT(res > begin); ASSERT(res < end); @@ -113,7 +113,7 @@ TEST_CASE("/KeyRangeUtil/randomKeyBetween") { begin = "aaaaaaa"_sr; end = "b"_sr; - for(int i = 0; i < 10; ++ i) { + for (int i = 0; i < 10; ++i) { res = randomKeyBetween(KeyRangeRef(begin, end)); ASSERT(res > begin); ASSERT(res < end); diff --git a/fdbclient/include/fdbclient/StorageServerInterface.h b/fdbclient/include/fdbclient/StorageServerInterface.h index 2a2442c94b..b8ad4523c9 100644 --- a/fdbclient/include/fdbclient/StorageServerInterface.h +++ b/fdbclient/include/fdbclient/StorageServerInterface.h @@ -643,8 +643,8 @@ struct StorageMetrics { static const int64_t infinity = 1LL << 60; bool allLessOrEqual(const StorageMetrics& rhs) const { - return bytes <= rhs.bytes && writeBytesPerKSecond <= rhs.writeBytesPerKSecond && iosPerKSecond <= rhs.iosPerKSecond && - bytesReadPerKSecond <= rhs.bytesReadPerKSecond; + return bytes <= rhs.bytes && writeBytesPerKSecond <= 
rhs.writeBytesPerKSecond && + iosPerKSecond <= rhs.iosPerKSecond && bytesReadPerKSecond <= rhs.bytesReadPerKSecond; } void operator+=(const StorageMetrics& rhs) { bytes += rhs.bytes; @@ -697,8 +697,8 @@ struct StorageMetrics { } bool operator==(StorageMetrics const& rhs) const { - return bytes == rhs.bytes && writeBytesPerKSecond == rhs.writeBytesPerKSecond && iosPerKSecond == rhs.iosPerKSecond && - bytesReadPerKSecond == rhs.bytesReadPerKSecond; + return bytes == rhs.bytes && writeBytesPerKSecond == rhs.writeBytesPerKSecond && + iosPerKSecond == rhs.iosPerKSecond && bytesReadPerKSecond == rhs.bytesReadPerKSecond; } std::string toString() const { diff --git a/fdbserver/BlobManager.actor.cpp b/fdbserver/BlobManager.actor.cpp index 50c68f328e..7e8455f79b 100644 --- a/fdbserver/BlobManager.actor.cpp +++ b/fdbserver/BlobManager.actor.cpp @@ -638,7 +638,8 @@ ACTOR Future splitRange(Reference bmDat splitMetrics.bytes = SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES; splitMetrics.writeBytesPerKSecond = SERVER_KNOBS->SHARD_SPLIT_BYTES_PER_KSEC; if (writeHot) { - splitMetrics.writeBytesPerKSecond = std::min(splitMetrics.writeBytesPerKSecond, estimated.writeBytesPerKSecond / 2); + splitMetrics.writeBytesPerKSecond = + std::min(splitMetrics.writeBytesPerKSecond, estimated.writeBytesPerKSecond / 2); splitMetrics.writeBytesPerKSecond = std::max(splitMetrics.writeBytesPerKSecond, SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC); } diff --git a/fdbserver/StorageMetrics.actor.cpp b/fdbserver/StorageMetrics.actor.cpp index 89305a968f..ea314eff77 100644 --- a/fdbserver/StorageMetrics.actor.cpp +++ b/fdbserver/StorageMetrics.actor.cpp @@ -99,7 +99,7 @@ void StorageServerMetrics::notify(KeyRef key, StorageMetrics& metrics) { if (metrics.writeBytesPerKSecond) notifyMetrics.writeBytesPerKSecond = bytesWriteSample.addAndExpire(key, metrics.writeBytesPerKSecond, expire) * - SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; + SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; if 
(metrics.iosPerKSecond) notifyMetrics.iosPerKSecond = iopsSample.addAndExpire(key, metrics.iosPerKSecond, expire) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index af7ddeba69..5c355eadbf 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -10074,12 +10074,13 @@ ACTOR Future waitMetrics(StorageServerMetrics* self, WaitMetricsRequest re // all the messages for one clear or set have been dispatched. /*StorageMetrics m = getMetrics( data, req.keys ); - bool b = ( m.bytes != metrics.bytes || m.writeBytesPerKSecond != metrics.writeBytesPerKSecond || - m.iosPerKSecond != metrics.iosPerKSecond ); if (b) { printf("keys: '%s' - '%s' @%p\n", + bool b = ( m.bytes != metrics.bytes || m.writeBytesPerKSecond != metrics.writeBytesPerKSecond + || m.iosPerKSecond != metrics.iosPerKSecond ); if (b) { printf("keys: '%s' - '%s' @%p\n", printable(req.keys.begin).c_str(), printable(req.keys.end).c_str(), this); printf("waitMetrics: desync %d (%lld %lld %lld) != (%lld %lld %lld); +(%lld %lld %lld)\n", - b, m.bytes, m.writeBytesPerKSecond, m.iosPerKSecond, metrics.bytes, metrics.writeBytesPerKSecond, - metrics.iosPerKSecond, c.bytes, c.writeBytesPerKSecond, c.iosPerKSecond); + b, m.bytes, m.writeBytesPerKSecond, m.iosPerKSecond, metrics.bytes, + metrics.writeBytesPerKSecond, metrics.iosPerKSecond, c.bytes, c.writeBytesPerKSecond, + c.iosPerKSecond); }*/ } From 7ed5a99213181bbd42e650b4951503c4dde64243 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 31 Oct 2022 17:18:43 -0700 Subject: [PATCH 14/57] add setShardStatus unit test and change signatures of methods to const& --- fdbserver/MockGlobalState.actor.cpp | 77 ++++++++++++------- fdbserver/include/fdbserver/MockGlobalState.h | 49 ++++++------ 2 files changed, 74 insertions(+), 52 deletions(-) diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index 
28d1d9c7c7..a54a0fe494 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -153,7 +153,7 @@ public: } }; -bool MockStorageServer::allShardStatusEqual(KeyRangeRef range, MockShardStatus status) { +bool MockStorageServer::allShardStatusEqual(const KeyRangeRef& range, MockShardStatus status) { auto ranges = serverKeys.intersectingRanges(range); ASSERT(!ranges.empty()); // at least the range is allKeys @@ -164,7 +164,7 @@ bool MockStorageServer::allShardStatusEqual(KeyRangeRef range, MockShardStatus s return true; } -bool MockStorageServer::allShardStatusIn(KeyRangeRef range, const std::set& status) { +bool MockStorageServer::allShardStatusIn(const KeyRangeRef& range, const std::set& status) { auto ranges = serverKeys.intersectingRanges(range); ASSERT(!ranges.empty()); // at least the range is allKeys @@ -175,7 +175,7 @@ bool MockStorageServer::allShardStatusIn(KeyRangeRef range, const std::set MockStorageServer::run() { return actors.getResult(); } -void MockStorageServer::set(KeyRef key, int64_t bytes, int64_t oldBytes) { +void MockStorageServer::set(KeyRef const& key, int64_t bytes, int64_t oldBytes) { notifyWriteMetrics(key, bytes); byteSampleApplySet(key, bytes); auto delta = bytes - oldBytes; @@ -320,7 +320,7 @@ void MockStorageServer::set(KeyRef key, int64_t bytes, int64_t oldBytes) { serverKeys[key].shardSize += delta; } -void MockStorageServer::clear(KeyRef key, int64_t bytes) { +void MockStorageServer::clear(KeyRef const& key, int64_t bytes) { notifyWriteMetrics(key, bytes); KeyRange sr = singleKeyRange(key); byteSampleApplyClear(sr); @@ -328,7 +328,7 @@ void MockStorageServer::clear(KeyRef key, int64_t bytes) { serverKeys[key].shardSize -= bytes; } -int64_t MockStorageServer::clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { +int64_t MockStorageServer::clearRange(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes) { notifyWriteMetrics(range.begin, range.begin.size() 
+ range.end.size()); byteSampleApplyClear(range); auto totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); @@ -337,13 +337,13 @@ int64_t MockStorageServer::clearRange(KeyRangeRef range, int64_t beginShardBytes return totalByteSize; } -void MockStorageServer::get(KeyRef key, int64_t bytes) { +void MockStorageServer::get(KeyRef const& key, int64_t bytes) { // If the read yields no value, randomly sample the empty read. int64_t bytesReadPerKSecond = std::max(bytes, SERVER_KNOBS->EMPTY_READ_PENALTY); metrics.notifyBytesReadPerKSecond(key, bytesReadPerKSecond); } -int64_t MockStorageServer::getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { +int64_t MockStorageServer::getRange(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes) { int64_t totalByteSize = estimateRangeTotalBytes(range, beginShardBytes, endShardBytes); // For performance concerns, the cost of a range read is billed to the start key and end key of the // range. 
@@ -355,7 +355,7 @@ int64_t MockStorageServer::getRange(KeyRangeRef range, int64_t beginShardBytes, return totalByteSize; } -int64_t MockStorageServer::estimateRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { +int64_t MockStorageServer::estimateRangeTotalBytes(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes) { int64_t totalByteSize = 0; auto ranges = serverKeys.intersectingRanges(range); @@ -374,7 +374,7 @@ int64_t MockStorageServer::estimateRangeTotalBytes(KeyRangeRef range, int64_t be return totalByteSize; } -void MockStorageServer::clearRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes) { +void MockStorageServer::clearRangeTotalBytes(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes) { auto ranges = serverKeys.intersectingRanges(range); // use the beginShardBytes as partial size @@ -393,7 +393,7 @@ void MockStorageServer::clearRangeTotalBytes(KeyRangeRef range, int64_t beginSha } } -void MockStorageServer::notifyWriteMetrics(KeyRef key, int64_t size) { +void MockStorageServer::notifyWriteMetrics(KeyRef const& key, int64_t size) { // update write bandwidth and iops as mock the cost of writing a mutation StorageMetrics s; s.writeBytesPerKSecond = size + MutationRef::OVERHEAD_BYTES; @@ -401,15 +401,17 @@ void MockStorageServer::notifyWriteMetrics(KeyRef key, int64_t size) { metrics.notify(key, s); } -void MockStorageServer::signalFetchKeys(KeyRangeRef range, int64_t rangeTotalBytes) { - fetchKeysRequests.send({ KeyRange(range), rangeTotalBytes }); +void MockStorageServer::signalFetchKeys(const KeyRangeRef& range, int64_t rangeTotalBytes) { + std::cout << "----- signalFetchKeys ---- \n"; + fetchKeysRequests.send({ range, rangeTotalBytes }); + std::cout << "----- signalFetchKeys end ---- \n"; } Future MockStorageServer::fetchKeys(const MockStorageServer::FetchKeysParams& param) { return MockStorageServerImpl::waitFetchKeysFinish(this, param); } -void 
MockStorageServer::byteSampleApplySet(KeyRef key, int64_t kvSize) { +void MockStorageServer::byteSampleApplySet(KeyRef const& key, int64_t kvSize) { // Update byteSample in memory and notify waiting metrics ByteSampleInfo sampleInfo = isKeyValueInSample(key, kvSize); auto& byteSample = metrics.byteSample.sample; @@ -430,7 +432,7 @@ void MockStorageServer::byteSampleApplySet(KeyRef key, int64_t kvSize) { metrics.notifyBytes(key, delta); } -void MockStorageServer::byteSampleApplyClear(KeyRangeRef range) { +void MockStorageServer::byteSampleApplyClear(KeyRangeRef const& range) { // Update byteSample and notify waiting metrics auto& byteSample = metrics.byteSample.sample; @@ -628,7 +630,7 @@ Future MockGlobalState::runMockServer(const UID& id) { return allServers.at(id).run(); } -int64_t MockGlobalState::get(KeyRef key) { +int64_t MockGlobalState::get(KeyRef const& key) { auto ids = shardMapping->getSourceServerIdsFor(key); int64_t randomBytes = 0; if (deterministicRandom()->random01() > emptyProb) { @@ -640,7 +642,7 @@ int64_t MockGlobalState::get(KeyRef key) { return randomBytes; } -int64_t MockGlobalState::getRange(KeyRangeRef range) { +int64_t MockGlobalState::getRange(KeyRangeRef const& range) { auto ranges = shardMapping->intersectingRanges(range); int64_t totalSize = 0; KeyRef begin, end; @@ -662,7 +664,7 @@ int64_t MockGlobalState::getRange(KeyRangeRef range) { return totalSize; } -int64_t MockGlobalState::set(KeyRef key, int valueSize, bool insert) { +int64_t MockGlobalState::set(KeyRef const& key, int valueSize, bool insert) { auto ids = shardMapping->getSourceServerIdsFor(key); int64_t oldKvBytes = 0; insert |= (deterministicRandom()->random01() < emptyProb); @@ -677,7 +679,7 @@ int64_t MockGlobalState::set(KeyRef key, int valueSize, bool insert) { return oldKvBytes; } -int64_t MockGlobalState::clear(KeyRef key) { +int64_t MockGlobalState::clear(KeyRef const& key) { auto ids = shardMapping->getSourceServerIdsFor(key); int64_t randomBytes = 0; if 
(deterministicRandom()->random01() > emptyProb) { @@ -690,7 +692,7 @@ int64_t MockGlobalState::clear(KeyRef key) { return randomBytes; } -int64_t MockGlobalState::clearRange(KeyRangeRef range) { +int64_t MockGlobalState::clearRange(KeyRangeRef const& range) { auto ranges = shardMapping->intersectingRanges(range); int64_t totalSize = 0; KeyRef begin, end; @@ -827,6 +829,25 @@ TEST_CASE("/MockGlobalState/MockStorageServer/SplittingFunctions") { return Void(); } +TEST_CASE("/MockGlobalState/MockStorageServer/SetShardStatus") { + BasicTestConfig testConfig; + testConfig.simpleConfig = true; + testConfig.minimumReplication = 1; + testConfig.logAntiQuorum = 0; + DatabaseConfiguration dbConfig = generateNormalDatabaseConfiguration(testConfig); + TraceEvent("SetShardStatusUnitTestDbConfig").detail("Config", dbConfig.toString()); + + auto mgs = std::make_shared(); + mgs->initializeAsEmptyDatabaseMGS(dbConfig); + + auto& mss = mgs->allServers.at(MockGlobalState::indexToUID(1)); + KeyRange testRange(KeyRangeRef("a"_sr, "b"_sr)); + mss.setShardStatus(testRange, MockShardStatus::INFLIGHT, false); + ASSERT(mss.allShardStatusEqual(testRange, MockShardStatus::INFLIGHT)); + + return Void(); +} + namespace { inline bool locationInfoEqualsToTeam(Reference loc, const std::vector& ids) { return loc->locations()->size() == ids.size() && @@ -954,8 +975,8 @@ TEST_CASE("/MockGlobalState/MockStorageServer/DataOpsSet") { if (res.first.get().bytes > 0) { // If sampled ASSERT_EQ(res.first.get().bytes, testSize); - ASSERT_LT(res.first.get().writeBytesPerKSecond, 0); - ASSERT_LT(res.first.get().iosPerKSecond, 0); + ASSERT_GT(res.first.get().writeBytesPerKSecond, 0); + ASSERT_GT(res.first.get().iosPerKSecond, 0); } } return Void(); diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index 3aa245c19c..05a9fdbca3 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -103,19 +103,19 @@ 
public: decltype(serverKeys)::Ranges getAllRanges() { return serverKeys.ranges(); } - bool allShardStatusEqual(KeyRangeRef range, MockShardStatus status); - bool allShardStatusIn(KeyRangeRef range, const std::set& status); + bool allShardStatusEqual(const KeyRangeRef& range, MockShardStatus status); + bool allShardStatusIn(const KeyRangeRef& range, const std::set& status); // change the status of range. This function may result in split to make the shard boundary align with range.begin // and range.end. In this case, if restrictSize==true, the sum of the split shard size is strictly equal to the old // large shard. Otherwise, the size are randomly generated between (min_shard_size, max_shard_size) - void setShardStatus(KeyRangeRef range, MockShardStatus status, bool restrictSize); + void setShardStatus(const KeyRangeRef& range, MockShardStatus status, bool restrictSize); // this function removed an aligned range from server - void removeShard(KeyRangeRef range); + void removeShard(const KeyRangeRef& range); // intersecting range size - uint64_t sumRangeSize(KeyRangeRef range) const; + uint64_t sumRangeSize(const KeyRangeRef& range) const; void addActor(Future future) override; @@ -149,50 +149,51 @@ public: // data operation APIs - change the metrics sample, disk space and shard size // Set key with a new value, the total bytes change from oldBytes to bytes - void set(KeyRef key, int64_t bytes, int64_t oldBytes); + void set(KeyRef const& key, int64_t bytes, int64_t oldBytes); // Clear key and its value of which the size is bytes - void clear(KeyRef key, int64_t bytes); + void clear(KeyRef const& key, int64_t bytes); // Clear range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` // return the total range size - int64_t clearRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + int64_t clearRange(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes); // modify the metrics 
as like doing an n-bytes read op // Read key and cause bytes read overhead - void get(KeyRef key, int64_t bytes); + void get(KeyRef const& key, int64_t bytes); // Read range, assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes`, // return the total range size; - int64_t getRange(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + int64_t getRange(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes); // trigger the asynchronous fetch keys operation - void signalFetchKeys(KeyRangeRef range, int64_t rangeTotalBytes); + void signalFetchKeys(const KeyRangeRef& range, int64_t rangeTotalBytes); protected: PromiseStream fetchKeysRequests; - void threeWayShardSplitting(KeyRangeRef outerRange, - KeyRangeRef innerRange, + void threeWayShardSplitting(const KeyRangeRef& outerRange, + const KeyRangeRef& innerRange, uint64_t outerRangeSize, bool restrictSize); - void twoWayShardSplitting(KeyRangeRef range, KeyRef splitPoint, uint64_t rangeSize, bool restrictSize); + void twoWayShardSplitting(const KeyRangeRef& range, + const KeyRef& splitPoint, uint64_t rangeSize, bool restrictSize); // Assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` - int64_t estimateRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + int64_t estimateRangeTotalBytes(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes); // Decrease the intersecting shard bytes as if delete the data - void clearRangeTotalBytes(KeyRangeRef range, int64_t beginShardBytes, int64_t endShardBytes); + void clearRangeTotalBytes(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes); // Update the storage metrics as if we write a k-v pair of `size` bytes. 
- void notifyWriteMetrics(KeyRef key, int64_t size); + void notifyWriteMetrics(KeyRef const& key, int64_t size); // Randomly generate keys and kv size between the fetch range, updating the byte sample. // Once the fetchKeys return, the shard status will become FETCHED. Future fetchKeys(const FetchKeysParams&); // Update byte sample as if set a key value pair of which the size is kvSize - void byteSampleApplySet(KeyRef key, int64_t kvSize); + void byteSampleApplySet(KeyRef const& key, int64_t kvSize); // Update byte sample as if clear a whole range - void byteSampleApplyClear(KeyRangeRef range); + void byteSampleApplyClear(KeyRangeRef const& range); }; class MockGlobalStateImpl; @@ -286,18 +287,18 @@ public: // MGS finds the shard X contains this key, randomly generates a N-bytes read operation on that shard, which may // change the read sampling stats of shard X. return the random size of value - int64_t get(KeyRef key); + int64_t get(KeyRef const& key); // For the edge shards contains the range boundaries, randomly do N1 byte and N2 byte read operations. For other // shards fully within the range, mock a full shard read op. - int64_t getRange(KeyRangeRef range); + int64_t getRange(KeyRangeRef const& range); // MGS finds the shard X contains this key, mock an N-bytes write to shard X, where N = valueSize + key.size(). // Return a random number representing the old kv size - int64_t set(KeyRef key, int valueSize, bool insert); + int64_t set(KeyRef const& key, int valueSize, bool insert); // MGS finds the shard X contains this key, randomly generate an N-byte clear operation. // Return a random number representing the old kv size - int64_t clear(KeyRef key); + int64_t clear(KeyRef const& key); // Similar as getRange, but need to change shardTotalBytes because this is a clear operation. 
- int64_t clearRange(KeyRangeRef range); + int64_t clearRange(KeyRangeRef const& range); // convenient shortcuts for test std::vector> runAllMockServers(); From 5a4736a574a9e4f815d0c10966619aedf2d05d53 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 31 Oct 2022 23:17:40 -0700 Subject: [PATCH 15/57] fix setShardStatus bug and finish the unit test --- fdbserver/MockGlobalState.actor.cpp | 46 ++++++++++++++----- fdbserver/include/fdbserver/MockGlobalState.h | 12 +++-- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index a54a0fe494..caa9451dc5 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -178,22 +178,25 @@ bool MockStorageServer::allShardStatusIn(const KeyRangeRef& range, const std::se void MockStorageServer::setShardStatus(const KeyRangeRef& range, MockShardStatus status, bool restrictSize) { auto ranges = serverKeys.intersectingRanges(range); ASSERT(!ranges.empty()); - if (ranges.begin().range().contains(range)) { + if (ranges.begin().begin() < range.begin && ranges.begin().end() > range.end) { CODE_PROBE(true, "Implicitly split single shard to 3 pieces"); threeWayShardSplitting(ranges.begin().range(), range, ranges.begin().cvalue().shardSize, restrictSize); - } - if (ranges.begin().begin() < range.begin) { - CODE_PROBE(true, "Implicitly split begin range to 2 pieces"); - twoWayShardSplitting(ranges.begin().range(), range.begin, ranges.begin().cvalue().shardSize, restrictSize); - } - if (ranges.end().end() > range.end) { - CODE_PROBE(true, "Implicitly split end range to 2 pieces"); - twoWayShardSplitting(ranges.end().range(), range.end, ranges.end().cvalue().shardSize, restrictSize); + } else { + if (ranges.begin().begin() < range.begin) { + CODE_PROBE(true, "Implicitly split begin range to 2 pieces"); + twoWayShardSplitting(ranges.begin().range(), range.begin, ranges.begin().cvalue().shardSize, restrictSize); + } + if 
(ranges.end().begin() > range.end) { + CODE_PROBE(true, "Implicitly split end range to 2 pieces"); + auto lastRange = ranges.end(); + --lastRange; + twoWayShardSplitting(lastRange.range(), range.end, ranges.end().cvalue().shardSize, restrictSize); + } } ranges = serverKeys.containedRanges(range); // now the boundary must be aligned ASSERT(ranges.begin().begin() == range.begin); - ASSERT(ranges.end().end() == range.end); + ASSERT(ranges.end().begin() == range.end); uint64_t newSize = 0; for (auto it = ranges.begin(); it != ranges.end(); ++it) { newSize += it->cvalue().shardSize; @@ -402,9 +405,7 @@ void MockStorageServer::notifyWriteMetrics(KeyRef const& key, int64_t size) { } void MockStorageServer::signalFetchKeys(const KeyRangeRef& range, int64_t rangeTotalBytes) { - std::cout << "----- signalFetchKeys ---- \n"; fetchKeysRequests.send({ range, rangeTotalBytes }); - std::cout << "----- signalFetchKeys end ---- \n"; } Future MockStorageServer::fetchKeys(const MockStorageServer::FetchKeysParams& param) { @@ -754,11 +755,13 @@ struct MockGlobalStateTester { mss.threeWayShardSplitting(outerRange, KeyRangeRef(x1, x2), oldSize, false); auto ranges = mss.serverKeys.containedRanges(outerRange); ASSERT(ranges.begin().range() == KeyRangeRef(outerRange.begin, x1)); + ASSERT(ranges.begin().cvalue().status == oldStatus); ranges.pop_front(); ASSERT(ranges.begin().range() == KeyRangeRef(x1, x2)); ASSERT(ranges.begin().cvalue().status == oldStatus); ranges.pop_front(); ASSERT(ranges.begin().range() == KeyRangeRef(x2, outerRange.end)); + ASSERT(ranges.begin().cvalue().status == oldStatus); ranges.pop_front(); ASSERT(ranges.empty()); } @@ -777,6 +780,7 @@ struct MockGlobalStateTester { mss.twoWayShardSplitting(it->range(), x1, oldSize, false); auto ranges = mss.serverKeys.containedRanges(outerRange); ASSERT(ranges.begin().range() == KeyRangeRef(outerRange.begin, x1)); + ASSERT(ranges.begin().cvalue().status == oldStatus); ranges.pop_front(); ASSERT(ranges.begin().range() == 
KeyRangeRef(x1, outerRange.end)); ASSERT(ranges.begin().cvalue().status == oldStatus); @@ -841,10 +845,28 @@ TEST_CASE("/MockGlobalState/MockStorageServer/SetShardStatus") { mgs->initializeAsEmptyDatabaseMGS(dbConfig); auto& mss = mgs->allServers.at(MockGlobalState::indexToUID(1)); + mss.serverKeys.insert(allKeys, { MockShardStatus::UNSET, 0 }); // manually reset status + + // split to 3 shards [allKeys.begin, a, b, allKeys.end] KeyRange testRange(KeyRangeRef("a"_sr, "b"_sr)); mss.setShardStatus(testRange, MockShardStatus::INFLIGHT, false); ASSERT(mss.allShardStatusEqual(testRange, MockShardStatus::INFLIGHT)); + // [allKeys.begin, a, ac, b, bc, allKeys.end] + testRange = KeyRangeRef("ac"_sr, "bc"_sr); + mss.setShardStatus(testRange, MockShardStatus::INFLIGHT, false); + ASSERT(mss.allShardStatusEqual(testRange, MockShardStatus::INFLIGHT)); + + testRange = KeyRangeRef("b"_sr, "bc"_sr); + mss.setShardStatus(testRange, MockShardStatus::FETCHED, false); + ASSERT(mss.allShardStatusEqual(testRange, MockShardStatus::FETCHED)); + mss.setShardStatus(testRange, MockShardStatus::COMPLETED, false); + ASSERT(mss.allShardStatusEqual(testRange, MockShardStatus::COMPLETED)); + mss.setShardStatus(testRange, MockShardStatus::FETCHED, false); + ASSERT(mss.allShardStatusEqual(testRange, MockShardStatus::COMPLETED)); + + ASSERT(mss.serverKeys.size() == 5); + return Void(); } diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index 05a9fdbca3..b9e2125881 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -31,21 +31,25 @@ struct MockGlobalStateTester; +// the status is roughly order by transition order, except for UNSET and EMPTY enum class MockShardStatus { EMPTY = 0, // data loss - COMPLETED, + UNSET, INFLIGHT, FETCHED, // finish fetch but not change the serverKey mapping. Only can be set by MSS itself. 
- UNSET + COMPLETED }; inline bool isStatusTransitionValid(MockShardStatus from, MockShardStatus to) { + if (from == to) + return true; + switch (from) { case MockShardStatus::UNSET: case MockShardStatus::EMPTY: - return to == MockShardStatus::COMPLETED || to == MockShardStatus::INFLIGHT || to == MockShardStatus::EMPTY; + return to >= MockShardStatus::INFLIGHT; case MockShardStatus::INFLIGHT: - return to == MockShardStatus::FETCHED || to == MockShardStatus::INFLIGHT || to == MockShardStatus::EMPTY; + return to == MockShardStatus::FETCHED || to == MockShardStatus::EMPTY; case MockShardStatus::FETCHED: return to == MockShardStatus::COMPLETED; case MockShardStatus::COMPLETED: From a0489330d001b571e18e824e8177e5cf5c50b633 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Tue, 1 Nov 2022 14:22:04 -0700 Subject: [PATCH 16/57] fix rawStartMovement defineShard bug --- fdbserver/DDTxnProcessor.actor.cpp | 10 ++- .../fdbserver/ShardsAffectedByTeamFailure.h | 4 +- .../IDDTxnProcessorApiCorrectness.actor.cpp | 78 ++++++++++++------- fdbserver/workloads/MockDDTest.actor.cpp | 30 ++++++- 4 files changed, 88 insertions(+), 34 deletions(-) diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 7b124bea06..991f4de95b 100644 --- a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -701,6 +701,10 @@ struct DDMockTxnProcessorImpl { ACTOR static Future moveKeys(DDMockTxnProcessor* self, MoveKeysParams params) { state std::map tssMapping; + // Because SFBTF::Team requires the ID is ordered + std::sort(params.destinationTeam.begin(), params.destinationTeam.end()); + std::sort(params.healthyDestinations.begin(), params.healthyDestinations.end()); + self->rawStartMovement(params, tssMapping); ASSERT(tssMapping.empty()); @@ -892,6 +896,7 @@ void DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, std::map destTeams; destTeams.emplace_back(params.destinationTeam, true); + mgs->shardMapping->defineShard(params.keys); 
mgs->shardMapping->moveShard(params.keys, destTeams); auto randomRangeSize = @@ -926,9 +931,12 @@ void DDMockTxnProcessor::rawFinishMovement(MoveKeysParams& params, mgs->allServers.at(id).setShardStatus(params.keys, MockShardStatus::COMPLETED, mgs->restrictSize); } + // remove destination servers from source servers ASSERT_EQ(srcTeams.size(), 0); for (auto& id : srcTeams.front().servers) { - mgs->allServers.at(id).removeShard(params.keys); + if (!std::binary_search(params.destinationTeam.begin(), params.destinationTeam.end(), id)) { + mgs->allServers.at(id).removeShard(params.keys); + } } mgs->shardMapping->finishMove(params.keys); } diff --git a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h index 0bb9d00d7b..326958bbb6 100644 --- a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h +++ b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h @@ -36,7 +36,9 @@ public: bool primary; Team() : primary(true) {} - Team(std::vector const& servers, bool primary) : servers(servers), primary(primary) {} + Team(std::vector const& servers, bool primary) : servers(servers), primary(primary) { + ASSERT(std::is_sorted(servers.begin(), servers.end())); + } bool operator<(const Team& r) const { if (servers == r.servers) diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index 3b10176103..351c84d97d 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -46,6 +46,10 @@ bool compareShardInfo(const DDShardInfo& a, const DDShardInfo& other) { void verifyInitDataEqual(Reference real, Reference mock) { // Mock DD just care about the team list and server<->key mapping are consistent with the real cluster + if(real->shards.size() != mock->shards.size()) { + std::cout << "real.size: " << real->shards.size() << " mock.size: " << 
mock->shards.size() << "\n"; + ASSERT(false); + } ASSERT(std::equal( real->shards.begin(), real->shards.end(), mock->shards.begin(), mock->shards.end(), compareShardInfo)); std::cout << describe(real->primaryTeams) << " | " << describe(mock->primaryTeams) << "\n"; @@ -189,18 +193,17 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { verifyInitDataEqual(self->realInitDD, mockInitData); - // wait(timeout(reportErrors(self->worker(cx, self), "IDDTxnProcessorApiWorkload"), self->testDuration, - // Void())); + wait(timeout(reportErrors(self->worker(cx, self), "IDDTxnProcessorApiWorkload"), self->testDuration, Void())); // Always set the DD mode back, even if we die with an error TraceEvent("IDDTxnApiTestDoneMoving").log(); - wait(success(setDDMode(cx, 1))); - TraceEvent("IDDTxnApiTestDoneModeSetting").log(); + int oldValue = wait(setDDMode(cx, 1)); + TraceEvent("IDDTxnApiTestDoneModeSetting").detail("OldValue", oldValue); return Void(); } ACTOR static Future testRawMovementApi(IDDTxnProcessorApiWorkload* self) { - state TraceInterval relocateShardInterval("RelocateShard"); + state TraceInterval relocateShardInterval("RelocateShard_TestRawMovementApi"); state FlowLock fl1(1); state FlowLock fl2(1); state std::map emptyTssMapping; @@ -209,32 +212,33 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { params.startMoveKeysParallelismLock = &fl1; params.finishMoveKeysParallelismLock = &fl2; params.relocationIntervalId = relocateShardInterval.pairID; + TraceEvent(SevDebug, relocateShardInterval.begin(), relocateShardInterval.pairID); - // test start - self->mock->testRawStartMovement(params, emptyTssMapping); - wait(self->real->testRawStartMovement(params, emptyTssMapping)); + loop { + params.dataMovementComplete.reset(); + wait(store(params.lock, self->real->takeMoveKeysLock(UID()))); + try { + // test start + self->mock->testRawStartMovement(params, emptyTssMapping); + wait(self->real->testRawStartMovement(params, emptyTssMapping)); - // read initial data again - 
wait(readRealInitialDataDistribution(self)); - mockInitData = self->mock - ->getInitialDataDistribution(self->ddContext.id(), - self->ddContext.lock, - {}, - self->ddContext.ddEnabledState.get(), - SkipDDModeCheck::True) - .get(); + // test finish or started but cancelled movement + if (deterministicRandom()->coinflip()) { + CODE_PROBE(true, "RawMovementApi partial started"); + break; + } - verifyInitDataEqual(self->realInitDD, mockInitData); - - // test finish or started but cancelled movement - if (deterministicRandom()->coinflip()) { - CODE_PROBE(true, "RawMovementApi partial started"); - return Void(); + self->mock->testRawFinishMovement(params, emptyTssMapping); + wait(self->real->testRawFinishMovement(params, emptyTssMapping)); + break; + } catch (Error& e) { + if (e.code() != error_code_movekeys_conflict && e.code() != error_code_operation_failed) + throw; + wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); + // Keep trying to get the moveKeysLock + } } - self->mock->testRawFinishMovement(params, emptyTssMapping); - wait(self->real->testRawFinishMovement(params, emptyTssMapping)); - // read initial data again wait(readRealInitialDataDistribution(self)); mockInitData = self->mock @@ -246,6 +250,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { .get(); verifyInitDataEqual(self->realInitDD, mockInitData); + TraceEvent(SevDebug, relocateShardInterval.end(), relocateShardInterval.pairID); return Void(); } @@ -269,7 +274,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { } ACTOR static Future testMoveKeys(IDDTxnProcessorApiWorkload* self) { - state TraceInterval relocateShardInterval("RelocateShard"); + state TraceInterval relocateShardInterval("RelocateShard_TestMoveKeys"); state FlowLock fl1(1); state FlowLock fl2(1); state std::map emptyTssMapping; @@ -278,9 +283,22 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { params.startMoveKeysParallelismLock = &fl1; params.finishMoveKeysParallelismLock = &fl2; params.relocationIntervalId = 
relocateShardInterval.pairID; + TraceEvent(SevDebug, relocateShardInterval.begin(), relocateShardInterval.pairID); - self->mock->moveKeys(params); - wait(self->real->moveKeys(params)); + loop { + params.dataMovementComplete.reset(); + wait(store(params.lock, self->real->takeMoveKeysLock(UID()))); + try { + self->mock->moveKeys(params); + wait(self->real->moveKeys(params)); + break; + } catch (Error& e) { + if (e.code() != error_code_movekeys_conflict && e.code() != error_code_operation_failed) + throw; + wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); + // Keep trying to get the moveKeysLock + } + } // read initial data again wait(readRealInitialDataDistribution(self)); @@ -293,7 +311,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { .get(); verifyInitDataEqual(self->realInitDD, mockInitData); - + TraceEvent(SevDebug, relocateShardInterval.end(), relocateShardInterval.pairID); return Void(); } ACTOR Future worker(Database cx, IDDTxnProcessorApiWorkload* self) { diff --git a/fdbserver/workloads/MockDDTest.actor.cpp b/fdbserver/workloads/MockDDTest.actor.cpp index 209df66a5e..c9ca9d1f8e 100644 --- a/fdbserver/workloads/MockDDTest.actor.cpp +++ b/fdbserver/workloads/MockDDTest.actor.cpp @@ -28,13 +28,39 @@ #include "fdbserver/workloads/workloads.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
-struct MockDDTestWorkload { +struct MockDDTestWorkload : public TestWorkload { bool enabled; + bool simpleConfig; double testDuration; double meanDelay = 0.05; double maxKeyspace = 0.1; - DDSharedContext ddContext; std::shared_ptr mgs; std::shared_ptr mock; + + MockDDTestWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { + enabled = !clientId && g_network->isSimulated(); // only do this on the "first" client + simpleConfig = getOption(options, "simpleConfig"_sr, true); + testDuration = getOption(options, "testDuration"_sr, 10.0); + meanDelay = getOption(options, "meanDelay"_sr, meanDelay); + maxKeyspace = getOption(options, "maxKeyspace"_sr, maxKeyspace); + } + + Future setup(Database const& cx) override { + if (!enabled) + return Void(); + // initialize configuration + BasicTestConfig testConfig; + testConfig.simpleConfig = simpleConfig; + testConfig.minimumReplication = 1; + testConfig.logAntiQuorum = 0; + DatabaseConfiguration dbConfig = generateNormalDatabaseConfiguration(testConfig); + + // initialize mgs + mgs = std::make_shared(); + mgs->initializeAsEmptyDatabaseMGS(dbConfig); + mock = std::make_shared(mgs); + + return Void(); + } }; \ No newline at end of file From 13fae7ba8a263351256a0831c78af26727ab043c Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 2 Nov 2022 13:45:53 -0700 Subject: [PATCH 17/57] finish add MockDDTrackerShardEvaluatorWorkload --- fdbserver/MockGlobalState.actor.cpp | 10 +- fdbserver/include/fdbserver/MockGlobalState.h | 6 +- .../IDDTxnProcessorApiCorrectness.actor.cpp | 6 +- fdbserver/workloads/MockDDTest.actor.cpp | 147 +++++++++++++++++- 4 files changed, 157 insertions(+), 12 deletions(-) diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index caa9451dc5..388b3da93a 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -358,7 +358,9 @@ int64_t MockStorageServer::getRange(KeyRangeRef const& range, int64_t beginShard return totalByteSize; } 
-int64_t MockStorageServer::estimateRangeTotalBytes(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes) { +int64_t MockStorageServer::estimateRangeTotalBytes(KeyRangeRef const& range, + int64_t beginShardBytes, + int64_t endShardBytes) { int64_t totalByteSize = 0; auto ranges = serverKeys.intersectingRanges(range); @@ -635,7 +637,7 @@ int64_t MockGlobalState::get(KeyRef const& key) { auto ids = shardMapping->getSourceServerIdsFor(key); int64_t randomBytes = 0; if (deterministicRandom()->random01() > emptyProb) { - randomBytes = deterministicRandom()->randomInt64(minByteSize, maxByteSize); + randomBytes = deterministicRandom()->randomInt64(minByteSize, maxByteSize + 1); } // randomly choose 1 server auto id = deterministicRandom()->randomChoice(ids); @@ -671,7 +673,7 @@ int64_t MockGlobalState::set(KeyRef const& key, int valueSize, bool insert) { insert |= (deterministicRandom()->random01() < emptyProb); if (!insert) { - oldKvBytes = key.size() + deterministicRandom()->randomInt64(minByteSize, maxByteSize); + oldKvBytes = key.size() + deterministicRandom()->randomInt64(minByteSize, maxByteSize + 1); } for (auto& id : ids) { @@ -684,7 +686,7 @@ int64_t MockGlobalState::clear(KeyRef const& key) { auto ids = shardMapping->getSourceServerIdsFor(key); int64_t randomBytes = 0; if (deterministicRandom()->random01() > emptyProb) { - randomBytes = deterministicRandom()->randomInt64(minByteSize, maxByteSize) + key.size(); + randomBytes = deterministicRandom()->randomInt64(minByteSize, maxByteSize + 1) + key.size(); } for (auto& id : ids) { diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index b9e2125881..4ea121697d 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -179,7 +179,9 @@ protected: bool restrictSize); void twoWayShardSplitting(const KeyRangeRef& range, - const KeyRef& splitPoint, uint64_t rangeSize, bool restrictSize); + 
const KeyRef& splitPoint, + uint64_t rangeSize, + bool restrictSize); // Assuming the first and last shard within the range having size `beginShardBytes` and `endShardBytes` int64_t estimateRangeTotalBytes(KeyRangeRef const& range, int64_t beginShardBytes, int64_t endShardBytes); @@ -218,7 +220,7 @@ public: // user defined parameters for mock workload purpose double emptyProb; // probability of doing an empty read - uint32_t minByteSize, maxByteSize; // the size band of a point data operation + int minByteSize, maxByteSize; // the size band of a point data operation bool restrictSize = true; MockGlobalState() : shardMapping(new ShardsAffectedByTeamFailure) {} diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index 351c84d97d..26cd3cf91c 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -46,7 +46,7 @@ bool compareShardInfo(const DDShardInfo& a, const DDShardInfo& other) { void verifyInitDataEqual(Reference real, Reference mock) { // Mock DD just care about the team list and server<->key mapping are consistent with the real cluster - if(real->shards.size() != mock->shards.size()) { + if (real->shards.size() != mock->shards.size()) { std::cout << "real.size: " << real->shards.size() << " mock.size: " << mock->shards.size() << "\n"; ASSERT(false); } @@ -223,7 +223,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { wait(self->real->testRawStartMovement(params, emptyTssMapping)); // test finish or started but cancelled movement - if (deterministicRandom()->coinflip()) { + if (true || deterministicRandom()->coinflip()) { CODE_PROBE(true, "RawMovementApi partial started"); break; } @@ -318,7 +318,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { state double lastTime = now(); state int choice = 0; loop { - choice = deterministicRandom()->randomInt(0, 2); + choice = 
deterministicRandom()->randomInt(0, 1); if (choice == 0) { // test rawStartMovement and rawFinishMovement separately wait(testRawMovementApi(self)); } else if (choice == 1) { // test moveKeys diff --git a/fdbserver/workloads/MockDDTest.actor.cpp b/fdbserver/workloads/MockDDTest.actor.cpp index c9ca9d1f8e..c46643566e 100644 --- a/fdbserver/workloads/MockDDTest.actor.cpp +++ b/fdbserver/workloads/MockDDTest.actor.cpp @@ -33,10 +33,17 @@ struct MockDDTestWorkload : public TestWorkload { bool simpleConfig; double testDuration; double meanDelay = 0.05; - double maxKeyspace = 0.1; + double maxKeyspace = 0.1; // range space + int maxByteSize = 1024, minByteSize = 32; // single point value size. The Key size is fixed to 16 bytes std::shared_ptr mgs; - std::shared_ptr mock; + Reference mock; + + KeyRange getRandomRange(double offset) const { + double len = deterministicRandom()->random01() * this->maxKeyspace; + double pos = offset + deterministicRandom()->random01() * (1.0 - len); + return KeyRangeRef(doubleToTestKey(pos), doubleToTestKey(pos + len)); + } MockDDTestWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { enabled = !clientId && g_network->isSimulated(); // only do this on the "first" client @@ -44,6 +51,8 @@ struct MockDDTestWorkload : public TestWorkload { testDuration = getOption(options, "testDuration"_sr, 10.0); meanDelay = getOption(options, "meanDelay"_sr, meanDelay); maxKeyspace = getOption(options, "maxKeyspace"_sr, maxKeyspace); + maxByteSize = getOption(options, "maxByteSize"_sr, maxByteSize); + minByteSize = getOption(options, "minByteSize"_sr, minByteSize); } Future setup(Database const& cx) override { @@ -58,9 +67,141 @@ struct MockDDTestWorkload : public TestWorkload { // initialize mgs mgs = std::make_shared(); + mgs->maxByteSize = maxByteSize; + mgs->minByteSize = minByteSize; mgs->initializeAsEmptyDatabaseMGS(dbConfig); - mock = std::make_shared(mgs); + mock = makeReference(mgs); return Void(); } +}; + +struct 
MockDDTrackerShardEvaluatorWorkload : public MockDDTestWorkload { + + DDSharedContext ddcx; + + PromiseStream output; + PromiseStream getShardMetrics; + PromiseStream getTopKMetrics; + PromiseStream getShardMetricsList; + PromiseStream> getAverageShardBytes; + + KeyRangeMap shards; + + ActorCollection actors; + uint64_t mockDbSize = 0; + const int keySize = 16; + + // --- test configs --- + + // Each key space is convert from an int N. [N, N+1) represent a key space. So at most we have 2G key spaces + int keySpaceCount = 0; + // 1. fixed -- each key space has fixed size. The size of each key space is calculated as minSpaceKeyCount * + // (minByteSize + 16) ; + // 2. linear -- from 0 to keySpaceCount the size of key space increase by size linearStride, from + // linearStartSize. Each value is fixed to minByteSize; + // 3. random -- each key space can has [minSpaceKeyCount, + // maxSpaceKeyCount] pairs and the size of value varies from [minByteSize, maxByteSize]; + Value keySpaceStrategy = "fixed"_sr; + int minSpaceKeyCount = 1000, maxSpaceKeyCount = 1000; + int linearStride = 10 * (1 << 20), linearStartSize = 10 * (1 << 20); + + MockDDTrackerShardEvaluatorWorkload(WorkloadContext const& wcx) + : MockDDTestWorkload(wcx), ddcx(deterministicRandom()->randomUniqueID()) { + keySpaceCount = getOption(options, "keySpaceCount"_sr, keySpaceCount); + keySpaceStrategy = getOption(options, "keySpaceStrategy"_sr, keySpaceStrategy); + minSpaceKeyCount = getOption(options, "minSpaceKeyCount"_sr, minSpaceKeyCount); + maxSpaceKeyCount = getOption(options, "maxSpaceKeyCount"_sr, maxSpaceKeyCount); + linearStride = getOption(options, "linearStride"_sr, linearStride); + linearStartSize = getOption(options, "linearStartSize"_sr, linearStartSize); + } + + void populateRandomStrategy() { + mockDbSize = 0; + for (int i = 0; i < keySpaceCount; ++i) { + int kCount = deterministicRandom()->randomInt(minSpaceKeyCount, maxSpaceKeyCount); + for (int j = 0; j < kCount; ++j) { + Key k = 
doubleToTestKey(i + deterministicRandom()->random01()); + auto vSize = deterministicRandom()->randomInt(minByteSize, maxByteSize + 1); + mgs->set(k, vSize, true); + mockDbSize += vSize + k.size(); + } + } + } + + void populateLinearStrategy() { + mockDbSize = 0; + auto pSize = minByteSize + keySize; + for (int i = 0; i < keySpaceCount; ++i) { + int kCount = std::ceil((linearStride * i + linearStartSize) * 1.0 / pSize); + for (int j = 0; j < kCount; ++j) { + Key k = doubleToTestKey(i + deterministicRandom()->random01()); + mgs->set(k, minByteSize, true); + } + mockDbSize += pSize * kCount; + } + } + + void populateFixedStrategy() { + auto pSize = minByteSize + keySize; + for (int i = 0; i < keySpaceCount; ++i) { + for (int j = 0; j < minSpaceKeyCount; ++j) { + Key k = doubleToTestKey(i + deterministicRandom()->random01()); + mgs->set(k, minByteSize, true); + } + } + mockDbSize = keySpaceCount * minSpaceKeyCount * pSize; + } + + void populateMgs() { + if (keySpaceStrategy == "linear") { + populateLinearStrategy(); + } else if (keySpaceStrategy == "fixed") { + populateFixedStrategy(); + } else if (keySpaceStrategy == "random") { + populateRandomStrategy(); + } + TraceEvent("PopulateMockGlobalState") + .detail("Strategy", keySpaceStrategy) + .detail("EstimatedDbSize", mockDbSize); + } + + Future setup(Database const& cx) override { + if (!enabled) + return Void(); + MockDDTestWorkload::setup(cx); + // populate mgs before run tracker + populateMgs(); + } + Future start(Database const& cx) override { + if (!enabled) + return Void(); + + // start mock servers + actors.add(waitForAll(mgs->runAllMockServers())); + + // start tracker + Reference initData = + mock->getInitialDataDistribution(ddcx.id(), ddcx.lock, {}, ddcx.ddEnabledState.get(), SkipDDModeCheck::True) + .get(); + Reference physicalShardCollection = makeReference(); + Reference> zeroHealthyTeams = makeReference>(false); + actors.add(dataDistributionTracker(initData, + mock, + output, + 
ddcx.shardsAffectedByTeamFailure, + physicalShardCollection, + getShardMetrics, + getTopKMetrics.getFuture(), + getShardMetricsList, + getAverageShardBytes.getFuture(), + Promise(), + zeroHealthyTeams, + ddcx.id(), + &shards, + &ddcx.trackerCancelled, + {})); + + return timeout(reportErrors(actors.getResult(), "MockDDTrackerShardEvaluatorWorkload"), testDuration, Void()); + } }; \ No newline at end of file From 38bd568e07fe157500198bf701575962cf368693 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 2 Nov 2022 21:43:27 -0700 Subject: [PATCH 18/57] change workload file structure --- .../include/fdbserver/workloads/MockDDTest.h | 46 ++++ fdbserver/workloads/MockDDTest.actor.cpp | 211 +++--------------- .../MockDDTrackerShardEvaluator.actor.cpp | 186 +++++++++++++++ 3 files changed, 263 insertions(+), 180 deletions(-) create mode 100644 fdbserver/include/fdbserver/workloads/MockDDTest.h create mode 100644 fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp diff --git a/fdbserver/include/fdbserver/workloads/MockDDTest.h b/fdbserver/include/fdbserver/workloads/MockDDTest.h new file mode 100644 index 0000000000..133f0b582e --- /dev/null +++ b/fdbserver/include/fdbserver/workloads/MockDDTest.h @@ -0,0 +1,46 @@ +/* + * MockDDTest.g + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef FOUNDATIONDB_MOCKDDTEST_H +#define FOUNDATIONDB_MOCKDDTEST_H + +#include "fdbserver/workloads/workloads.actor.h" +#include "fdbserver/DDSharedContext.h" +#include "fdbserver/DDTxnProcessor.h" +#include "fdbserver/MoveKeys.actor.h" +#include "fdbclient/StorageServerInterface.h" + +// other Mock DD workload can derive from this class to use the common settings +struct MockDDTestWorkload : public TestWorkload { + bool enabled; + bool simpleConfig; + double testDuration; + double meanDelay = 0.05; + double maxKeyspace = 0.1; // range space + int maxByteSize = 1024, minByteSize = 32; // single point value size. The Key size is fixed to 16 bytes + + std::shared_ptr mgs; + Reference mock; + + KeyRange getRandomRange(double offset) const; + MockDDTestWorkload(WorkloadContext const& wcx); + Future setup(Database const& cx) override; +}; + +#endif // FOUNDATIONDB_MOCKDDTEST_H diff --git a/fdbserver/workloads/MockDDTest.actor.cpp b/fdbserver/workloads/MockDDTest.actor.cpp index c46643566e..2577a23511 100644 --- a/fdbserver/workloads/MockDDTest.actor.cpp +++ b/fdbserver/workloads/MockDDTest.actor.cpp @@ -18,190 +18,41 @@ * limitations under the License. */ -#include "fdbserver/workloads/workloads.actor.h" -#include "fdbclient/FDBOptions.g.h" -#include "fdbclient/ManagementAPI.actor.h" -#include "fdbserver/DDSharedContext.h" -#include "fdbserver/DDTxnProcessor.h" -#include "fdbserver/MoveKeys.actor.h" -#include "fdbclient/StorageServerInterface.h" -#include "fdbserver/workloads/workloads.actor.h" +#include "fdbserver/workloads/MockDDTest.h" #include "flow/actorcompiler.h" // This must be the last #include. -struct MockDDTestWorkload : public TestWorkload { - bool enabled; - bool simpleConfig; - double testDuration; - double meanDelay = 0.05; - double maxKeyspace = 0.1; // range space - int maxByteSize = 1024, minByteSize = 32; // single point value size. 
The Key size is fixed to 16 bytes +KeyRange MockDDTestWorkload::getRandomRange(double offset) const { + double len = deterministicRandom()->random01() * this->maxKeyspace; + double pos = offset + deterministicRandom()->random01() * (1.0 - len); + return KeyRangeRef(doubleToTestKey(pos), doubleToTestKey(pos + len)); +} - std::shared_ptr mgs; - Reference mock; - - KeyRange getRandomRange(double offset) const { - double len = deterministicRandom()->random01() * this->maxKeyspace; - double pos = offset + deterministicRandom()->random01() * (1.0 - len); - return KeyRangeRef(doubleToTestKey(pos), doubleToTestKey(pos + len)); - } - - MockDDTestWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { - enabled = !clientId && g_network->isSimulated(); // only do this on the "first" client - simpleConfig = getOption(options, "simpleConfig"_sr, true); - testDuration = getOption(options, "testDuration"_sr, 10.0); - meanDelay = getOption(options, "meanDelay"_sr, meanDelay); - maxKeyspace = getOption(options, "maxKeyspace"_sr, maxKeyspace); - maxByteSize = getOption(options, "maxByteSize"_sr, maxByteSize); - minByteSize = getOption(options, "minByteSize"_sr, minByteSize); - } - - Future setup(Database const& cx) override { - if (!enabled) - return Void(); - // initialize configuration - BasicTestConfig testConfig; - testConfig.simpleConfig = simpleConfig; - testConfig.minimumReplication = 1; - testConfig.logAntiQuorum = 0; - DatabaseConfiguration dbConfig = generateNormalDatabaseConfiguration(testConfig); - - // initialize mgs - mgs = std::make_shared(); - mgs->maxByteSize = maxByteSize; - mgs->minByteSize = minByteSize; - mgs->initializeAsEmptyDatabaseMGS(dbConfig); - mock = makeReference(mgs); +MockDDTestWorkload::MockDDTestWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { + enabled = !clientId && g_network->isSimulated(); // only do this on the "first" client + simpleConfig = getOption(options, "simpleConfig"_sr, true); + testDuration = getOption(options, 
"testDuration"_sr, 10.0); + meanDelay = getOption(options, "meanDelay"_sr, meanDelay); + maxKeyspace = getOption(options, "maxKeyspace"_sr, maxKeyspace); + maxByteSize = getOption(options, "maxByteSize"_sr, maxByteSize); + minByteSize = getOption(options, "minByteSize"_sr, minByteSize); +} +Future MockDDTestWorkload::setup(Database const& cx) { + if (!enabled) return Void(); - } -}; + // initialize configuration + BasicTestConfig testConfig; + testConfig.simpleConfig = simpleConfig; + testConfig.minimumReplication = 1; + testConfig.logAntiQuorum = 0; + DatabaseConfiguration dbConfig = generateNormalDatabaseConfiguration(testConfig); -struct MockDDTrackerShardEvaluatorWorkload : public MockDDTestWorkload { + // initialize mgs + mgs = std::make_shared(); + mgs->maxByteSize = maxByteSize; + mgs->minByteSize = minByteSize; + mgs->initializeAsEmptyDatabaseMGS(dbConfig); + mock = makeReference(mgs); - DDSharedContext ddcx; - - PromiseStream output; - PromiseStream getShardMetrics; - PromiseStream getTopKMetrics; - PromiseStream getShardMetricsList; - PromiseStream> getAverageShardBytes; - - KeyRangeMap shards; - - ActorCollection actors; - uint64_t mockDbSize = 0; - const int keySize = 16; - - // --- test configs --- - - // Each key space is convert from an int N. [N, N+1) represent a key space. So at most we have 2G key spaces - int keySpaceCount = 0; - // 1. fixed -- each key space has fixed size. The size of each key space is calculated as minSpaceKeyCount * - // (minByteSize + 16) ; - // 2. linear -- from 0 to keySpaceCount the size of key space increase by size linearStride, from - // linearStartSize. Each value is fixed to minByteSize; - // 3. 
random -- each key space can has [minSpaceKeyCount, - // maxSpaceKeyCount] pairs and the size of value varies from [minByteSize, maxByteSize]; - Value keySpaceStrategy = "fixed"_sr; - int minSpaceKeyCount = 1000, maxSpaceKeyCount = 1000; - int linearStride = 10 * (1 << 20), linearStartSize = 10 * (1 << 20); - - MockDDTrackerShardEvaluatorWorkload(WorkloadContext const& wcx) - : MockDDTestWorkload(wcx), ddcx(deterministicRandom()->randomUniqueID()) { - keySpaceCount = getOption(options, "keySpaceCount"_sr, keySpaceCount); - keySpaceStrategy = getOption(options, "keySpaceStrategy"_sr, keySpaceStrategy); - minSpaceKeyCount = getOption(options, "minSpaceKeyCount"_sr, minSpaceKeyCount); - maxSpaceKeyCount = getOption(options, "maxSpaceKeyCount"_sr, maxSpaceKeyCount); - linearStride = getOption(options, "linearStride"_sr, linearStride); - linearStartSize = getOption(options, "linearStartSize"_sr, linearStartSize); - } - - void populateRandomStrategy() { - mockDbSize = 0; - for (int i = 0; i < keySpaceCount; ++i) { - int kCount = deterministicRandom()->randomInt(minSpaceKeyCount, maxSpaceKeyCount); - for (int j = 0; j < kCount; ++j) { - Key k = doubleToTestKey(i + deterministicRandom()->random01()); - auto vSize = deterministicRandom()->randomInt(minByteSize, maxByteSize + 1); - mgs->set(k, vSize, true); - mockDbSize += vSize + k.size(); - } - } - } - - void populateLinearStrategy() { - mockDbSize = 0; - auto pSize = minByteSize + keySize; - for (int i = 0; i < keySpaceCount; ++i) { - int kCount = std::ceil((linearStride * i + linearStartSize) * 1.0 / pSize); - for (int j = 0; j < kCount; ++j) { - Key k = doubleToTestKey(i + deterministicRandom()->random01()); - mgs->set(k, minByteSize, true); - } - mockDbSize += pSize * kCount; - } - } - - void populateFixedStrategy() { - auto pSize = minByteSize + keySize; - for (int i = 0; i < keySpaceCount; ++i) { - for (int j = 0; j < minSpaceKeyCount; ++j) { - Key k = doubleToTestKey(i + deterministicRandom()->random01()); - 
mgs->set(k, minByteSize, true); - } - } - mockDbSize = keySpaceCount * minSpaceKeyCount * pSize; - } - - void populateMgs() { - if (keySpaceStrategy == "linear") { - populateLinearStrategy(); - } else if (keySpaceStrategy == "fixed") { - populateFixedStrategy(); - } else if (keySpaceStrategy == "random") { - populateRandomStrategy(); - } - TraceEvent("PopulateMockGlobalState") - .detail("Strategy", keySpaceStrategy) - .detail("EstimatedDbSize", mockDbSize); - } - - Future setup(Database const& cx) override { - if (!enabled) - return Void(); - MockDDTestWorkload::setup(cx); - // populate mgs before run tracker - populateMgs(); - } - Future start(Database const& cx) override { - if (!enabled) - return Void(); - - // start mock servers - actors.add(waitForAll(mgs->runAllMockServers())); - - // start tracker - Reference initData = - mock->getInitialDataDistribution(ddcx.id(), ddcx.lock, {}, ddcx.ddEnabledState.get(), SkipDDModeCheck::True) - .get(); - Reference physicalShardCollection = makeReference(); - Reference> zeroHealthyTeams = makeReference>(false); - actors.add(dataDistributionTracker(initData, - mock, - output, - ddcx.shardsAffectedByTeamFailure, - physicalShardCollection, - getShardMetrics, - getTopKMetrics.getFuture(), - getShardMetricsList, - getAverageShardBytes.getFuture(), - Promise(), - zeroHealthyTeams, - ddcx.id(), - &shards, - &ddcx.trackerCancelled, - {})); - - return timeout(reportErrors(actors.getResult(), "MockDDTrackerShardEvaluatorWorkload"), testDuration, Void()); - } -}; \ No newline at end of file + return Void(); +} \ No newline at end of file diff --git a/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp b/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp new file mode 100644 index 0000000000..5988d15c64 --- /dev/null +++ b/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp @@ -0,0 +1,186 @@ +/* + * MockDDTrackerShardEvaluator.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * 
Copyright 2013-2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fdbserver/workloads/MockDDTest.h" +#include "flow/actorcompiler.h" // This must be the last #include. + +struct MockDDTrackerShardEvaluatorWorkload : public MockDDTestWorkload { + static constexpr auto NAME = "MockDDTrackerShardEvaluator"; + DDSharedContext ddcx; + + PromiseStream output; + PromiseStream getShardMetrics; + PromiseStream getTopKMetrics; + PromiseStream getShardMetricsList; + PromiseStream> getAverageShardBytes; + + KeyRangeMap shards; + + ActorCollection actors; + uint64_t mockDbSize = 0; + const int keySize = 16; + + std::map rsReasonCounts; + + // --- test configs --- + + // Each key space is convert from an int N. [N, N+1) represent a key space. So at most we have 2G key spaces + int keySpaceCount = 0; + // 1. fixed -- each key space has fixed size. The size of each key space is calculated as minSpaceKeyCount * + // (minByteSize + 16) ; + // 2. linear -- from 0 to keySpaceCount the size of key space increase by size linearStride, from + // linearStartSize. Each value is fixed to minByteSize; + // 3. 
random -- each key space can has [minSpaceKeyCount, + // maxSpaceKeyCount] pairs and the size of value varies from [minByteSize, maxByteSize]; + Value keySpaceStrategy = "fixed"_sr; + int minSpaceKeyCount = 1000, maxSpaceKeyCount = 1000; + int linearStride = 10 * (1 << 20), linearStartSize = 10 * (1 << 20); + + MockDDTrackerShardEvaluatorWorkload(WorkloadContext const& wcx) + : MockDDTestWorkload(wcx), ddcx(deterministicRandom()->randomUniqueID()) { + keySpaceCount = getOption(options, "keySpaceCount"_sr, keySpaceCount); + keySpaceStrategy = getOption(options, "keySpaceStrategy"_sr, keySpaceStrategy); + minSpaceKeyCount = getOption(options, "minSpaceKeyCount"_sr, minSpaceKeyCount); + maxSpaceKeyCount = getOption(options, "maxSpaceKeyCount"_sr, maxSpaceKeyCount); + linearStride = getOption(options, "linearStride"_sr, linearStride); + linearStartSize = getOption(options, "linearStartSize"_sr, linearStartSize); + } + + void populateRandomStrategy() { + mockDbSize = 0; + for (int i = 0; i < keySpaceCount; ++i) { + int kCount = deterministicRandom()->randomInt(minSpaceKeyCount, maxSpaceKeyCount); + for (int j = 0; j < kCount; ++j) { + Key k = doubleToTestKey(i + deterministicRandom()->random01()); + auto vSize = deterministicRandom()->randomInt(minByteSize, maxByteSize + 1); + mgs->set(k, vSize, true); + mockDbSize += vSize + k.size(); + } + } + } + + void populateLinearStrategy() { + mockDbSize = 0; + auto pSize = minByteSize + keySize; + for (int i = 0; i < keySpaceCount; ++i) { + int kCount = std::ceil((linearStride * i + linearStartSize) * 1.0 / pSize); + for (int j = 0; j < kCount; ++j) { + Key k = doubleToTestKey(i + deterministicRandom()->random01()); + mgs->set(k, minByteSize, true); + } + mockDbSize += pSize * kCount; + } + } + + void populateFixedStrategy() { + auto pSize = minByteSize + keySize; + for (int i = 0; i < keySpaceCount; ++i) { + for (int j = 0; j < minSpaceKeyCount; ++j) { + Key k = doubleToTestKey(i + deterministicRandom()->random01()); + 
mgs->set(k, minByteSize, true); + } + } + mockDbSize = keySpaceCount * minSpaceKeyCount * pSize; + } + + void populateMgs() { + // Will the sampling structure become too large? + std::cout << "MGS Populating ...\n"; + if (keySpaceStrategy == "linear") { + populateLinearStrategy(); + } else if (keySpaceStrategy == "fixed") { + populateFixedStrategy(); + } else if (keySpaceStrategy == "random") { + populateRandomStrategy(); + } + uint64_t totalSize = 0; + for (auto& server : mgs->allServers) { + totalSize = server.second.sumRangeSize(allKeys); + } + TraceEvent("PopulateMockGlobalState") + .detail("Strategy", keySpaceStrategy) + .detail("EstimatedDbSize", mockDbSize) + .detail("MGSReportedTotalSize", totalSize); + std::cout << "MGS Populated.\n"; + } + + Future setup(Database const& cx) override { + if (!enabled) + return Void(); + MockDDTestWorkload::setup(cx); + // populate mgs before run tracker + populateMgs(); + return Void(); + } + + ACTOR static Future relocateShardReporter(MockDDTrackerShardEvaluatorWorkload* self, + FutureStream input) { + loop choose { + when(RelocateShard rs = waitNext(input)) { ++self->rsReasonCounts[(int)rs.reason]; } + } + } + + Future start(Database const& cx) override { + if (!enabled) + return Void(); + + // start mock servers + actors.add(waitForAll(mgs->runAllMockServers())); + + // start tracker + Reference initData = + mock->getInitialDataDistribution(ddcx.id(), ddcx.lock, {}, ddcx.ddEnabledState.get(), SkipDDModeCheck::True) + .get(); + Reference physicalShardCollection = makeReference(); + Reference> zeroHealthyTeams = makeReference>(false); + actors.add(dataDistributionTracker(initData, + mock, + output, + ddcx.shardsAffectedByTeamFailure, + physicalShardCollection, + getShardMetrics, + getTopKMetrics.getFuture(), + getShardMetricsList, + getAverageShardBytes.getFuture(), + Promise(), + zeroHealthyTeams, + ddcx.id(), + &shards, + &ddcx.trackerCancelled, + {})); + actors.add(relocateShardReporter(this, output.getFuture())); + + 
return timeout(reportErrors(actors.getResult(), "MockDDTrackerShardEvaluatorWorkload"), testDuration, Void()); + } + + Future check(Database const& cx) override { + std::cout << "Check phase shards count: " << shards.size() << "\n"; + actors.clear(true); + return true; + } + + void getMetrics(std::vector& m) override { + for (auto& p : rsReasonCounts) { + m.push_back(PerfMetric(RelocateReason(p.first).toString(), p.second, Averaged::False)); + } + } +}; + +WorkloadFactory MockDDTrackerShardEvaluatorWorkload; \ No newline at end of file From 8ef0411b32c942dbf10237f22f10c4868a3083a7 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 3 Nov 2022 11:37:55 -0700 Subject: [PATCH 19/57] address code review comments and introduce offset parameter --- .../sphinx/source/command-line-interface.rst | 4 +- fdbcli/TenantCommands.actor.cpp | 76 ++++++++++++++----- fdbclient/include/fdbclient/KeyBackedTypes.h | 4 + .../fdbclient/MetaclusterManagement.actor.h | 62 ++++++++++----- .../workloads/MetaclusterConsistency.actor.h | 22 ------ .../MetaclusterManagementWorkload.actor.cpp | 32 ++++++++ 6 files changed, 136 insertions(+), 64 deletions(-) diff --git a/documentation/sphinx/source/command-line-interface.rst b/documentation/sphinx/source/command-line-interface.rst index a6c60d3f4f..14f8eaf1db 100644 --- a/documentation/sphinx/source/command-line-interface.rst +++ b/documentation/sphinx/source/command-line-interface.rst @@ -475,7 +475,7 @@ Deletes a tenant from the cluster. The tenant must be empty. list ^^^^ -``tenant list [BEGIN] [END] [LIMIT] [state=,,...]`` +``tenant list [BEGIN] [END] [limit=LIMIT] [offset=OFFSET] [state=,,...]`` Lists the tenants present in the cluster. @@ -485,6 +485,8 @@ Lists the tenants present in the cluster. ``LIMIT`` - the number of tenants to list. Defaults to 100. +``OFFSET`` - the number of items to skip over, starting from the beginning of the range. Defaults to 0. + ``STATE``` - TenantState(s) to filter the list with. Defaults to no filters. 
get diff --git a/fdbcli/TenantCommands.actor.cpp b/fdbcli/TenantCommands.actor.cpp index 00bd0e8309..c055878d80 100644 --- a/fdbcli/TenantCommands.actor.cpp +++ b/fdbcli/TenantCommands.actor.cpp @@ -87,6 +87,49 @@ parseTenantConfiguration(std::vector const& tokens, int startIndex, b return configParams; } +bool parseTenantListOptions(std::vector const& tokens, + int startIndex, + int& limit, + int& offset, + std::vector& filters) { + for (int tokenNum = startIndex; tokenNum < tokens.size(); ++tokenNum) { + Optional value; + StringRef token = tokens[tokenNum]; + StringRef param; + bool foundEquals; + param = token.eat("=", &foundEquals); + if (!foundEquals) { + fmt::print(stderr, + "ERROR: invalid option string `{}'. String must specify a value using `='.\n", + param.toString().c_str()); + return false; + } + value = token; + if (tokencmp(param, "limit")) { + limit = std::stoi(value.get().toString()); + if (limit <= 0) { + fmt::print(stderr, "ERROR: invalid limit `{}'\n", token.toString().c_str()); + return false; + } + } else if (tokencmp(param, "offset")) { + offset = std::stoi(value.get().toString()); + if (offset <= 0) { + fmt::print(stderr, "ERROR: invalid offset `{}'\n", token.toString().c_str()); + return false; + } + } else if (tokencmp(param, "state")) { + auto filterStrings = value.get().splitAny(","_sr); + for (auto sref : filterStrings) { + filters.push_back(TenantMapEntry::stringToTenantState(sref.toString())); + } + } else { + fmt::print(stderr, "ERROR: unrecognized parameter `{}'.\n", param.toString().c_str()); + return false; + } + } + return true; +} + Key makeConfigKey(TenantNameRef tenantName, StringRef configName) { return tenantConfigSpecialKeyRange.begin.withSuffix(Tuple().append(tenantName).append(configName).pack()); } @@ -225,18 +268,21 @@ ACTOR Future tenantDeleteCommand(Reference db, std::vector tenantListCommand(Reference db, std::vector tokens) { - if (tokens.size() > 6) { - fmt::print("Usage: tenant list [BEGIN] [END] [LIMIT] 
[state=,,...]\n\n"); + if (tokens.size() > 7) { + fmt::print("Usage: tenant list [BEGIN] [END] [limit=LIMIT] [offset=OFFSET] [state=,,...]\n\n"); fmt::print("Lists the tenants in a cluster.\n"); fmt::print("Only tenants in the range BEGIN - END will be printed.\n"); fmt::print("An optional LIMIT can be specified to limit the number of results (default 100).\n"); - fmt::print("Optional comma-separated state(s) can be provided to filter the list.\n"); + fmt::print("Optionally skip over the first OFFSET results (default 0).\n"); + fmt::print("Optional comma-separated tenant state(s) can be provided to filter the list.\n"); return false; } state StringRef beginTenant = ""_sr; state StringRef endTenant = "\xff\xff"_sr; state int limit = 100; + state int offset = 0; + state std::vector filters; if (tokens.size() >= 3) { beginTenant = tokens[2]; @@ -249,25 +295,11 @@ ACTOR Future tenantListCommand(Reference db, std::vector= 5) { - int n = 0; - if (sscanf(tokens[4].toString().c_str(), "%d%n", &limit, &n) != 1 || n != tokens[4].size() || limit <= 0) { - fmt::print(stderr, "ERROR: invalid limit `{}'\n", tokens[4].toString().c_str()); + if (!parseTenantListOptions(tokens, 4, limit, offset, filters)) { return false; } } - state std::vector filters; - if (tokens.size() == 6) { // state=ready,registering - if (!tokens[5].startsWith("state="_sr)) { - fmt::print(stderr, "ERROR: state filter must begin with `state='\n"); - return false; - } - auto filterStrings = tokens[5].removePrefix("state="_sr).splitAny(","_sr); - for (auto sref : filterStrings) { - filters.push_back(TenantMapEntry::stringToTenantState(sref.toString())); - } - } - state Key beginTenantKey = tenantMapSpecialKeyRange.begin.withSuffix(beginTenant); state Key endTenantKey = tenantMapSpecialKeyRange.begin.withSuffix(endTenant); state Reference tr = db->createTransaction(); @@ -279,7 +311,7 @@ ACTOR Future tenantListCommand(Reference db, std::vector tenantNames; if (clusterType == ClusterType::METACLUSTER_MANAGEMENT) 
{ std::vector> tenants = - wait(MetaclusterAPI::listTenantsTransaction(tr, beginTenant, endTenant, limit, filters)); + wait(MetaclusterAPI::listTenants(db, beginTenant, endTenant, limit, offset, filters)); for (auto tenant : tenants) { tenantNames.push_back(tenant.first); } @@ -626,8 +658,10 @@ std::vector tenantHintGenerator(std::vector const& token } else if (tokencmp(tokens[1], "delete") && tokens.size() < 3) { static std::vector opts = { "" }; return std::vector(opts.begin() + tokens.size() - 2, opts.end()); - } else if (tokencmp(tokens[1], "list") && tokens.size() < 6) { - static std::vector opts = { "[BEGIN]", "[END]", "[LIMIT]", "[state=,,...]" }; + } else if (tokencmp(tokens[1], "list") && tokens.size() < 7) { + static std::vector opts = { + "[BEGIN]", "[END]", "[limit=LIMIT]", "[offset=OFFSET]", "[state=,,...]" + }; return std::vector(opts.begin() + tokens.size() - 2, opts.end()); } else if (tokencmp(tokens[1], "get") && tokens.size() < 4) { static std::vector opts = { "", "[JSON]" }; diff --git a/fdbclient/include/fdbclient/KeyBackedTypes.h b/fdbclient/include/fdbclient/KeyBackedTypes.h index 7446d52484..cb86aef2c9 100644 --- a/fdbclient/include/fdbclient/KeyBackedTypes.h +++ b/fdbclient/include/fdbclient/KeyBackedTypes.h @@ -168,6 +168,7 @@ template struct KeyBackedRangeResult { std::vector results; bool more; + Optional readThrough; }; // Convenient read/write access to a single value of type T stored at key @@ -368,6 +369,7 @@ public: rangeResult.results.push_back(PairType(key, val)); } rangeResult.more = kvs.more; + rangeResult.readThrough = kvs.readThrough; return rangeResult; })); } @@ -573,6 +575,7 @@ public: rangeResult.results.push_back(PairType(key, val)); } rangeResult.more = kvs.more; + rangeResult.readThrough = kvs.readThrough; return rangeResult; })); } @@ -660,6 +663,7 @@ public: rangeResult.results.push_back(Codec::unpack(kvs[i].key.removePrefix(prefix))); } rangeResult.more = kvs.more; + rangeResult.readThrough = kvs.readThrough; return 
rangeResult; })); } diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 7d930a8d9d..941cc1338e 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1555,27 +1555,16 @@ Future deleteTenant(Reference db, TenantName name) { } ACTOR template -Future>> listTenantsTransaction( - Transaction tr, - TenantNameRef begin, - TenantNameRef end, - int limit, - std::vector filters = std::vector()) { +Future>> listTenantsTransaction(Transaction tr, + TenantNameRef begin, + TenantNameRef end, + int limit) { tr->setOption(FDBTransactionOptions::RAW_ACCESS); - KeyBackedRangeResult> results = + state KeyBackedRangeResult> results = wait(ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(tr, begin, end, limit)); - if (filters.empty()) { - return results.results; - } - std::vector> filterResults; - for (auto pair : results.results) { - if (std::count(filters.begin(), filters.end(), pair.second.tenantState)) { - filterResults.push_back(pair); - } - } - return filterResults; + return results.results; } ACTOR template @@ -1584,6 +1573,7 @@ Future>> listTenants( TenantName begin, TenantName end, int limit, + int offset = 0, std::vector filters = std::vector()) { state Reference tr = db->createTransaction(); @@ -1591,9 +1581,41 @@ Future>> listTenants( try { tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE); - std::vector> tenants = - wait(listTenantsTransaction(tr, begin, end, limit, filters)); - return tenants; + if (offset == 0 && filters.empty()) { + std::vector> tenants = + wait(listTenantsTransaction(tr, begin, end, limit)); + return tenants; + } + tr->setOption(FDBTransactionOptions::RAW_ACCESS); + + state KeyBackedRangeResult> results = + wait(ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(tr, begin, end, limit)); + state 
std::vector> filterResults; + state int count = 0; + loop { + for (auto pair : results.results) { + if (filters.empty() || std::count(filters.begin(), filters.end(), pair.second.tenantState)) { + ++count; + if (count > offset) { + filterResults.push_back(pair); + if (count - offset == limit) { + ASSERT(count - offset == filterResults.size()); + return filterResults; + } + } + } + } + if (!results.more) { + return filterResults; + } + if (results.readThrough.present()) { + begin = results.readThrough.get(); + } else { + begin = keyAfter(results.results.back().first); + } + wait(store(results, + ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(tr, begin, end, limit))); + } } catch (Error& e) { wait(safeThreadFutureToFuture(tr->onError(e))); } diff --git a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h index 55b6aa863a..25f3fcae19 100644 --- a/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h +++ b/fdbserver/include/fdbserver/workloads/MetaclusterConsistency.actor.h @@ -71,20 +71,6 @@ private: ACTOR static Future loadManagementClusterMetadata(MetaclusterConsistencyCheck* self) { state Reference managementTr = self->managementDb->createTransaction(); state std::vector> tenantList; - state std::vector> tenantListReady; - state std::vector> tenantListOther; - - state std::vector readyFilter; - state std::vector otherFilter; - - readyFilter.push_back(TenantState::READY); - otherFilter.push_back(TenantState::REGISTERING); - otherFilter.push_back(TenantState::REMOVING); - otherFilter.push_back(TenantState::UPDATING_CONFIGURATION); - otherFilter.push_back(TenantState::RENAMING_FROM); - otherFilter.push_back(TenantState::RENAMING_TO); - otherFilter.push_back(TenantState::ERROR); - otherFilter.push_back(TenantState::INVALID); loop { try { @@ -115,12 +101,6 @@ private: store(tenantList, MetaclusterAPI::listTenantsTransaction( managementTr, ""_sr, 
"\xff\xff"_sr, metaclusterMaxTenants)) && - store(tenantListReady, - MetaclusterAPI::listTenantsTransaction( - managementTr, ""_sr, "\xff\xff"_sr, metaclusterMaxTenants, readyFilter)) && - store(tenantListOther, - MetaclusterAPI::listTenantsTransaction( - managementTr, ""_sr, "\xff\xff"_sr, metaclusterMaxTenants, otherFilter)) && store(self->managementMetadata.tenantGroups, MetaclusterAPI::ManagementClusterMetadata::tenantMetadata().tenantGroupMap.getRange( managementTr, {}, {}, metaclusterMaxTenants)) && @@ -133,8 +113,6 @@ private: } } - ASSERT(tenantListReady.size() + tenantListOther.size() == tenantList.size()); - self->managementMetadata.tenantMap = std::map(tenantList.begin(), tenantList.end()); for (auto t : self->managementMetadata.clusterTenantTuples.results) { diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index 87d5a0419f..9022cbc80b 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -392,6 +392,33 @@ struct MetaclusterManagementWorkload : TestWorkload { return Void(); } + ACTOR static Future verifyListFilter(MetaclusterManagementWorkload* self, TenantName tenant) { + try { + state TenantMapEntry checkEntry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant)); + state TenantState checkState = checkEntry.tenantState; + state std::vector> tenantList; + state std::vector filters; + filters.push_back(checkState); + wait(store(tenantList, + MetaclusterAPI::listTenants(self->managementDb, ""_sr, "\xff\xff"_sr, 10e6, 0, filters))); + ASSERT(!tenantList.empty()); + bool found = false; + for (auto pair : tenantList) { + ASSERT(pair.second.tenantState == checkState); + if (pair.first == tenant) { + found = true; + } + } + ASSERT(found); + } catch (Error& e) { + if (e.code() != error_code_tenant_not_found) { + TraceEvent(SevError, 
"VerifyListFilterFailure").error(e).detail("Tenant", tenant); + throw; + } + } + return Void(); + } + ACTOR static Future createTenant(MetaclusterManagementWorkload* self) { state TenantName tenant = self->chooseTenantName(); state Optional tenantGroup = self->chooseTenantGroup(); @@ -433,6 +460,7 @@ struct MetaclusterManagementWorkload : TestWorkload { break; } else { retried = true; + wait(verifyListFilter(self, tenant)); } } catch (Error& e) { if (e.code() == error_code_tenant_already_exists && retried && !exists) { @@ -533,6 +561,7 @@ struct MetaclusterManagementWorkload : TestWorkload { break; } else { retried = true; + wait(verifyListFilter(self, tenant)); } } catch (Error& e) { if (e.code() == error_code_tenant_not_found && retried && exists) { @@ -622,6 +651,7 @@ struct MetaclusterManagementWorkload : TestWorkload { if (result.present()) { break; } + wait(verifyListFilter(self, tenant)); } ASSERT(exists); @@ -716,6 +746,8 @@ struct MetaclusterManagementWorkload : TestWorkload { } retried = true; + wait(verifyListFilter(self, tenant)); + wait(verifyListFilter(self, newTenantName)); } catch (Error& e) { // If we retry the rename after it had succeeded, we will get an error that we should ignore if (e.code() == error_code_tenant_not_found && exists && !newTenantExists && retried) { From 96cf3f855b3bd8d33cc3aafe421ce8d7274fe5cb Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 7 Nov 2022 16:47:14 -0800 Subject: [PATCH 20/57] add rawMoveShard function --- fdbserver/DDTxnProcessor.actor.cpp | 4 +- fdbserver/MoveKeys.actor.cpp | 4 +- fdbserver/ShardsAffectedByTeamFailure.cpp | 20 ++++- .../fdbserver/ShardsAffectedByTeamFailure.h | 6 +- .../IDDTxnProcessorApiCorrectness.actor.cpp | 89 ++++++++++++++++--- tests/fast/IDDTxnProcessorApiCorrectness.toml | 4 + 6 files changed, 108 insertions(+), 19 deletions(-) diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 991f4de95b..22a66d3f66 100644 --- 
a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -897,7 +897,9 @@ void DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, std::map destTeams; destTeams.emplace_back(params.destinationTeam, true); mgs->shardMapping->defineShard(params.keys); - mgs->shardMapping->moveShard(params.keys, destTeams); + auto teamPair = mgs->shardMapping->getTeamsFor(params.keys.begin); + auto& srcTeams = teamPair.second.empty() ? teamPair.first : teamPair.second; + mgs->shardMapping->rawMoveShard(params.keys, srcTeams, destTeams); auto randomRangeSize = deterministicRandom()->randomInt64(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES); diff --git a/fdbserver/MoveKeys.actor.cpp b/fdbserver/MoveKeys.actor.cpp index cd4d6ac12b..1425ccb30a 100644 --- a/fdbserver/MoveKeys.actor.cpp +++ b/fdbserver/MoveKeys.actor.cpp @@ -580,8 +580,8 @@ ACTOR Future logWarningAfter(const char* context, double duration, std::ve // keyServer: map from keys to destination servers // serverKeys: two-dimension map: [servers][keys], value is the servers' state of having the keys: active(not-have), -// complete(already has), ""(). Set keyServers[keys].dest = servers Set serverKeys[servers][keys] = active for each -// subrange of keys that the server did not already have, complete for each subrange that it already has Set +// complete(already has), ""(). Set keyServers[keys].dest = servers. Set serverKeys[servers][keys] = active for each +// subrange of keys that the server did not already have, = complete for each subrange that it already has. 
Set // serverKeys[dest][keys] = "" for the dest servers of each existing shard in keys (unless that destination is a member // of servers OR if the source list is sufficiently degraded) ACTOR static Future startMoveKeys(Database occ, diff --git a/fdbserver/ShardsAffectedByTeamFailure.cpp b/fdbserver/ShardsAffectedByTeamFailure.cpp index bc1b150656..d774f658e7 100644 --- a/fdbserver/ShardsAffectedByTeamFailure.cpp +++ b/fdbserver/ShardsAffectedByTeamFailure.cpp @@ -107,7 +107,6 @@ void ShardsAffectedByTeamFailure::defineShard(KeyRangeRef keys) { check(); } -// Move keys to destinationTeams by updating shard_teams void ShardsAffectedByTeamFailure::moveShard(KeyRangeRef keys, std::vector destinationTeams) { /*TraceEvent("ShardsAffectedByTeamFailureMove") .detail("KeyBegin", keys.begin) @@ -158,6 +157,25 @@ void ShardsAffectedByTeamFailure::moveShard(KeyRangeRef keys, std::vector check(); } +void ShardsAffectedByTeamFailure::rawMoveShard(KeyRangeRef keys, + const std::vector& srcTeams, + const std::vector& destinationTeams) { + auto it = shard_teams.rangeContaining(keys.begin); + std::vector, std::vector>, KeyRange>> modifiedShards; + ASSERT(it->range() == keys); + + // erase the many teams that were associated with this one shard + for (auto t = it->value().first.begin(); t != it->value().first.end(); ++t) { + erase(*t, it->range()); + } + it.value() = std::make_pair(destinationTeams, srcTeams); + for(auto& team: destinationTeams) { + insert(team, keys); + } + + check(); +} + void ShardsAffectedByTeamFailure::finishMove(KeyRangeRef keys) { auto ranges = shard_teams.containedRanges(keys); for (auto it = ranges.begin(); it != ranges.end(); ++it) { diff --git a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h index 326958bbb6..ca702ee4a4 100644 --- a/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h +++ b/fdbserver/include/fdbserver/ShardsAffectedByTeamFailure.h @@ -93,8 +93,12 @@ public: // 
Shard boundaries are modified in defineShard and the content of what servers correspond to each shard is a copy // or union of the shards already there void defineShard(KeyRangeRef keys); - // moveShard never change the shard boundary but just change the team value + // moveShard never change the shard boundary but just change the team value. Move keys to destinationTeams by + // updating shard_teams, the old destination teams will be added to new source teams. void moveShard(KeyRangeRef keys, std::vector destinationTeam); + // This function assume keys is exactly a shard in this mapping, this function set the srcTeam and destination + // directly without retaining the old destination team info + void rawMoveShard(KeyRangeRef keys, const std::vector& srcTeams, const std::vector& destinationTeam); // finishMove never change the shard boundary but just clear the old source team value void finishMove(KeyRangeRef keys); // a convenient function for (defineShard, moveShard, finishMove) pipeline diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index 26cd3cf91c..aceae94ad7 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -28,13 +28,21 @@ #include "flow/actorcompiler.h" // This must be the last #include. 
#include "fdbclient/VersionedMap.h" +std::string describe(const DDShardInfo& a) { + std::string res = "key: " + a.key.toString() + "\n"; + res += "\tprimarySrc: " + describe(a.primarySrc) + "\n"; + res += "\tprimaryDest: " + describe(a.primaryDest) + "\n"; + res += "\tremoteSrc: " + describe(a.remoteSrc) + "\n"; + res += "\tremoteDest: " + describe(a.remoteDest) + "\n"; + return res; +} bool compareShardInfo(const DDShardInfo& a, const DDShardInfo& other) { // Mock DD just care about the server<->key mapping in DDShardInfo bool result = a.key == other.key && a.hasDest == other.hasDest && a.primaryDest == other.primaryDest && a.primarySrc == other.primarySrc && a.remoteSrc == other.remoteSrc && a.remoteDest == other.remoteDest; if (!result) { - std::cout << a.key.toHexString() << " | " << other.key.toHexString() << "\n"; + std::cout << a.key.toStringView() << " | " << other.key.toStringView() << "\n"; std::cout << a.hasDest << " | " << other.hasDest << "\n"; std::cout << describe(a.primarySrc) << " | " << describe(other.primarySrc) << "\n"; std::cout << describe(a.primaryDest) << " | " << describe(other.primaryDest) << "\n"; @@ -47,15 +55,25 @@ bool compareShardInfo(const DDShardInfo& a, const DDShardInfo& other) { void verifyInitDataEqual(Reference real, Reference mock) { // Mock DD just care about the team list and server<->key mapping are consistent with the real cluster if (real->shards.size() != mock->shards.size()) { - std::cout << "real.size: " << real->shards.size() << " mock.size: " << mock->shards.size() << "\n"; - ASSERT(false); + std::cout << "shardBoundaries: real v.s. 
mock \n"; + for (auto& shard : real->shards) { + std::cout << describe(shard); + } + std::cout << " ------- \n"; + for (auto& shard : mock->shards) { + std::cout << describe(shard); + } } + ASSERT_EQ(real->shards.size(), mock->shards.size()); ASSERT(std::equal( real->shards.begin(), real->shards.end(), mock->shards.begin(), mock->shards.end(), compareShardInfo)); - std::cout << describe(real->primaryTeams) << " | " << describe(mock->primaryTeams) << "\n"; - ASSERT(real->primaryTeams == mock->primaryTeams); + + if (real->primaryTeams != mock->primaryTeams) { + std::cout << describe(real->primaryTeams) << " | " << describe(mock->primaryTeams) << "\n"; + ASSERT(false); + } + ASSERT(real->remoteTeams == mock->remoteTeams); - ASSERT_EQ(real->shards.size(), mock->shards.size()); } // testers expose protected methods @@ -89,6 +107,7 @@ public: struct IDDTxnProcessorApiWorkload : TestWorkload { static constexpr auto NAME = "IDDTxnProcessorApiCorrectness"; bool enabled; + bool testStartOnly; double testDuration; double meanDelay = 0.05; double maxKeyspace = 0.1; @@ -99,12 +118,14 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { std::shared_ptr mock; Reference realInitDD; + std::set boundaries; IDDTxnProcessorApiWorkload(WorkloadContext const& wcx) : TestWorkload(wcx), ddContext(UID()) { enabled = !clientId && g_network->isSimulated(); // only do this on the "first" client testDuration = getOption(options, "testDuration"_sr, 10.0); meanDelay = getOption(options, "meanDelay"_sr, meanDelay); maxKeyspace = getOption(options, "maxKeyspace"_sr, maxKeyspace); + testStartOnly = getOption(options, "testStartOnly"_sr, false); } Future setup(Database const& cx) override { return enabled ? 
_setup(cx, this) : Void(); } @@ -135,13 +156,44 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { throw; } } + self->updateBoundaries(); return Void(); } + // according to boundaries, generate valid ranges for moveKeys operation KeyRange getRandomKeys() const { - double len = deterministicRandom()->random01() * this->maxKeyspace; - double pos = deterministicRandom()->random01() * (1.0 - len); - return KeyRangeRef(doubleToTestKey(pos), doubleToTestKey(pos + len)); + // merge or split operations + Key begin, end; + if (deterministicRandom()->coinflip()) { + // pure move + if (boundaries.size() == 2) { + begin = *boundaries.begin(); + end = *boundaries.rbegin(); + } else { + // merge shard + int a = deterministicRandom()->randomInt(0, boundaries.size() - 1); + int b = deterministicRandom()->randomInt(a + 1, boundaries.size()); + auto it = boundaries.begin(); + std::advance(it, a); + begin = *it; + std::advance(it, b - a); + end = *it; + } + } else { + // split + double start = deterministicRandom()->random01() * this->maxKeyspace; + begin = doubleToTestKey(start); + auto it = boundaries.upper_bound(begin); + ASSERT(it != boundaries.end()); // allKeys.end is larger than any random keys here + + double len = deterministicRandom()->random01() * (1 - maxKeyspace); + end = doubleToTestKey(start + len); + if (end > *it || deterministicRandom()->coinflip()) { + end = *it; + } + } + + return KeyRangeRef(begin, end); } std::vector getRandomTeam() { @@ -158,6 +210,13 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { return result; } + void updateBoundaries() { + boundaries.clear(); + for (auto& shard : realInitDD->shards) { + boundaries.insert(boundaries.end(), shard.key); + } + } + ACTOR Future _setup(Database cx, IDDTxnProcessorApiWorkload* self) { int oldMode = wait(setDDMode(cx, 0)); TraceEvent("IDDTxnApiTestStartModeSetting").detail("OldValue", oldMode).log(); @@ -169,7 +228,6 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { // FIXME: add support for 
generating random teams across DCs ASSERT_EQ(self->ddContext.usableRegions(), 1); wait(readRealInitialDataDistribution(self)); - return Void(); } @@ -212,7 +270,9 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { params.startMoveKeysParallelismLock = &fl1; params.finishMoveKeysParallelismLock = &fl2; params.relocationIntervalId = relocateShardInterval.pairID; - TraceEvent(SevDebug, relocateShardInterval.begin(), relocateShardInterval.pairID); + TraceEvent(SevDebug, relocateShardInterval.begin(), relocateShardInterval.pairID) + .detail("Key", params.keys) + .detail("Dest", params.destinationTeam); loop { params.dataMovementComplete.reset(); @@ -223,7 +283,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { wait(self->real->testRawStartMovement(params, emptyTssMapping)); // test finish or started but cancelled movement - if (true || deterministicRandom()->coinflip()) { + if (self->testStartOnly || deterministicRandom()->coinflip()) { CODE_PROBE(true, "RawMovementApi partial started"); break; } @@ -259,6 +319,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { KeyRange keys = self->getRandomKeys(); std::vector destTeams = self->getRandomTeam(); + std::sort(destTeams.begin(), destTeams.end()); return MoveKeysParams{ deterministicRandom()->randomUniqueID(), keys, destTeams, @@ -317,8 +378,9 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { ACTOR Future worker(Database cx, IDDTxnProcessorApiWorkload* self) { state double lastTime = now(); state int choice = 0; + state int maxChoice = self->testStartOnly ? 
1 : 2; loop { - choice = deterministicRandom()->randomInt(0, 1); + choice = deterministicRandom()->randomInt(0, maxChoice); if (choice == 0) { // test rawStartMovement and rawFinishMovement separately wait(testRawMovementApi(self)); } else if (choice == 1) { // test moveKeys @@ -327,7 +389,6 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { ASSERT(false); } wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); - // Keep trying to get the moveKeysLock } } diff --git a/tests/fast/IDDTxnProcessorApiCorrectness.toml b/tests/fast/IDDTxnProcessorApiCorrectness.toml index b45755e833..2a8ad23ce4 100644 --- a/tests/fast/IDDTxnProcessorApiCorrectness.toml +++ b/tests/fast/IDDTxnProcessorApiCorrectness.toml @@ -2,9 +2,13 @@ generateFearless = false # prevent generating remote dc because in MGS there's no region setting yet disableTss = true # There's no TSS in MGS this prevent the DD operate TSS mapping +[[knobs]] +max_added_sources_multiplier = 0 # set to 0 because it's impossible to make sure SS and mock SS will finish fetch keys at the same time. 
+ [[test]] testTitle = 'IDDTxnProcessorApiCorrectness' [[test.workload]] testName = 'IDDTxnProcessorApiCorrectness' testDuration = 50.0 + testStartOnly = true From fd425db1cfb1e03a860ef1e41a24d981e8cb84ad Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Tue, 8 Nov 2022 23:37:58 -0800 Subject: [PATCH 21/57] fix rawStartMovement bugs for merge shard; change the test toml file name --- fdbserver/DDTxnProcessor.actor.cpp | 38 ++++++++++++------- tests/CMakeLists.txt | 2 +- ...l => IDDTxnProcessorRawStartMovement.toml} | 2 +- 3 files changed, 27 insertions(+), 15 deletions(-) rename tests/fast/{IDDTxnProcessorApiCorrectness.toml => IDDTxnProcessorRawStartMovement.toml} (88%) diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 22a66d3f66..78e663fceb 100644 --- a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -889,17 +889,29 @@ Future> DDMockTxnProcessor::getWorkers() const { } void DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, std::map& tssMapping) { - FlowLock::Releaser releaser(*params.startMoveKeysParallelismLock); - // Add wait(take) would always return immediately because there won’t be parallel rawStart or rawFinish in mock - // world due to the fact the following *mock* transaction code will always finish without coroutine switch. - ASSERT(params.startMoveKeysParallelismLock->take().isReady()); + // There won’t be parallel rawStart or rawFinish in mock world due to the fact the following *mock* transaction code + // will always finish without coroutine switch. + ASSERT(params.startMoveKeysParallelismLock->activePermits() == 0); std::vector destTeams; destTeams.emplace_back(params.destinationTeam, true); - mgs->shardMapping->defineShard(params.keys); - auto teamPair = mgs->shardMapping->getTeamsFor(params.keys.begin); - auto& srcTeams = teamPair.second.empty() ? 
teamPair.first : teamPair.second; - mgs->shardMapping->rawMoveShard(params.keys, srcTeams, destTeams); + // invariant: the splitting and merge operation won't happen at the same moveKeys action. For example, if [a,c) [c, + // e) exists, the params.keys won't be [b, d). + auto intersectRanges = mgs->shardMapping->intersectingRanges(params.keys); + // 1. splitting or just move a range. The new boundary need to be defined in startMovement + if (intersectRanges.begin().range().contains(params.keys)) { + mgs->shardMapping->defineShard(params.keys); + } + // 2. merge ops will coalesce the boundary in finishMovement; + intersectRanges = mgs->shardMapping->intersectingRanges(params.keys); + ASSERT(params.keys.begin == intersectRanges.begin().begin()); + ASSERT(params.keys.end == intersectRanges.end().begin()); + + for (auto it = intersectRanges.begin(); it != intersectRanges.end(); ++it) { + auto teamPair = mgs->shardMapping->getTeamsFor(it->begin()); + auto& srcTeams = teamPair.second.empty() ? teamPair.first : teamPair.second; + mgs->shardMapping->rawMoveShard(it->range(), srcTeams, destTeams); + } auto randomRangeSize = deterministicRandom()->randomInt64(SERVER_KNOBS->MIN_SHARD_BYTES, SERVER_KNOBS->MAX_SHARD_BYTES); @@ -912,15 +924,14 @@ void DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, std::map& tssMapping) { - FlowLock::Releaser releaser(*params.finishMoveKeysParallelismLock); - // Add wait(take) would always return immediately because there won’t be parallel rawStart or rawFinish in mock - // world due to the fact the following *mock* transaction code will always finish without coroutine switch. - ASSERT(params.finishMoveKeysParallelismLock->take().isReady()); + // There won’t be parallel rawStart or rawFinish in mock world due to the fact the following *mock* transaction code + // will always finish without coroutine switch. 
+ ASSERT(params.finishMoveKeysParallelismLock->activePermits() == 0); // get source and dest teams auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsForFirstShard(params.keys); - ASSERT_EQ(destTeams.size(), 0); + ASSERT_EQ(destTeams.size(), 1); // Will the multi-region or dynamic replica make destTeam.size() > 1? if (destTeams.front() != ShardsAffectedByTeamFailure::Team{ params.destinationTeam, true }) { TraceEvent(SevError, "MockRawFinishMovementError") .detail("Reason", "InconsistentDestinations") @@ -941,4 +952,5 @@ void DDMockTxnProcessor::rawFinishMovement(MoveKeysParams& params, } } mgs->shardMapping->finishMove(params.keys); + mgs->shardMapping->defineShard(params.keys); // coalesce for merge } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 85f05d1631..c49e58b14c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -170,7 +170,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES fast/MutationLogReaderCorrectness.toml) add_fdb_test(TEST_FILES fast/GetEstimatedRangeSize.toml) add_fdb_test(TEST_FILES fast/GetMappedRange.toml) - add_fdb_test(TEST_FILES fast/IDDTxnProcessorApiCorrectness.toml) + add_fdb_test(TEST_FILES fast/IDDTxnProcessorRawStartMovement.toml) add_fdb_test(TEST_FILES fast/PrivateEndpoints.toml) add_fdb_test(TEST_FILES fast/ProtocolVersion.toml) add_fdb_test(TEST_FILES fast/RandomSelector.toml) diff --git a/tests/fast/IDDTxnProcessorApiCorrectness.toml b/tests/fast/IDDTxnProcessorRawStartMovement.toml similarity index 88% rename from tests/fast/IDDTxnProcessorApiCorrectness.toml rename to tests/fast/IDDTxnProcessorRawStartMovement.toml index 2a8ad23ce4..8bec1e456a 100644 --- a/tests/fast/IDDTxnProcessorApiCorrectness.toml +++ b/tests/fast/IDDTxnProcessorRawStartMovement.toml @@ -11,4 +11,4 @@ testTitle = 'IDDTxnProcessorApiCorrectness' [[test.workload]] testName = 'IDDTxnProcessorApiCorrectness' testDuration = 50.0 - testStartOnly = true + testStartOnly = true # only test startMovement implementation From 
f08b2b86d9b1f8c4fe6c8323ac8e3f6e72b0fe29 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Wed, 9 Nov 2022 10:45:19 -0800 Subject: [PATCH 22/57] remove readthrough and have minimum limit for the getRange --- fdbclient/include/fdbclient/KeyBackedTypes.h | 4 ---- .../include/fdbclient/MetaclusterManagement.actor.h | 12 +++++------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/fdbclient/include/fdbclient/KeyBackedTypes.h b/fdbclient/include/fdbclient/KeyBackedTypes.h index cb86aef2c9..7446d52484 100644 --- a/fdbclient/include/fdbclient/KeyBackedTypes.h +++ b/fdbclient/include/fdbclient/KeyBackedTypes.h @@ -168,7 +168,6 @@ template struct KeyBackedRangeResult { std::vector results; bool more; - Optional readThrough; }; // Convenient read/write access to a single value of type T stored at key @@ -369,7 +368,6 @@ public: rangeResult.results.push_back(PairType(key, val)); } rangeResult.more = kvs.more; - rangeResult.readThrough = kvs.readThrough; return rangeResult; })); } @@ -575,7 +573,6 @@ public: rangeResult.results.push_back(PairType(key, val)); } rangeResult.more = kvs.more; - rangeResult.readThrough = kvs.readThrough; return rangeResult; })); } @@ -663,7 +660,6 @@ public: rangeResult.results.push_back(Codec::unpack(kvs[i].key.removePrefix(prefix))); } rangeResult.more = kvs.more; - rangeResult.readThrough = kvs.readThrough; return rangeResult; })); } diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index 941cc1338e..f8467d7e8c 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1589,7 +1589,8 @@ Future>> listTenants( tr->setOption(FDBTransactionOptions::RAW_ACCESS); state KeyBackedRangeResult> results = - wait(ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(tr, begin, end, limit)); + wait(ManagementClusterMetadata::tenantMetadata().tenantMap.getRange( + tr, begin, end, 
std::max(limit + offset, 100))); state std::vector> filterResults; state int count = 0; loop { @@ -1608,13 +1609,10 @@ Future>> listTenants( if (!results.more) { return filterResults; } - if (results.readThrough.present()) { - begin = results.readThrough.get(); - } else { - begin = keyAfter(results.results.back().first); - } + begin = keyAfter(results.results.back().first); wait(store(results, - ManagementClusterMetadata::tenantMetadata().tenantMap.getRange(tr, begin, end, limit))); + ManagementClusterMetadata::tenantMetadata().tenantMap.getRange( + tr, begin, end, std::max(limit + offset, 100)))); } } catch (Error& e) { wait(safeThreadFutureToFuture(tr->onError(e))); From 62b88a07725df0b2251e03a62551f74d11d8fd4c Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 9 Nov 2022 11:34:47 -0800 Subject: [PATCH 23/57] wait on lock take --- fdbserver/DDTxnProcessor.actor.cpp | 29 +++++++++++++++---- fdbserver/include/fdbserver/DDTxnProcessor.h | 4 +-- .../IDDTxnProcessorApiCorrectness.actor.cpp | 16 +++++----- tests/CMakeLists.txt | 1 + tests/fast/IDDTxnProcessorMoveKeys.toml | 13 +++++++++ .../fast/IDDTxnProcessorRawStartMovement.toml | 2 +- 6 files changed, 49 insertions(+), 16 deletions(-) create mode 100644 tests/fast/IDDTxnProcessorMoveKeys.toml diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 78e663fceb..9690f7afda 100644 --- a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -705,12 +705,12 @@ struct DDMockTxnProcessorImpl { std::sort(params.destinationTeam.begin(), params.destinationTeam.end()); std::sort(params.healthyDestinations.begin(), params.healthyDestinations.end()); - self->rawStartMovement(params, tssMapping); + wait(self->rawStartMovement(params, tssMapping)); ASSERT(tssMapping.empty()); wait(checkFetchingState(self, params.destinationTeam, params.keys)); - self->rawFinishMovement(params, tssMapping); + wait(self->rawFinishMovement(params, tssMapping)); if 
(!params.dataMovementComplete.isSet()) params.dataMovementComplete.send(Void()); return Void(); @@ -888,10 +888,14 @@ Future> DDMockTxnProcessor::getWorkers() const { return Future>(); } -void DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, std::map& tssMapping) { +ACTOR Future rawStartMovement(std::shared_ptr mgs, + MoveKeysParams params, + std::map tssMapping) { // There won’t be parallel rawStart or rawFinish in mock world due to the fact the following *mock* transaction code // will always finish without coroutine switch. ASSERT(params.startMoveKeysParallelismLock->activePermits() == 0); + wait(params.startMoveKeysParallelismLock->take(TaskPriority::DataDistributionLaunch)); + state FlowLock::Releaser releaser(*params.startMoveKeysParallelismLock); std::vector destTeams; destTeams.emplace_back(params.destinationTeam, true); @@ -920,13 +924,22 @@ void DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, std::maprestrictSize); server.signalFetchKeys(params.keys, randomRangeSize); } + return Void(); } -void DDMockTxnProcessor::rawFinishMovement(MoveKeysParams& params, - const std::map& tssMapping) { +Future DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, + std::map& tssMapping) { + return ::rawStartMovement(mgs, params, tssMapping); +} + +ACTOR Future rawFinishMovement(std::shared_ptr mgs, + MoveKeysParams params, + std::map tssMapping) { // There won’t be parallel rawStart or rawFinish in mock world due to the fact the following *mock* transaction code // will always finish without coroutine switch. 
ASSERT(params.finishMoveKeysParallelismLock->activePermits() == 0); + wait(params.finishMoveKeysParallelismLock->take(TaskPriority::DataDistributionLaunch)); + state FlowLock::Releaser releaser(*params.finishMoveKeysParallelismLock); // get source and dest teams auto [destTeams, srcTeams] = mgs->shardMapping->getTeamsForFirstShard(params.keys); @@ -953,4 +966,10 @@ void DDMockTxnProcessor::rawFinishMovement(MoveKeysParams& params, } mgs->shardMapping->finishMove(params.keys); mgs->shardMapping->defineShard(params.keys); // coalesce for merge + return Void(); +} + +Future DDMockTxnProcessor::rawFinishMovement(MoveKeysParams& params, + const std::map& tssMapping) { + return ::rawFinishMovement(mgs, params, tssMapping); } diff --git a/fdbserver/include/fdbserver/DDTxnProcessor.h b/fdbserver/include/fdbserver/DDTxnProcessor.h index 0142c95183..09a9f48160 100644 --- a/fdbserver/include/fdbserver/DDTxnProcessor.h +++ b/fdbserver/include/fdbserver/DDTxnProcessor.h @@ -292,9 +292,9 @@ public: Future> getWorkers() const override; protected: - void rawStartMovement(MoveKeysParams& params, std::map& tssMapping); + Future rawStartMovement(MoveKeysParams& params, std::map& tssMapping); - void rawFinishMovement(MoveKeysParams& params, const std::map& tssMapping); + Future rawFinishMovement(MoveKeysParams& params, const std::map& tssMapping); }; #endif // FOUNDATIONDB_DDTXNPROCESSOR_H diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index aceae94ad7..bf5eccfa91 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -80,12 +80,12 @@ void verifyInitDataEqual(Reference real, Reference mgs = nullptr) : DDMockTxnProcessor(mgs) {} - void testRawStartMovement(MoveKeysParams& params, std::map& tssMapping) { - rawStartMovement(params, tssMapping); + Future testRawStartMovement(MoveKeysParams& params, std::map& 
tssMapping) { + return rawStartMovement(params, tssMapping); } - void testRawFinishMovement(MoveKeysParams& params, const std::map& tssMapping) { - rawFinishMovement(params, tssMapping); + Future testRawFinishMovement(MoveKeysParams& params, const std::map& tssMapping) { + return rawFinishMovement(params, tssMapping); } }; @@ -94,12 +94,12 @@ public: explicit DDTxnProcessorTester(Database cx) : DDTxnProcessor(cx) {} Future testRawStartMovement(MoveKeysParams& params, std::map& tssMapping) { - return this->rawStartMovement(params, tssMapping); + return rawStartMovement(params, tssMapping); } Future testRawFinishMovement(MoveKeysParams& params, const std::map& tssMapping) { - return this->rawFinishMovement(params, tssMapping); + return rawFinishMovement(params, tssMapping); } }; @@ -279,7 +279,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { wait(store(params.lock, self->real->takeMoveKeysLock(UID()))); try { // test start - self->mock->testRawStartMovement(params, emptyTssMapping); + wait(self->mock->testRawStartMovement(params, emptyTssMapping)); wait(self->real->testRawStartMovement(params, emptyTssMapping)); // test finish or started but cancelled movement @@ -288,7 +288,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { break; } - self->mock->testRawFinishMovement(params, emptyTssMapping); + wait(self->mock->testRawFinishMovement(params, emptyTssMapping)); wait(self->real->testRawFinishMovement(params, emptyTssMapping)); break; } catch (Error& e) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c49e58b14c..bc850f3333 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -171,6 +171,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES fast/GetEstimatedRangeSize.toml) add_fdb_test(TEST_FILES fast/GetMappedRange.toml) add_fdb_test(TEST_FILES fast/IDDTxnProcessorRawStartMovement.toml) + add_fdb_test(TEST_FILES fast/IDDTxnProcessorMoveKeys.toml IGNORE) add_fdb_test(TEST_FILES fast/PrivateEndpoints.toml) add_fdb_test(TEST_FILES 
fast/ProtocolVersion.toml) add_fdb_test(TEST_FILES fast/RandomSelector.toml) diff --git a/tests/fast/IDDTxnProcessorMoveKeys.toml b/tests/fast/IDDTxnProcessorMoveKeys.toml new file mode 100644 index 0000000000..9dedc67253 --- /dev/null +++ b/tests/fast/IDDTxnProcessorMoveKeys.toml @@ -0,0 +1,13 @@ +[configuration] +generateFearless = false # prevent generating remote dc because in MGS there's no region setting yet +disableTss = true # There's no TSS in MGS this prevent the DD operate TSS mapping + +[[knobs]] +max_added_sources_multiplier = 0 # set to 0 because it's impossible to make sure SS and mock SS will finish fetch keys at the same time. + +[[test]] +testTitle = 'IDDTxnProcessorMoveKeys' + + [[test.workload]] + testName = 'IDDTxnProcessorApiCorrectness' + testDuration = 50.0 diff --git a/tests/fast/IDDTxnProcessorRawStartMovement.toml b/tests/fast/IDDTxnProcessorRawStartMovement.toml index 8bec1e456a..73109583ee 100644 --- a/tests/fast/IDDTxnProcessorRawStartMovement.toml +++ b/tests/fast/IDDTxnProcessorRawStartMovement.toml @@ -6,7 +6,7 @@ disableTss = true # There's no TSS in MGS this prevent the DD operate TSS mappin max_added_sources_multiplier = 0 # set to 0 because it's impossible to make sure SS and mock SS will finish fetch keys at the same time. 
[[test]] -testTitle = 'IDDTxnProcessorApiCorrectness' +testTitle = 'IDDTxnProcessorRawStartMovement' [[test.workload]] testName = 'IDDTxnProcessorApiCorrectness' From 3fb12680e3459e428c3caad699686eb9c5e49995 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 9 Nov 2022 13:15:46 -0800 Subject: [PATCH 24/57] revert the mvccStorageBytes for write sample change and mark it with FIXME --- fdbclient/include/fdbclient/StorageServerInterface.h | 11 ++++++++++- fdbserver/MockGlobalState.actor.cpp | 4 ++-- fdbserver/storageserver.actor.cpp | 9 +++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/fdbclient/include/fdbclient/StorageServerInterface.h b/fdbclient/include/fdbclient/StorageServerInterface.h index b8ad4523c9..a1b6e0ce08 100644 --- a/fdbclient/include/fdbclient/StorageServerInterface.h +++ b/fdbclient/include/fdbclient/StorageServerInterface.h @@ -634,7 +634,7 @@ struct GetShardStateRequest { struct StorageMetrics { constexpr static FileIdentifier file_identifier = 13622226; int64_t bytes = 0; // total storage - int64_t writeBytesPerKSecond = 0; // network bandwidth (average over 10s) == write bandwidth through any IO devices + int64_t writeBytesPerKSecond = 0; // bytes write to SQ // FIXME: currently, iosPerKSecond is not used in DataDistribution calculations. int64_t iosPerKSecond = 0; @@ -1180,4 +1180,13 @@ struct StorageQueuingMetricsRequest { } }; +// Memory size for storing mutation in the mutation log and the versioned map. 
+inline int mvccStorageBytes(int mutationBytes) { + // Why * 2: + // - 1 insertion into version map costs 2 nodes in avg; + // - The mutation will be stored in both mutation log and versioned map; + return VersionedMap::overheadPerItem * 2 + + (mutationBytes + MutationRef::OVERHEAD_BYTES) * 2; +} + #endif diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index 388b3da93a..240ff27f6d 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -401,7 +401,8 @@ void MockStorageServer::clearRangeTotalBytes(KeyRangeRef const& range, int64_t b void MockStorageServer::notifyWriteMetrics(KeyRef const& key, int64_t size) { // update write bandwidth and iops as mock the cost of writing a mutation StorageMetrics s; - s.writeBytesPerKSecond = size + MutationRef::OVERHEAD_BYTES; + // FIXME: remove the / 2 and double the related knobs. + s.writeBytesPerKSecond = mvccStorageBytes(size) / 2; s.iosPerKSecond = 1; metrics.notify(key, s); } @@ -1000,7 +1001,6 @@ TEST_CASE("/MockGlobalState/MockStorageServer/DataOpsSet") { // If sampled ASSERT_EQ(res.first.get().bytes, testSize); ASSERT_GT(res.first.get().writeBytesPerKSecond, 0); - ASSERT_GT(res.first.get().iosPerKSecond, 0); } } return Void(); diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 266a841ca1..ded64b5d3b 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -534,12 +534,8 @@ const int VERSION_OVERHEAD = // createNewVersion(version+1) ], 64b // overhead for map -// Memory size for storing mutation in the mutation log and the versioned map. 
static int mvccStorageBytes(MutationRef const& m) { - // Why * 2: - // - 1 insertion into version map costs 2 nodes in avg; - // - The mutation will be stored in both mutation log and versioned map; - return VersionedMap::overheadPerItem * 2 + m.totalSize() * 2; + return mvccStorageBytes(m.param1.size() + m.param2.size()); } struct FetchInjectionInfo { @@ -5616,7 +5612,8 @@ void applyMutation(StorageServer* self, // m is expected to be in arena already // Clear split keys are added to arena StorageMetrics metrics; - metrics.writeBytesPerKSecond = m.totalSize(); // comparable to counter.mutationBytes + // FIXME: remove the / 2 and double the related knobs. + metrics.writeBytesPerKSecond = mvccStorageBytes(m) / 2; // comparable to counter.bytesInput / 2 metrics.iosPerKSecond = 1; self->metrics.notify(m.param1, metrics); From 7c9334121a1dba17bfcff0e9c62c598904cda4b7 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Thu, 10 Nov 2022 12:51:22 -0800 Subject: [PATCH 25/57] mark MoveKeysParams const& --- fdbserver/DDTxnProcessor.actor.cpp | 8 ++++---- fdbserver/MockGlobalState.actor.cpp | 16 +++++++++++++++- fdbserver/MoveKeys.actor.cpp | 5 +++-- fdbserver/include/fdbserver/DDTxnProcessor.h | 9 +++++---- fdbserver/include/fdbserver/MoveKeys.actor.h | 5 +++-- .../IDDTxnProcessorApiCorrectness.actor.cpp | 14 ++++++++++++-- 6 files changed, 42 insertions(+), 15 deletions(-) diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 9690f7afda..614e15679b 100644 --- a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -669,12 +669,12 @@ Future> DDTxnProcessor::getWorkers() const { return ::getWorkers(cx); } -Future DDTxnProcessor::rawStartMovement(MoveKeysParams& params, +Future DDTxnProcessor::rawStartMovement(const MoveKeysParams& params, std::map& tssMapping) { return ::rawStartMovement(cx, params, tssMapping); } -Future DDTxnProcessor::rawFinishMovement(MoveKeysParams& params, +Future 
DDTxnProcessor::rawFinishMovement(const MoveKeysParams& params, const std::map& tssMapping) { return ::rawFinishMovement(cx, params, tssMapping); } @@ -927,7 +927,7 @@ ACTOR Future rawStartMovement(std::shared_ptr mgs, return Void(); } -Future DDMockTxnProcessor::rawStartMovement(MoveKeysParams& params, +Future DDMockTxnProcessor::rawStartMovement(const MoveKeysParams& params, std::map& tssMapping) { return ::rawStartMovement(mgs, params, tssMapping); } @@ -969,7 +969,7 @@ ACTOR Future rawFinishMovement(std::shared_ptr mgs, return Void(); } -Future DDMockTxnProcessor::rawFinishMovement(MoveKeysParams& params, +Future DDMockTxnProcessor::rawFinishMovement(const MoveKeysParams& params, const std::map& tssMapping) { return ::rawFinishMovement(mgs, params, tssMapping); } diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index 240ff27f6d..aabe9f379c 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -166,6 +166,7 @@ bool MockStorageServer::allShardStatusEqual(const KeyRangeRef& range, MockShardS bool MockStorageServer::allShardStatusIn(const KeyRangeRef& range, const std::set& status) { auto ranges = serverKeys.intersectingRanges(range); + TraceEvent("AllShardStatusIn", id).detail("RangesEmpty", ranges.empty()).detail("Range", range); ASSERT(!ranges.empty()); // at least the range is allKeys for (auto it = ranges.begin(); it != ranges.end(); ++it) { @@ -177,7 +178,15 @@ bool MockStorageServer::allShardStatusIn(const KeyRangeRef& range, const std::se void MockStorageServer::setShardStatus(const KeyRangeRef& range, MockShardStatus status, bool restrictSize) { auto ranges = serverKeys.intersectingRanges(range); - ASSERT(!ranges.empty()); + TraceEvent("SetShardStatus", id).detail("KeyRange", range).detail("Status", status); + + if (ranges.empty()) { + CODE_PROBE(true, "new shard is adding to server"); + serverKeys.insert(range, ShardInfo{ status, 0 }); + return; + } + + // change the old status 
if (ranges.begin().begin() < range.begin && ranges.begin().end() > range.end) { CODE_PROBE(true, "Implicitly split single shard to 3 pieces"); threeWayShardSplitting(ranges.begin().range(), range, ranges.begin().cvalue().shardSize, restrictSize); @@ -502,6 +511,11 @@ bool MockGlobalState::serverIsSourceForShard(const UID& serverId, KeyRangeRef sh } bool MockGlobalState::serverIsDestForShard(const UID& serverId, KeyRangeRef shard) { + TraceEvent(SevDebug, "ServerIsDestForShard") + .detail("ServerId", serverId) + .detail("Keys", shard) + .detail("Contains", allServers.count(serverId)); + if (!allServers.count(serverId)) return false; diff --git a/fdbserver/MoveKeys.actor.cpp b/fdbserver/MoveKeys.actor.cpp index 1425ccb30a..90169e2177 100644 --- a/fdbserver/MoveKeys.actor.cpp +++ b/fdbserver/MoveKeys.actor.cpp @@ -2476,7 +2476,8 @@ ACTOR Future cleanUpDataMove(Database occ, return Void(); } -Future rawStartMovement(Database occ, MoveKeysParams& params, std::map& tssMapping) { +Future rawStartMovement(Database occ, + const MoveKeysParams& params, std::map& tssMapping) { if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) { return startMoveShards(std::move(occ), params.dataMoveId, @@ -2499,7 +2500,7 @@ Future rawStartMovement(Database occ, MoveKeysParams& params, std::map rawFinishMovement(Database occ, - MoveKeysParams& params, + const MoveKeysParams& params, const std::map& tssMapping) { if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) { return finishMoveShards(std::move(occ), diff --git a/fdbserver/include/fdbserver/DDTxnProcessor.h b/fdbserver/include/fdbserver/DDTxnProcessor.h index 09a9f48160..503dcca108 100644 --- a/fdbserver/include/fdbserver/DDTxnProcessor.h +++ b/fdbserver/include/fdbserver/DDTxnProcessor.h @@ -225,9 +225,9 @@ public: Future> getWorkers() const override; protected: - Future rawStartMovement(MoveKeysParams& params, std::map& tssMapping); + Future rawStartMovement(const MoveKeysParams& params, std::map& tssMapping); - Future 
rawFinishMovement(MoveKeysParams& params, const std::map& tssMapping); + Future rawFinishMovement(const MoveKeysParams& params, const std::map& tssMapping); }; struct DDMockTxnProcessorImpl; @@ -237,6 +237,7 @@ struct DDMockTxnProcessorImpl; class DDMockTxnProcessor : public IDDTxnProcessor { friend struct DDMockTxnProcessorImpl; +protected: std::shared_ptr mgs; std::vector getDDShardInfos() const; @@ -292,9 +293,9 @@ public: Future> getWorkers() const override; protected: - Future rawStartMovement(MoveKeysParams& params, std::map& tssMapping); + Future rawStartMovement(const MoveKeysParams& params, std::map& tssMapping); - Future rawFinishMovement(MoveKeysParams& params, const std::map& tssMapping); + Future rawFinishMovement(const MoveKeysParams& params, const std::map& tssMapping); }; #endif // FOUNDATIONDB_DDTXNPROCESSOR_H diff --git a/fdbserver/include/fdbserver/MoveKeys.actor.h b/fdbserver/include/fdbserver/MoveKeys.actor.h index 0318a70644..24ed41cdf5 100644 --- a/fdbserver/include/fdbserver/MoveKeys.actor.h +++ b/fdbserver/include/fdbserver/MoveKeys.actor.h @@ -86,10 +86,11 @@ void seedShardServers(Arena& trArena, CommitTransactionRef& tr, std::vector rawStartMovement(Database occ, MoveKeysParams& params, std::map& tssMapping); +Future rawStartMovement(Database occ, + const MoveKeysParams& params, std::map& tssMapping); Future rawFinishMovement(Database occ, - MoveKeysParams& params, + const MoveKeysParams& params, const std::map& tssMapping); // Eventually moves the given keys to the given destination team // Caller is responsible for cancelling it before issuing an overlapping move, diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index bf5eccfa91..8e72072f18 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -260,6 +260,12 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { 
return Void(); } + void verifyServerKeyDest(MoveKeysParams& params) { + // check destination servers + for(auto& id: params.destinationTeam) { + ASSERT(mgs->serverIsDestForShard(id, params.keys)); + } + } ACTOR static Future testRawMovementApi(IDDTxnProcessorApiWorkload* self) { state TraceInterval relocateShardInterval("RelocateShard_TestRawMovementApi"); state FlowLock fl1(1); @@ -282,6 +288,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { wait(self->mock->testRawStartMovement(params, emptyTssMapping)); wait(self->real->testRawStartMovement(params, emptyTssMapping)); + self->verifyServerKeyDest(params); // test finish or started but cancelled movement if (self->testStartOnly || deterministicRandom()->coinflip()) { CODE_PROBE(true, "RawMovementApi partial started"); @@ -344,13 +351,15 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { params.startMoveKeysParallelismLock = &fl1; params.finishMoveKeysParallelismLock = &fl2; params.relocationIntervalId = relocateShardInterval.pairID; - TraceEvent(SevDebug, relocateShardInterval.begin(), relocateShardInterval.pairID); + TraceEvent(SevDebug, relocateShardInterval.begin(), relocateShardInterval.pairID) + .detail("Key", params.keys) + .detail("Dest", params.destinationTeam); loop { params.dataMovementComplete.reset(); wait(store(params.lock, self->real->takeMoveKeysLock(UID()))); try { - self->mock->moveKeys(params); + wait(self->mock->moveKeys(params)); wait(self->real->moveKeys(params)); break; } catch (Error& e) { @@ -375,6 +384,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { TraceEvent(SevDebug, relocateShardInterval.end(), relocateShardInterval.pairID); return Void(); } + ACTOR Future worker(Database cx, IDDTxnProcessorApiWorkload* self) { state double lastTime = now(); state int choice = 0; From 4691a352151082a97b5e53ba94f2d426a7def9f3 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Thu, 10 Nov 2022 13:11:24 -0800 Subject: [PATCH 26/57] format code --- fdbserver/MoveKeys.actor.cpp | 3 ++- 
fdbserver/ShardsAffectedByTeamFailure.cpp | 2 +- fdbserver/include/fdbserver/DDTxnProcessor.h | 6 ++++-- fdbserver/include/fdbserver/MoveKeys.actor.h | 3 ++- fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp | 5 +++-- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fdbserver/MoveKeys.actor.cpp b/fdbserver/MoveKeys.actor.cpp index 90169e2177..1979173fdf 100644 --- a/fdbserver/MoveKeys.actor.cpp +++ b/fdbserver/MoveKeys.actor.cpp @@ -2477,7 +2477,8 @@ ACTOR Future cleanUpDataMove(Database occ, } Future rawStartMovement(Database occ, - const MoveKeysParams& params, std::map& tssMapping) { + const MoveKeysParams& params, + std::map& tssMapping) { if (SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) { return startMoveShards(std::move(occ), params.dataMoveId, diff --git a/fdbserver/ShardsAffectedByTeamFailure.cpp b/fdbserver/ShardsAffectedByTeamFailure.cpp index d774f658e7..b8ab69bab7 100644 --- a/fdbserver/ShardsAffectedByTeamFailure.cpp +++ b/fdbserver/ShardsAffectedByTeamFailure.cpp @@ -169,7 +169,7 @@ void ShardsAffectedByTeamFailure::rawMoveShard(KeyRangeRef keys, erase(*t, it->range()); } it.value() = std::make_pair(destinationTeams, srcTeams); - for(auto& team: destinationTeams) { + for (auto& team : destinationTeams) { insert(team, keys); } diff --git a/fdbserver/include/fdbserver/DDTxnProcessor.h b/fdbserver/include/fdbserver/DDTxnProcessor.h index 503dcca108..d350bda61f 100644 --- a/fdbserver/include/fdbserver/DDTxnProcessor.h +++ b/fdbserver/include/fdbserver/DDTxnProcessor.h @@ -227,7 +227,8 @@ public: protected: Future rawStartMovement(const MoveKeysParams& params, std::map& tssMapping); - Future rawFinishMovement(const MoveKeysParams& params, const std::map& tssMapping); + Future rawFinishMovement(const MoveKeysParams& params, + const std::map& tssMapping); }; struct DDMockTxnProcessorImpl; @@ -295,7 +296,8 @@ public: protected: Future rawStartMovement(const MoveKeysParams& params, std::map& tssMapping); - Future 
rawFinishMovement(const MoveKeysParams& params, const std::map& tssMapping); + Future rawFinishMovement(const MoveKeysParams& params, + const std::map& tssMapping); }; #endif // FOUNDATIONDB_DDTXNPROCESSOR_H diff --git a/fdbserver/include/fdbserver/MoveKeys.actor.h b/fdbserver/include/fdbserver/MoveKeys.actor.h index 24ed41cdf5..ed027a29fa 100644 --- a/fdbserver/include/fdbserver/MoveKeys.actor.h +++ b/fdbserver/include/fdbserver/MoveKeys.actor.h @@ -87,7 +87,8 @@ void seedShardServers(Arena& trArena, CommitTransactionRef& tr, std::vector rawStartMovement(Database occ, - const MoveKeysParams& params, std::map& tssMapping); + const MoveKeysParams& params, + std::map& tssMapping); Future rawFinishMovement(Database occ, const MoveKeysParams& params, diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index 8e72072f18..e7909e9813 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -84,7 +84,8 @@ public: return rawStartMovement(params, tssMapping); } - Future testRawFinishMovement(MoveKeysParams& params, const std::map& tssMapping) { + Future testRawFinishMovement(MoveKeysParams& params, + const std::map& tssMapping) { return rawFinishMovement(params, tssMapping); } }; @@ -262,7 +263,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { void verifyServerKeyDest(MoveKeysParams& params) { // check destination servers - for(auto& id: params.destinationTeam) { + for (auto& id : params.destinationTeam) { ASSERT(mgs->serverIsDestForShard(id, params.keys)); } } From 93fb151e6c403fa7ad64df4659b45b155ed516fa Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Thu, 10 Nov 2022 14:29:01 -0800 Subject: [PATCH 27/57] add fdbcli error handling, remove invalid state and change some offset logic --- fdbcli/TenantCommands.actor.cpp | 11 ++++++++--- fdbclient/Tenant.cpp | 4 +--- 
.../include/fdbclient/MetaclusterManagement.actor.h | 11 ++++++++--- fdbclient/include/fdbclient/Tenant.h | 12 +----------- .../MetaclusterManagementWorkload.actor.cpp | 12 +++++++++--- 5 files changed, 27 insertions(+), 23 deletions(-) diff --git a/fdbcli/TenantCommands.actor.cpp b/fdbcli/TenantCommands.actor.cpp index c055878d80..daacb80fbd 100644 --- a/fdbcli/TenantCommands.actor.cpp +++ b/fdbcli/TenantCommands.actor.cpp @@ -113,14 +113,19 @@ bool parseTenantListOptions(std::vector const& tokens, } } else if (tokencmp(param, "offset")) { offset = std::stoi(value.get().toString()); - if (offset <= 0) { + if (offset < 0) { fmt::print(stderr, "ERROR: invalid offset `{}'\n", token.toString().c_str()); return false; } } else if (tokencmp(param, "state")) { auto filterStrings = value.get().splitAny(","_sr); - for (auto sref : filterStrings) { - filters.push_back(TenantMapEntry::stringToTenantState(sref.toString())); + try { + for (auto sref : filterStrings) { + filters.push_back(TenantMapEntry::stringToTenantState(sref.toString())); + } + } catch (Error& e) { + fmt::print(stderr, "ERROR: unrecognized tenant state(s) `{}'.\n", value.get().toString()); + return false; } } else { fmt::print(stderr, "ERROR: unrecognized parameter `{}'.\n", param.toString().c_str()); diff --git a/fdbclient/Tenant.cpp b/fdbclient/Tenant.cpp index 8ef4a8b9e3..b863a4ff85 100644 --- a/fdbclient/Tenant.cpp +++ b/fdbclient/Tenant.cpp @@ -64,8 +64,6 @@ std::string TenantMapEntry::tenantStateToString(TenantState tenantState) { return "renaming to"; case TenantState::ERROR: return "error"; - case TenantState::INVALID: - return "invalid"; default: UNREACHABLE(); } @@ -89,7 +87,7 @@ TenantState TenantMapEntry::stringToTenantState(std::string stateStr) { return TenantState::ERROR; } - return TenantState::INVALID; + throw invalid_option(); } std::string TenantMapEntry::tenantLockStateToString(TenantLockState tenantState) { diff --git a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h 
b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h index f8467d7e8c..019849aba8 100644 --- a/fdbclient/include/fdbclient/MetaclusterManagement.actor.h +++ b/fdbclient/include/fdbclient/MetaclusterManagement.actor.h @@ -1581,9 +1581,14 @@ Future>> listTenants( try { tr->setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::READ_LOCK_AWARE); - if (offset == 0 && filters.empty()) { - std::vector> tenants = - wait(listTenantsTransaction(tr, begin, end, limit)); + if (filters.empty()) { + state std::vector> tenants; + wait(store(tenants, listTenantsTransaction(tr, begin, end, limit + offset))); + if (offset >= tenants.size()) { + tenants.clear(); + } else if (offset > 0) { + tenants.erase(tenants.begin(), tenants.begin() + offset); + } return tenants; } tr->setOption(FDBTransactionOptions::RAW_ACCESS); diff --git a/fdbclient/include/fdbclient/Tenant.h b/fdbclient/include/fdbclient/Tenant.h index 5dcfb8ce8a..d2b1e34c40 100644 --- a/fdbclient/include/fdbclient/Tenant.h +++ b/fdbclient/include/fdbclient/Tenant.h @@ -49,7 +49,6 @@ typedef Standalone TenantGroupName; // RENAMING_TO - the tenant is being created as a rename from an existing tenant and is awaiting the rename to complete // on the data cluster // ERROR - the tenant is in an error state -// INVALID - Unrecognized state - likely the result of a failed parsing // // A tenant in any configuration is allowed to be removed. Only tenants in the READY or UPDATING_CONFIGURATION phases // can have their configuration updated. A tenant must not exist or be in the REGISTERING phase to be created. To be @@ -58,16 +57,7 @@ typedef Standalone TenantGroupName; // // If an operation fails and the tenant is left in a non-ready state, re-running the same operation is legal. If // successful, the tenant will return to the READY state. 
-enum class TenantState { - REGISTERING, - READY, - REMOVING, - UPDATING_CONFIGURATION, - RENAMING_FROM, - RENAMING_TO, - ERROR, - INVALID -}; +enum class TenantState { REGISTERING, READY, REMOVING, UPDATING_CONFIGURATION, RENAMING_FROM, RENAMING_TO, ERROR }; // Represents the lock state the tenant could be in. // Can be used in conjunction with the other tenant states above. diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index 9022cbc80b..de6f81b9dc 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -396,12 +396,18 @@ struct MetaclusterManagementWorkload : TestWorkload { try { state TenantMapEntry checkEntry = wait(MetaclusterAPI::getTenant(self->managementDb, tenant)); state TenantState checkState = checkEntry.tenantState; - state std::vector> tenantList; state std::vector filters; filters.push_back(checkState); - wait(store(tenantList, + state std::vector> tenantList; + // Possible to have changed state between now and the getTenant call above + state TenantMapEntry checkEntry2; + wait(store(checkEntry2, MetaclusterAPI::getTenant(self->managementDb, tenant)) && + store(tenantList, MetaclusterAPI::listTenants(self->managementDb, ""_sr, "\xff\xff"_sr, 10e6, 0, filters))); - ASSERT(!tenantList.empty()); + if (tenantList.empty()) { + ASSERT(checkEntry2.tenantState != checkState); + return Void(); + } bool found = false; for (auto pair : tenantList) { ASSERT(pair.second.tenantState == checkState); From 1816e5caa8be85fc26503f7ddc5c86cf490d8101 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Thu, 10 Nov 2022 15:22:01 -0800 Subject: [PATCH 28/57] setup the MGS after each test call --- fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp 
b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index e7909e9813..04b73e9a09 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -319,6 +319,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { verifyInitDataEqual(self->realInitDD, mockInitData); TraceEvent(SevDebug, relocateShardInterval.end(), relocateShardInterval.pairID); + self->mock->setupMockGlobalState(self->realInitDD); // in case SS remove or recruit return Void(); } @@ -383,6 +384,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { verifyInitDataEqual(self->realInitDD, mockInitData); TraceEvent(SevDebug, relocateShardInterval.end(), relocateShardInterval.pairID); + self->mock->setupMockGlobalState(self->realInitDD); // in case SS remove or recruit return Void(); } From 23706c957b00b85a74840cacf71f735dbe524877 Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Fri, 30 Sep 2022 10:39:43 -0700 Subject: [PATCH 29/57] Use DDSketch for Sample Data. 
--- fdbclient/NativeAPI.actor.cpp | 12 +- fdbclient/ServerKnobs.cpp | 4 +- .../include/fdbclient/BlobWorkerCommon.h | 12 +- fdbclient/include/fdbclient/DatabaseContext.h | 6 +- fdbclient/include/fdbclient/ServerKnobs.h | 4 +- fdbrpc/FlowTransport.actor.cpp | 4 +- fdbrpc/include/fdbrpc/AsyncFileKAIO.actor.h | 6 +- fdbrpc/include/fdbrpc/DDSketch.h | 311 ++++++++++++++++++ fdbrpc/include/fdbrpc/FlowTransport.h | 6 +- fdbrpc/include/fdbrpc/Stats.h | 35 +- fdbrpc/include/fdbrpc/TSSComparison.h | 17 +- fdbserver/BlobWorker.actor.cpp | 5 +- fdbserver/EncryptKeyProxy.actor.cpp | 6 +- fdbserver/GrvProxyServer.actor.cpp | 10 +- fdbserver/KeyValueStoreRocksDB.actor.cpp | 6 +- fdbserver/TLogServer.actor.cpp | 2 +- fdbserver/VersionedBTree.actor.cpp | 2 +- .../include/fdbserver/ProxyCommitData.actor.h | 8 +- .../workloads/ReadWriteWorkload.actor.h | 3 +- fdbserver/masterserver.actor.cpp | 6 +- fdbserver/storageserver.actor.cpp | 22 +- fdbserver/workloads/AtomicOps.actor.cpp | 1 - .../workloads/BackgroundSelectors.actor.cpp | 1 - fdbserver/workloads/BulkLoad.actor.cpp | 7 +- fdbserver/workloads/BulkSetup.actor.cpp | 74 +++++ fdbserver/workloads/DDBalance.actor.cpp | 6 +- .../workloads/FastTriggeredWatches.actor.cpp | 1 - fdbserver/workloads/FileSystem.actor.cpp | 8 +- fdbserver/workloads/IndexScan.actor.cpp | 1 - fdbserver/workloads/LowLatency.actor.cpp | 1 - fdbserver/workloads/Mako.actor.cpp | 6 +- fdbserver/workloads/MemoryLifetime.actor.cpp | 1 - fdbserver/workloads/MetricLogging.actor.cpp | 1 - fdbserver/workloads/QueuePush.actor.cpp | 6 +- fdbserver/workloads/RYWDisable.actor.cpp | 1 - fdbserver/workloads/RYWPerformance.actor.cpp | 1 - fdbserver/workloads/ReadAfterWrite.actor.cpp | 6 +- .../workloads/ReadHotDetection.actor.cpp | 2 +- fdbserver/workloads/ReadWrite.actor.cpp | 6 +- fdbserver/workloads/SkewedReadWrite.actor.cpp | 4 +- fdbserver/workloads/SnapTest.actor.cpp | 1 - fdbserver/workloads/StreamingRead.actor.cpp | 6 +- fdbserver/workloads/Throughput.actor.cpp | 7 +- 
fdbserver/workloads/Unreadable.actor.cpp | 1 - fdbserver/workloads/VersionStamp.actor.cpp | 1 - fdbserver/workloads/WatchAndWait.actor.cpp | 1 - fdbserver/workloads/Watches.actor.cpp | 8 +- .../WatchesSameKeyCorrectness.actor.cpp | 1 - fdbserver/workloads/WriteBandwidth.actor.cpp | 8 +- .../workloads/WriteTagThrottling.actor.cpp | 9 +- flow/Knobs.cpp | 5 +- flow/include/flow/Knobs.h | 5 +- flowbench/BenchSamples.cpp | 54 +++ tests/CMakeLists.txt | 1 + 54 files changed, 572 insertions(+), 157 deletions(-) create mode 100644 fdbrpc/include/fdbrpc/DDSketch.h create mode 100644 fdbserver/workloads/BulkSetup.actor.cpp diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 0345bfee39..17c5d749f5 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -578,7 +578,7 @@ void traceTSSErrors(const char* name, UID tssId, const std::unordered_map& sample) { +void traceSSOrTSSPercentiles(TraceEvent& ev, const std::string name, DDSketch& sample) { ev.detail(name + "Mean", sample.mean()); // don't log the larger percentiles unless we actually have enough samples to log the accurate percentile instead of // the largest sample in this window @@ -595,8 +595,8 @@ void traceSSOrTSSPercentiles(TraceEvent& ev, const std::string name, ContinuousS void traceTSSPercentiles(TraceEvent& ev, const std::string name, - ContinuousSample& ssSample, - ContinuousSample& tssSample) { + DDSketch& ssSample, + DDSketch& tssSample) { ASSERT(ssSample.getPopulationSize() == tssSample.getPopulationSize()); ev.detail(name + "Count", ssSample.getPopulationSize()); if (ssSample.getPopulationSize() > 0) { @@ -1534,12 +1534,12 @@ DatabaseContext::DatabaseContext(Reference resnapshotLock, Reference deltaWritesLock, double sampleLoggingInterval, - int fileOpLatencySampleSize, - int requestLatencySampleSize) + int fileOpLatencySketchAccuracy, + int requestLatencySketchAccuracy) : cc("BlobWorkerStats", id.toString()), s3PutReqs("S3PutReqs", cc), 
s3GetReqs("S3GetReqs", cc), s3DeleteReqs("S3DeleteReqs", cc), @@ -95,10 +95,10 @@ struct BlobWorkerStats { forceFlushCleanups("ForceFlushCleanups", cc), readDrivenCompactions("ReadDrivenCompactions", cc), numRangesAssigned(0), mutationBytesBuffered(0), activeReadRequests(0), granulesPendingSplitCheck(0), minimumCFVersion(0), cfVersionLag(0), notAtLatestChangeFeeds(0), lastResidentMemory(0), - snapshotBlobWriteLatencySample("SnapshotBlobWriteMetrics", id, sampleLoggingInterval, fileOpLatencySampleSize), - deltaBlobWriteLatencySample("DeltaBlobWriteMetrics", id, sampleLoggingInterval, fileOpLatencySampleSize), - reSnapshotLatencySample("GranuleResnapshotMetrics", id, sampleLoggingInterval, fileOpLatencySampleSize), - readLatencySample("GranuleReadLatencyMetrics", id, sampleLoggingInterval, requestLatencySampleSize), + snapshotBlobWriteLatencySample("SnapshotBlobWriteMetrics", id, sampleLoggingInterval, fileOpLatencySketchAccuracy), + deltaBlobWriteLatencySample("DeltaBlobWriteMetrics", id, sampleLoggingInterval, fileOpLatencySketchAccuracy), + reSnapshotLatencySample("GranuleResnapshotMetrics", id, sampleLoggingInterval, fileOpLatencySketchAccuracy), + readLatencySample("GranuleReadLatencyMetrics", id, sampleLoggingInterval, requestLatencySketchAccuracy), estimatedMaxResidentMemory(0), initialSnapshotLock(initialSnapshotLock), resnapshotLock(resnapshotLock), deltaWritesLock(deltaWritesLock) { specialCounter(cc, "NumRangesAssigned", [this]() { return this->numRangesAssigned; }); diff --git a/fdbclient/include/fdbclient/DatabaseContext.h b/fdbclient/include/fdbclient/DatabaseContext.h index f28da0399a..32b4a7c153 100644 --- a/fdbclient/include/fdbclient/DatabaseContext.h +++ b/fdbclient/include/fdbclient/DatabaseContext.h @@ -42,8 +42,8 @@ #include "fdbrpc/MultiInterface.h" #include "flow/TDMetric.actor.h" #include "fdbclient/EventTypes.actor.h" -#include "fdbrpc/ContinuousSample.h" #include "fdbrpc/Smoother.h" +#include "fdbrpc/DDSketch.h" class StorageServerInfo : 
public ReferencedInterface { public: @@ -565,7 +565,7 @@ public: Counter bgReadRowsCleared; Counter bgReadRowsInserted; Counter bgReadRowsUpdated; - ContinuousSample bgLatencies, bgGranulesPerRequest; + DDSketch bgLatencies, bgGranulesPerRequest; // Change Feed metrics. Omit change feed metrics from logging if not used bool usedAnyChangeFeeds; @@ -577,7 +577,7 @@ public: Counter feedPops; Counter feedPopsFallback; - ContinuousSample latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, + DDSketch latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit; int outstandingWatches; diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h index da85f88625..9b5c2b939e 100644 --- a/fdbclient/include/fdbclient/ServerKnobs.h +++ b/fdbclient/include/fdbclient/ServerKnobs.h @@ -920,8 +920,8 @@ public: std::string REDWOOD_PRIORITY_LAUNCHS; // Server request latency measurement - int LATENCY_SAMPLE_SIZE; - int FILE_LATENCY_SAMPLE_SIZE; + double LATENCY_SKETCH_ACCURACY; + double FILE_LATENCY_SKETCH_ACCURACY; double LATENCY_METRICS_LOGGING_INTERVAL; // Cluster recovery diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index 8bb476efc9..23e6de902e 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -878,11 +878,11 @@ Peer::Peer(TransportData* transport, NetworkAddress const& destination) : transport(transport), destination(destination), compatible(true), outgoingConnectionIdle(true), lastConnectTime(0.0), reconnectionDelay(FLOW_KNOBS->INITIAL_RECONNECTION_TIME), peerReferences(-1), bytesReceived(0), bytesSent(0), lastDataPacketSentTime(now()), outstandingReplies(0), - pingLatencies(destination.isPublic() ? FLOW_KNOBS->PING_SAMPLE_AMOUNT : 1), lastLoggedTime(0.0), + pingLatencies(destination.isPublic() ? 
FLOW_KNOBS->PING_SKETCH_ACCURACY : 0.1), lastLoggedTime(0.0), lastLoggedBytesReceived(0), lastLoggedBytesSent(0), timeoutCount(0), protocolVersion(Reference>>(new AsyncVar>())), connectOutgoingCount(0), connectIncomingCount(0), connectFailedCount(0), - connectLatencies(destination.isPublic() ? FLOW_KNOBS->NETWORK_CONNECT_SAMPLE_AMOUNT : 1) { + connectLatencies(destination.isPublic() ? FLOW_KNOBS->PING_SKETCH_ACCURACY : 0.1) { IFailureMonitor::failureMonitor().setStatus(destination, FailureStatus(false)); } diff --git a/fdbrpc/include/fdbrpc/AsyncFileKAIO.actor.h b/fdbrpc/include/fdbrpc/AsyncFileKAIO.actor.h index 40a84e6d9e..4925990342 100644 --- a/fdbrpc/include/fdbrpc/AsyncFileKAIO.actor.h +++ b/fdbrpc/include/fdbrpc/AsyncFileKAIO.actor.h @@ -62,15 +62,15 @@ public: LatencySample readLatencySample = { "AsyncFileKAIOReadLatency", UID(), FLOW_KNOBS->KAIO_LATENCY_LOGGING_INTERVAL, - FLOW_KNOBS->KAIO_LATENCY_SAMPLE_SIZE }; + FLOW_KNOBS->KAIO_LATENCY_SKETCH_ACCURACY }; LatencySample writeLatencySample = { "AsyncFileKAIOWriteLatency", UID(), FLOW_KNOBS->KAIO_LATENCY_LOGGING_INTERVAL, - FLOW_KNOBS->KAIO_LATENCY_SAMPLE_SIZE }; + FLOW_KNOBS->KAIO_LATENCY_SKETCH_ACCURACY }; LatencySample syncLatencySample = { "AsyncFileKAIOSyncLatency", UID(), FLOW_KNOBS->KAIO_LATENCY_LOGGING_INTERVAL, - FLOW_KNOBS->KAIO_LATENCY_SAMPLE_SIZE }; + FLOW_KNOBS->KAIO_LATENCY_SKETCH_ACCURACY }; }; static AsyncFileKAIOMetrics& getMetrics() { diff --git a/fdbrpc/include/fdbrpc/DDSketch.h b/fdbrpc/include/fdbrpc/DDSketch.h new file mode 100644 index 0000000000..2bbe350ab8 --- /dev/null +++ b/fdbrpc/include/fdbrpc/DDSketch.h @@ -0,0 +1,311 @@ +/* + * DDSketch.h + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2020 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DDSKETCH_H +#define DDSKETCH_H +#include +#include +#include +#pragma once + +#include +#include +#include +#include +#include "flow/Error.h" +#include "flow/UnitTest.h" + +// A namespace for fast log() computation. +namespace fastLogger { +// Basically, the goal is to compute log(x)/log(r). +// For double, it is represented as 2^e*(1+s) (0<=s<1), so our goal becomes +// e*log(2)/log(r)*log(1+s), and we approximate log(1+s) with a cubic function. +// See more details on Datadog's paper, or CubicallyInterpolatedMapping.java in +// https://github.com/DataDog/sketches-java/ +inline const double correctingFactor = 1.00988652862227438516; // = 7 / (10 * log(2)); +constexpr inline const double A = 6.0 / 35.0, B = -3.0 / 5.0, C = 10.0 / 7.0; + +inline double fastlog(double value) { + int e; + double s = frexp(value, &e); + s = s * 2 - 1; + return ((A * s + B) * s + C) * s + e - 1; +} + +inline double reverseLog(double index) { + long exponent = floor(index); + // Derived from Cardano's formula + double d0 = B * B - 3 * A * C; + double d1 = 2 * B * B * B - 9 * A * B * C - 27 * A * A * (index - exponent); + double p = cbrt((d1 - sqrt(d1 * d1 - 4 * d0 * d0 * d0)) / 2); + double significandPlusOne = -(B + p + d0 / p) / (3 * A) + 1; + return ldexp(significandPlusOne / 2, exponent + 1); +} +}; // namespace fastLogger + +// DDSketch for non-negative numbers (those < EPS = 10^-18 are +// treated as 0, and huge numbers (>1/EPS) fail ASSERT). This is the base +// class without a concrete log() implementation. 
+template +class DDSketchBase { + + static constexpr T defaultMin() { return std::numeric_limits::max(); } + + static constexpr T defaultMax() { + if constexpr (std::is_floating_point_v) { + return -std::numeric_limits::max(); + } else { + return std::numeric_limits::min(); + } + } + +public: + explicit DDSketchBase(double errorGuarantee) + : errorGuarantee(errorGuarantee), populationSize(0), zeroPopulationSize(0), minValue(defaultMin()), + maxValue(defaultMax()), sum(T()) {} + + DDSketchBase& addSample(T sample) { + // Call it addSample for now, while it is not a sample anymore + if (!populationSize) + minValue = maxValue = sample; + + if (sample <= EPS) { + zeroPopulationSize++; + } else { + int index = static_cast(this)->getIndex(sample); + assert(index >= 0 && index < buckets.size()); + buckets[index]++; + } + + populationSize++; + sum += sample; + maxValue = std::max(maxValue, sample); + minValue = std::min(minValue, sample); + return *this; + } + + double mean() const { + if (populationSize == 0) + return 0; + return (double)sum / populationSize; + } + + T median() { return percentile(0.5); } + + T percentile(double percentile) { + assert(percentile >= 0 && percentile <= 1); + + if (populationSize == 0) + return T(); + uint64_t targetPercentilePopulation = percentile * (populationSize - 1); + // Now find the tPP-th (0-indexed) element + if (targetPercentilePopulation < zeroPopulationSize) + return T(0); + + int index = -1; + [[maybe_unused]] bool found = false; + if (percentile <= 0.5) { // count up + uint64_t count = zeroPopulationSize; + for (size_t i = 0; i < buckets.size(); i++) { + if (targetPercentilePopulation < count + buckets[i]) { + // count + buckets[i] = # of numbers so far (from the rightmost to + // this bucket, inclusive), so if target is in this bucket, it should + // means tPP < cnt + bck[i] + found = true; + index = i; + break; + } + count += buckets[i]; + } + } else { // and count down + uint64_t count = 0; + for (auto rit = 
buckets.rbegin(); rit != buckets.rend(); rit++) { + if (targetPercentilePopulation + count + *rit >= populationSize) { + // cnt + bkt[i] is # of numbers to the right of this bucket (incl.), + // so if target is not in this bucket (i.e., to the left of this + // bucket), it would be as right as the left bucket's rightmost + // number, so we would have tPP + cnt + bkt[i] < total population (tPP + // is 0-indexed), that means target is in this bucket if this + // condition is not satisfied. + found = true; + index = std::distance(rit, buckets.rend()) - 1; + break; + } + count += *rit; + } + } + assert(found); + return static_cast(this)->getValue(index); + } + + T min() const { return minValue; } + T max() const { return maxValue; } + + void clear() { + std::fill(buckets.begin(), buckets.end(), 0); + populationSize = zeroPopulationSize = 0; + sum = 0; + minValue = defaultMin(); + maxValue = defaultMax(); + } + + uint64_t getPopulationSize() const { return populationSize; } + + double getErrorGuarantee() const { return errorGuarantee; } + + size_t getBucketSize() const { return buckets.size(); } + + DDSketchBase& mergeWith(const DDSketchBase& anotherSketch) { + // Must have the same guarantee + assert(fabs(errorGuarantee - anotherSketch.errorGuarantee) < EPS && + anotherSketch.buckets.size() == buckets.size()); + for (size_t i = 0; i < anotherSketch.buckets.size(); i++) { + buckets[i] += anotherSketch.buckets[i]; + } + populationSize += anotherSketch.populationSize; + zeroPopulationSize += anotherSketch.zeroPopulationSize; + minValue = std::min(minValue, anotherSketch.minValue); + maxValue = std::max(maxValue, anotherSketch.maxValue); + sum += anotherSketch.sum; + return *this; + } + + constexpr static double EPS = 1e-18; // smaller numbers are considered as 0 +protected: + double errorGuarantee; // As defined in the paper + + uint64_t populationSize, zeroPopulationSize; // we need to separately count 0s + std::vector buckets; + T minValue, maxValue, sum; + void 
setBucketSize(int capacity) { buckets.resize(capacity, 0); } +}; + +// DDSketch with fast log implementation for float numbers +template +class DDSketch : public DDSketchBase, T> { +public: + explicit DDSketch(double errorGuarantee = 0.1) + : DDSketchBase, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)), + multiplier(fastLogger::correctingFactor * log(2) / log(gamma)) { + offset = getIndex(1.0 / DDSketchBase, T>::EPS); + this->setBucketSize(2 * offset); + } + + int getIndex(T sample) { + static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); + return ceil(fastLogger::fastlog(sample) * multiplier) + offset; + } + + T getValue(int index) { return fastLogger::reverseLog((index - offset) / multiplier) * 2.0 / (1 + gamma); } + +private: + double gamma, multiplier; + int offset = 0; +}; + +// DDSketch with log. Slow; only use this when the others don't work. +template +class DDSketchSlow : public DDSketchBase, T> { +public: + DDSketchSlow(double errorGuarantee = 0.1) + : DDSketchBase, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)), + logGamma(log(gamma)) { + offset = getIndex(1.0 / DDSketchBase, T>::EPS) + 5; + this->setBucketSize(2 * offset); + } + + int getIndex(T sample) { return ceil(log(sample) / logGamma) + offset; } + + T getValue(int index) { return (T)(2.0 * pow(gamma, (index - offset)) / (1 + gamma)); } + +private: + double gamma, logGamma; + int offset = 0; +}; + +// DDSketch for unsigned int. Faster than the float version. Fixed accuracy. +class DDSketchFastUnsigned : public DDSketchBase { +public: + DDSketchFastUnsigned() : DDSketchBase(errorGuarantee) { this->setBucketSize(129); } + + int getIndex(unsigned sample) { + __uint128_t v = sample; + v *= v; + v *= v; // sample^4 + uint64_t low = (uint64_t)v, high = (uint64_t)(v >> 64); + + return 128 - (high == 0 ? ((low == 0 ? 
64 : __builtin_clzll(low)) + 64) : __builtin_clzll(high)); + } + + unsigned getValue(int index) { + double r = 1, g = gamma; + while (index) { // quick power method for power(gamma, index) + if (index & 1) + r *= g; + g *= g; + index >>= 1; + } + // 2.0 * pow(gamma, index) / (1 + gamma) is what we need + return (unsigned)(2.0 * r / (1 + gamma) + 0.5); // round to nearest int + } + +private: + constexpr static double errorGuarantee = 0.08642723372; + // getIndex basically calc floor(log_2(x^4)) + 1, + // which is almost ceil(log_2(x^4)) as it only matters when x is a power of 2, + // and it does not change the error bound. Original sketch asks for + // ceil(log_r(x)), so we know r = pow(2, 1/4) = 1.189207115. And r = (1 + eG) + // / (1 - eG) so eG = 0.08642723372. + constexpr static double gamma = 1.189207115; +}; + +#endif + +TEST_CASE("/fdbrpc/ddsketch/accuracy") { + + int TRY = 100, SIZE = 1e6; + const int totalPercentiles = 7; + double targetPercentiles[totalPercentiles] = { .0001, .01, .1, .50, .90, .99, .9999 }; + double stat[totalPercentiles] = { 0 }; + for (int t = 0; t < TRY; t++) { + DDSketch dd; + std::vector nums; + for (int i = 0; i < SIZE; i++) { + static double a = 1, b = 1; // a skewed distribution + auto y = deterministicRandom()->random01(); + auto num = b / pow(1 - y, 1 / a); + nums.push_back(num); + dd.addSample(num); + } + std::sort(nums.begin(), nums.end()); + for (int percentID = 0; percentID < totalPercentiles; percentID++) { + double percentile = targetPercentiles[percentID]; + double ground = nums[percentile * (SIZE - 1)], ddvalue = dd.percentile(percentile); + double relativeError = fabs(ground - ddvalue) / ground; + stat[percentID] += relativeError; + } + } + + for (int percentID = 0; percentID < totalPercentiles; percentID++) { + printf("%.4lf per, relative error %.4lf\n", targetPercentiles[percentID], stat[percentID] / TRY); + } + + return Void(); +} diff --git a/fdbrpc/include/fdbrpc/FlowTransport.h 
b/fdbrpc/include/fdbrpc/FlowTransport.h index 0f0c3a52e5..d8b4652e20 100644 --- a/fdbrpc/include/fdbrpc/FlowTransport.h +++ b/fdbrpc/include/fdbrpc/FlowTransport.h @@ -24,7 +24,7 @@ #include -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbrpc/HealthMonitor.h" #include "flow/genericactors.actor.h" #include "flow/network.h" @@ -159,7 +159,7 @@ struct Peer : public ReferenceCounted { int64_t bytesSent; double lastDataPacketSentTime; int outstandingReplies; - ContinuousSample pingLatencies; + DDSketch pingLatencies; double lastLoggedTime; int64_t lastLoggedBytesReceived; int64_t lastLoggedBytesSent; @@ -171,7 +171,7 @@ struct Peer : public ReferenceCounted { int connectOutgoingCount; int connectIncomingCount; int connectFailedCount; - ContinuousSample connectLatencies; + DDSketch connectLatencies; Promise disconnect; explicit Peer(TransportData* transport, NetworkAddress const& destination); diff --git a/fdbrpc/include/fdbrpc/Stats.h b/fdbrpc/include/fdbrpc/Stats.h index 2247299580..46d334c6e0 100644 --- a/fdbrpc/include/fdbrpc/Stats.h +++ b/fdbrpc/include/fdbrpc/Stats.h @@ -38,7 +38,7 @@ MyCounters() : foo("foo", cc), bar("bar", cc), baz("baz", cc) {} #include #include "flow/flow.h" #include "flow/TDMetric.actor.h" -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" struct ICounter { // All counters have a name and value @@ -216,40 +216,39 @@ public: class LatencySample { public: - LatencySample(std::string name, UID id, double loggingInterval, int sampleSize) - : name(name), id(id), sampleStart(now()), sample(sampleSize), - latencySampleEventHolder(makeReference(id.toString() + "/" + name)) { + LatencySample(std::string name, UID id, double loggingInterval, double accuracy) + : name(name), id(id), sampleStart(now()), sketch(accuracy) { logger = recurring([this]() { logSample(); }, loggingInterval); } - void addMeasurement(double measurement) { sample.addSample(measurement); } + void addMeasurement(double measurement) { 
sketch.addSample(measurement); } private: std::string name; UID id; double sampleStart; - ContinuousSample sample; + DDSketch sketch; Future logger; Reference latencySampleEventHolder; void logSample() { TraceEvent(name.c_str(), id) - .detail("Count", sample.getPopulationSize()) + .detail("Count", sketch.getPopulationSize()) .detail("Elapsed", now() - sampleStart) - .detail("Min", sample.min()) - .detail("Max", sample.max()) - .detail("Mean", sample.mean()) - .detail("Median", sample.median()) - .detail("P25", sample.percentile(0.25)) - .detail("P90", sample.percentile(0.9)) - .detail("P95", sample.percentile(0.95)) - .detail("P99", sample.percentile(0.99)) - .detail("P99.9", sample.percentile(0.999)) - .trackLatest(latencySampleEventHolder->trackingKey); + .detail("Min", sketch.min()) + .detail("Max", sketch.max()) + .detail("Mean", sketch.mean()) + .detail("Median", sketch.median()) + .detail("P25", sketch.percentile(0.25)) + .detail("P90", sketch.percentile(0.9)) + .detail("P95", sketch.percentile(0.95)) + .detail("P99", sketch.percentile(0.99)) + .detail("P99.9", sketch.percentile(0.999)) + .trackLatest(id.toString() + "/" + name); - sample.clear(); + sketch.clear(); sampleStart = now(); } }; diff --git a/fdbrpc/include/fdbrpc/TSSComparison.h b/fdbrpc/include/fdbrpc/TSSComparison.h index 3c0765c948..7fcc84499b 100644 --- a/fdbrpc/include/fdbrpc/TSSComparison.h +++ b/fdbrpc/include/fdbrpc/TSSComparison.h @@ -25,7 +25,6 @@ #ifndef FDBRPC_TSS_COMPARISON_H #define FDBRPC_TSS_COMPARISON_H -#include "fdbrpc/ContinuousSample.h" #include "fdbrpc/Stats.h" // refcounted + noncopyable because both DatabaseContext and individual endpoints share ownership @@ -48,15 +47,15 @@ struct TSSMetrics : ReferenceCounted, NonCopyable { Counter mismatches; // We could probably just ignore getKey as it's seldom used? 
- ContinuousSample SSgetValueLatency; - ContinuousSample SSgetKeyLatency; - ContinuousSample SSgetKeyValuesLatency; - ContinuousSample SSgetMappedKeyValuesLatency; + DDSketch SSgetValueLatency; + DDSketch SSgetKeyLatency; + DDSketch SSgetKeyValuesLatency; + DDSketch SSgetMappedKeyValuesLatency; - ContinuousSample TSSgetValueLatency; - ContinuousSample TSSgetKeyLatency; - ContinuousSample TSSgetKeyValuesLatency; - ContinuousSample TSSgetMappedKeyValuesLatency; + DDSketch TSSgetValueLatency; + DDSketch TSSgetKeyLatency; + DDSketch TSSgetKeyValuesLatency; + DDSketch TSSgetMappedKeyValuesLatency; std::unordered_map ssErrorsByCode; std::unordered_map tssErrorsByCode; diff --git a/fdbserver/BlobWorker.actor.cpp b/fdbserver/BlobWorker.actor.cpp index ccd7284fe1..157c34bf72 100644 --- a/fdbserver/BlobWorker.actor.cpp +++ b/fdbserver/BlobWorker.actor.cpp @@ -303,8 +303,8 @@ struct BlobWorkerData : NonCopyable, ReferenceCounted { resnapshotLock, deltaWritesLock, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->FILE_LATENCY_SAMPLE_SIZE, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->FILE_LATENCY_SKETCH_ACCURACY, + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), isEncryptionEnabled(isEncryptionOpSupported(EncryptOperationType::BLOB_GRANULE_ENCRYPTION)) {} bool managerEpochOk(int64_t epoch) { @@ -1750,7 +1750,6 @@ bool granuleCanRetry(const Error& e) { case error_code_http_request_failed: case error_code_connection_failed: case error_code_lookup_failed: // dns - case error_code_platform_error: // injected faults return true; default: return false; diff --git a/fdbserver/EncryptKeyProxy.actor.cpp b/fdbserver/EncryptKeyProxy.actor.cpp index 7eb8b264a5..37c085db98 100644 --- a/fdbserver/EncryptKeyProxy.actor.cpp +++ b/fdbserver/EncryptKeyProxy.actor.cpp @@ -244,15 +244,15 @@ public: kmsLookupByIdsReqLatency("EKPKmsLookupByIdsReqLatency", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), 
kmsLookupByDomainIdsReqLatency("EKPKmsLookupByDomainIdsReqLatency", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), kmsBlobMetadataReqLatency("EKPKmsBlobMetadataReqLatency", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE) {} + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY) {} EncryptBaseCipherDomainIdKeyIdCacheKey getBaseCipherDomainIdKeyIdCacheKey( const EncryptCipherDomainId domainId, diff --git a/fdbserver/GrvProxyServer.actor.cpp b/fdbserver/GrvProxyServer.actor.cpp index ebca499738..49fbe4445b 100644 --- a/fdbserver/GrvProxyServer.actor.cpp +++ b/fdbserver/GrvProxyServer.actor.cpp @@ -117,20 +117,20 @@ struct GrvProxyStats { defaultTxnGRVTimeInQueue("DefaultTxnGRVTimeInQueue", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), batchTxnGRVTimeInQueue("BatchTxnGRVTimeInQueue", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), grvLatencyBands("GRVLatencyBands", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY), grvLatencySample("GRVLatencyMetrics", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), grvBatchLatencySample("GRVBatchLatencyMetrics", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), recentRequests(0), lastBucketBegin(now()), bucketInterval(FLOW_KNOBS->BASIC_LOAD_BALANCE_UPDATE_RATE / FLOW_KNOBS->BASIC_LOAD_BALANCE_BUCKETS), grvConfirmEpochLiveDist( @@ -215,7 +215,7 @@ struct GrvProxyData { versionVectorSizeOnGRVReply("VersionVectorSizeOnGRVReply", dbgid, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), updateCommitRequests(0), lastCommitTime(0), 
version(0), minKnownCommittedVersion(invalidVersion), tagThrottler(SERVER_KNOBS->PROXY_MAX_TAG_THROTTLE_DURATION) {} }; diff --git a/fdbserver/KeyValueStoreRocksDB.actor.cpp b/fdbserver/KeyValueStoreRocksDB.actor.cpp index 5bf5efd1d8..490f2bfa4b 100644 --- a/fdbserver/KeyValueStoreRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp @@ -111,15 +111,15 @@ SharedRocksDBState::SharedRocksDBState(UID id) readOptions(initialReadOptions()), commitLatency(LatencySample("RocksDBCommitLatency", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE)), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY)), commitQueueLatency(LatencySample("RocksDBCommitQueueLatency", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE)), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY)), dbWriteLatency(LatencySample("RocksDBWriteLatency", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE)) {} + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY)) {} rocksdb::ColumnFamilyOptions SharedRocksDBState::initialCfOptions() { rocksdb::ColumnFamilyOptions options; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index dd2e83e360..1c5ddcab0c 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1825,7 +1825,7 @@ Future tLogPeekMessages(PromiseType replyPromise, UID ssID = nondeterministicRandom()->randomUniqueID(); std::string s = "BlockingPeekLatencies-" + reqTag.toString(); logData->blockingPeekLatencies.try_emplace( - reqTag, s, ssID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_SAMPLE_SIZE); + reqTag, s, ssID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_SKETCH_ACCURACY); } LatencySample& sample = logData->blockingPeekLatencies.at(reqTag); sample.addMeasurement(latency); diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 24c9f47cbe..30c80c128a 100644 --- 
a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -21,7 +21,7 @@ #include "fdbclient/CommitTransaction.h" #include "fdbclient/FDBTypes.h" #include "fdbclient/Tuple.h" -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbrpc/simulator.h" #include "fdbserver/DeltaTree.h" #include "fdbserver/IKeyValueStore.h" diff --git a/fdbserver/include/fdbserver/ProxyCommitData.actor.h b/fdbserver/include/fdbserver/ProxyCommitData.actor.h index 379f13bc51..d8db57a650 100644 --- a/fdbserver/include/fdbserver/ProxyCommitData.actor.h +++ b/fdbserver/include/fdbserver/ProxyCommitData.actor.h @@ -121,20 +121,20 @@ struct ProxyStats { commitLatencySample("CommitLatencyMetrics", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), commitLatencyBands("CommitLatencyBands", id, SERVER_KNOBS->STORAGE_LOGGING_DELAY), commitBatchingEmptyMessageRatio("CommitBatchingEmptyMessageRatio", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), commitBatchingWindowSize("CommitBatchingWindowSize", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), computeLatency("ComputeLatency", id, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), maxComputeNS(0), minComputeNS(1e12), commitBatchQueuingDist( Histogram::getHistogram("CommitProxy"_sr, "CommitBatchQueuing"_sr, Histogram::Unit::microseconds)), diff --git a/fdbserver/include/fdbserver/workloads/ReadWriteWorkload.actor.h b/fdbserver/include/fdbserver/workloads/ReadWriteWorkload.actor.h index 89bc36f393..5323235795 100644 --- a/fdbserver/include/fdbserver/workloads/ReadWriteWorkload.actor.h +++ b/fdbserver/include/fdbserver/workloads/ReadWriteWorkload.actor.h @@ -25,6 +25,7 @@ #elif 
!defined(FDBSERVER_READWRITEWORKLOAD_ACTOR_H) #define FDBSERVER_READWRITEWORKLOAD_ACTOR_H +#include "fdbrpc/DDSketch.h" #include "fdbserver/workloads/workloads.actor.h" #include "flow/TDMetric.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. @@ -75,7 +76,7 @@ struct ReadWriteCommon : KVWorkload { EventMetricHandle transactionFailureMetric; EventMetricHandle readMetric; PerfIntCounter aTransactions, bTransactions, retries; - ContinuousSample latencies, readLatencies, commitLatencies, GRVLatencies, fullReadLatencies; + DDSketch latencies, readLatencies, commitLatencies, GRVLatencies, fullReadLatencies; double readLatencyTotal; int readLatencyCount; std::vector periodicMetrics; diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index 69a02e2bc2..2a1f0ee94b 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -102,17 +102,17 @@ struct MasterData : NonCopyable, ReferenceCounted { versionVectorTagUpdates("VersionVectorTagUpdates", dbgid, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), waitForPrevCommitRequests("WaitForPrevCommitRequests", cc), nonWaitForPrevCommitRequests("NonWaitForPrevCommitRequests", cc), versionVectorSizeOnCVReply("VersionVectorSizeOnCVReply", dbgid, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), waitForPrevLatencies("WaitForPrevLatencies", dbgid, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), addActor(addActor) { logger = cc.traceCounters("MasterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "MasterMetrics"); if (forceRecovery && !myInterface.locality.dcId().present()) { diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index a6bb04860b..db5c63f9e8 100644 --- 
a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -1266,48 +1266,48 @@ public: readLatencySample("ReadLatencyMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), readKeyLatencySample("GetKeyMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), readValueLatencySample("GetValueMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), readRangeLatencySample("GetRangeMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), readVersionWaitSample("ReadVersionWaitMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), readQueueWaitSample("ReadQueueWaitMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), readLatencyBands("ReadLatencyBands", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY), mappedRangeSample("GetMappedRangeMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), mappedRangeRemoteSample("GetMappedRangeRemoteMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), mappedRangeLocalSample("GetMappedRangeLocalMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), kvReadRangeLatencySample("KVGetRangeMetrics", 
self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), updateLatencySample("UpdateLatencyMetrics", self->thisServerID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE) { + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY) { specialCounter(cc, "LastTLogVersion", [self]() { return self->lastTLogVersion; }); specialCounter(cc, "Version", [self]() { return self->version.get(); }); specialCounter(cc, "StorageVersion", [self]() { return self->storageVersion(); }); diff --git a/fdbserver/workloads/AtomicOps.actor.cpp b/fdbserver/workloads/AtomicOps.actor.cpp index e679dce9f9..662d4d30ae 100644 --- a/fdbserver/workloads/AtomicOps.actor.cpp +++ b/fdbserver/workloads/AtomicOps.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/BulkSetup.actor.h" diff --git a/fdbserver/workloads/BackgroundSelectors.actor.cpp b/fdbserver/workloads/BackgroundSelectors.actor.cpp index 2b55db49ad..5f255535d3 100644 --- a/fdbserver/workloads/BackgroundSelectors.actor.cpp +++ b/fdbserver/workloads/BackgroundSelectors.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/workloads.actor.h" diff --git a/fdbserver/workloads/BulkLoad.actor.cpp b/fdbserver/workloads/BulkLoad.actor.cpp index cc5447c321..684108d811 100644 --- a/fdbserver/workloads/BulkLoad.actor.cpp +++ b/fdbserver/workloads/BulkLoad.actor.cpp @@ -18,7 +18,7 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/workloads.actor.h" @@ -34,11 +34,10 @@ struct BulkLoadWorkload : TestWorkload { std::vector> clients; PerfIntCounter transactions, retries; - ContinuousSample latencies; + DDSketch latencies; BulkLoadWorkload(WorkloadContext const& wcx) - : TestWorkload(wcx), clientCount(wcx.clientCount), transactions("Transactions"), retries("Retries"), - latencies(2000) { + : TestWorkload(wcx), clientCount(wcx.clientCount), transactions("Transactions"), retries("Retries"), latencies() { testDuration = getOption(options, "testDuration"_sr, 10.0); actorCount = getOption(options, "actorCount"_sr, 20); writesPerTransaction = getOption(options, "writesPerTransaction"_sr, 10); diff --git a/fdbserver/workloads/BulkSetup.actor.cpp b/fdbserver/workloads/BulkSetup.actor.cpp new file mode 100644 index 0000000000..59389cf0f7 --- /dev/null +++ b/fdbserver/workloads/BulkSetup.actor.cpp @@ -0,0 +1,74 @@ +/* + * BulkSetup.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fdbclient/NativeAPI.actor.h" +#include "fdbserver/TesterInterface.actor.h" +#include "fdbserver/workloads/workloads.actor.h" +#include "fdbserver/workloads/BulkSetup.actor.h" +#include "flow/actorcompiler.h" // This must be the last #include. + +struct BulkSetupWorkload : TestWorkload { + + std::vector tenantNames; + int nodeCount; + double transactionsPerSecond; + Key keyPrefix; + + BulkSetupWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { + transactionsPerSecond = getOption(options, "transactionsPerSecond"_sr, 5000.0) / clientCount; + nodeCount = getOption(options, "nodeCount"_sr, transactionsPerSecond * clientCount); + keyPrefix = unprintable(getOption(options, "keyPrefix"_sr, LiteralStringRef("")).toString()); + std::vector tenants = getOption(options, "tenants"_sr, std::vector()); + for (std::string tenant : tenants) { + tenantNames.push_back(TenantName(tenant)); + } + } + + std::string description() const override { return "BulkSetup"; } + + void getMetrics(std::vector& m) override {} + + Key keyForIndex(int n) { return key(n); } + Key key(int n) { return doubleToTestKey((double)n / nodeCount, keyPrefix); } + Value value(int n) { return doubleToTestKey(n, keyPrefix); } + + Standalone operator()(int n) { return KeyValueRef(key(n), value((n + 1) % nodeCount)); } + + Future start(Database const& cx) override { + return bulkSetup(cx, + this, + nodeCount, + Promise(), + false, + 0.0, + 1e12, + std::vector(), + Promise>>(), + 0, + 0.1, + 0, + 0, + this->tenantNames); + } + + Future check(Database const& cx) override { return true; } +}; + +WorkloadFactory BulkSetupWorkloadFactory("BulkSetup"); diff --git a/fdbserver/workloads/DDBalance.actor.cpp b/fdbserver/workloads/DDBalance.actor.cpp index efd3dbfb84..95fe5143b1 100644 --- a/fdbserver/workloads/DDBalance.actor.cpp +++ b/fdbserver/workloads/DDBalance.actor.cpp @@ -18,7 +18,7 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/workloads.actor.h" @@ -33,10 +33,10 @@ struct DDBalanceWorkload : TestWorkload { std::vector> clients; PerfIntCounter bin_shifts, operations, retries; - ContinuousSample latencies; + DDSketch latencies; DDBalanceWorkload(WorkloadContext const& wcx) - : TestWorkload(wcx), bin_shifts("Bin_Shifts"), operations("Operations"), retries("Retries"), latencies(2000) { + : TestWorkload(wcx), bin_shifts("Bin_Shifts"), operations("Operations"), retries("Retries"), latencies() { testDuration = getOption(options, "testDuration"_sr, 10.0); binCount = getOption(options, "binCount"_sr, 1000); writesPerTransaction = getOption(options, "writesPerTransaction"_sr, 1); diff --git a/fdbserver/workloads/FastTriggeredWatches.actor.cpp b/fdbserver/workloads/FastTriggeredWatches.actor.cpp index 32ba9ed1a6..b689adc3cf 100644 --- a/fdbserver/workloads/FastTriggeredWatches.actor.cpp +++ b/fdbserver/workloads/FastTriggeredWatches.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbclient/ReadYourWrites.h" diff --git a/fdbserver/workloads/FileSystem.actor.cpp b/fdbserver/workloads/FileSystem.actor.cpp index 722c1c59ac..b0cfbdeedc 100644 --- a/fdbserver/workloads/FileSystem.actor.cpp +++ b/fdbserver/workloads/FileSystem.actor.cpp @@ -18,7 +18,7 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/workloads.actor.h" @@ -33,8 +33,8 @@ struct FileSystemWorkload : TestWorkload { std::vector> clients; PerfIntCounter queries, writes; - ContinuousSample latencies; - ContinuousSample writeLatencies; + DDSketch latencies; + DDSketch writeLatencies; class FileSystemOp { public: @@ -44,7 +44,7 @@ struct FileSystemWorkload : TestWorkload { }; FileSystemWorkload(WorkloadContext const& wcx) - : TestWorkload(wcx), queries("Queries"), writes("Latency"), latencies(2500), writeLatencies(1000) { + : TestWorkload(wcx), queries("Queries"), writes("Latency"), latencies(), writeLatencies() { testDuration = getOption(options, "testDuration"_sr, 10.0); transactionsPerSecond = getOption(options, "transactionsPerSecond"_sr, 5000.0) / clientCount; double allowedLatency = getOption(options, "allowedLatency"_sr, 0.250); diff --git a/fdbserver/workloads/IndexScan.actor.cpp b/fdbserver/workloads/IndexScan.actor.cpp index b0297a1c31..d5f8a57db4 100644 --- a/fdbserver/workloads/IndexScan.actor.cpp +++ b/fdbserver/workloads/IndexScan.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/workloads.actor.h" diff --git a/fdbserver/workloads/LowLatency.actor.cpp b/fdbserver/workloads/LowLatency.actor.cpp index 65ea46e750..3ac468ad9a 100644 --- a/fdbserver/workloads/LowLatency.actor.cpp +++ b/fdbserver/workloads/LowLatency.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/IKnobCollection.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" diff --git a/fdbserver/workloads/Mako.actor.cpp b/fdbserver/workloads/Mako.actor.cpp index ff30bbb2fd..7f18cd67b0 100644 --- a/fdbserver/workloads/Mako.actor.cpp +++ b/fdbserver/workloads/Mako.actor.cpp @@ -63,7 +63,7 @@ struct MakoWorkload : TestWorkload { // used for periodically tracing std::vector periodicMetrics; // store latency of each operation with sampling - std::vector> opLatencies; + std::vector> opLatencies; // key used to store checkSum for given key range std::vector csKeys; // key prefix of for all generated keys @@ -142,7 +142,7 @@ struct MakoWorkload : TestWorkload { parseOperationsSpec(); for (int i = 0; i < MAX_OP; ++i) { // initilize per-operation latency record - opLatencies.push_back(ContinuousSample(rowCount / sampleSize)); + opLatencies.push_back(DDSketch()); // initialize per-operation counter opCounters.push_back(PerfIntCounter(opNames[i])); } @@ -658,7 +658,7 @@ struct MakoWorkload : TestWorkload { return Void(); } ACTOR template - static Future logLatency(Future f, ContinuousSample* opLatencies) { + static Future logLatency(Future f, DDSketch* opLatencies) { state double opBegin = timer(); wait(success(f)); opLatencies->addSample(timer() - opBegin); diff --git a/fdbserver/workloads/MemoryLifetime.actor.cpp b/fdbserver/workloads/MemoryLifetime.actor.cpp index a206fbb7d2..8eb59b37a3 100644 --- a/fdbserver/workloads/MemoryLifetime.actor.cpp +++ b/fdbserver/workloads/MemoryLifetime.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "flow/DeterministicRandom.h" diff --git a/fdbserver/workloads/MetricLogging.actor.cpp b/fdbserver/workloads/MetricLogging.actor.cpp index 817727a6c7..4b3ce6b97e 100644 --- a/fdbserver/workloads/MetricLogging.actor.cpp +++ b/fdbserver/workloads/MetricLogging.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "flow/TDMetric.actor.h" diff --git a/fdbserver/workloads/QueuePush.actor.cpp b/fdbserver/workloads/QueuePush.actor.cpp index c7963a6b29..36d511d970 100644 --- a/fdbserver/workloads/QueuePush.actor.cpp +++ b/fdbserver/workloads/QueuePush.actor.cpp @@ -19,7 +19,7 @@ */ #include -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/workloads.actor.h" @@ -38,10 +38,10 @@ struct QueuePushWorkload : TestWorkload { std::vector> clients; PerfIntCounter transactions, retries; - ContinuousSample commitLatencies, GRVLatencies; + DDSketch commitLatencies, GRVLatencies; QueuePushWorkload(WorkloadContext const& wcx) - : TestWorkload(wcx), transactions("Transactions"), retries("Retries"), commitLatencies(2000), GRVLatencies(2000) { + : TestWorkload(wcx), transactions("Transactions"), retries("Retries"), commitLatencies(), GRVLatencies() { testDuration = getOption(options, "testDuration"_sr, 10.0); actorCount = getOption(options, "actorCount"_sr, 50); diff --git a/fdbserver/workloads/RYWDisable.actor.cpp b/fdbserver/workloads/RYWDisable.actor.cpp index 6d9d6a67bd..cbd5dfc818 100644 --- a/fdbserver/workloads/RYWDisable.actor.cpp +++ b/fdbserver/workloads/RYWDisable.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbclient/ReadYourWrites.h" diff --git a/fdbserver/workloads/RYWPerformance.actor.cpp b/fdbserver/workloads/RYWPerformance.actor.cpp index 21683439b7..ca45b0d482 100644 --- a/fdbserver/workloads/RYWPerformance.actor.cpp +++ b/fdbserver/workloads/RYWPerformance.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbclient/ReadYourWrites.h" diff --git a/fdbserver/workloads/ReadAfterWrite.actor.cpp b/fdbserver/workloads/ReadAfterWrite.actor.cpp index 219f30975f..ed080722d0 100644 --- a/fdbserver/workloads/ReadAfterWrite.actor.cpp +++ b/fdbserver/workloads/ReadAfterWrite.actor.cpp @@ -25,8 +25,6 @@ #include "flow/genericactors.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. -static constexpr int SAMPLE_SIZE = 10000; - // If the log->storage propagation delay is longer than 1 second, then it's likely that our read // will see a `future_version` error from the storage server. We need to retry the read until // a value is returned, or a different error is thrown. 
@@ -51,9 +49,9 @@ struct ReadAfterWriteWorkload : KVWorkload { static constexpr auto NAME = "ReadAfterWrite"; double testDuration; - ContinuousSample propagationLatency; + DDSketch propagationLatency; - ReadAfterWriteWorkload(WorkloadContext const& wcx) : KVWorkload(wcx), propagationLatency(SAMPLE_SIZE) { + ReadAfterWriteWorkload(WorkloadContext const& wcx) : KVWorkload(wcx), propagationLatency() { testDuration = getOption(options, "testDuration"_sr, 10.0); } diff --git a/fdbserver/workloads/ReadHotDetection.actor.cpp b/fdbserver/workloads/ReadHotDetection.actor.cpp index 7779d3a6b2..0bae939bb4 100644 --- a/fdbserver/workloads/ReadHotDetection.actor.cpp +++ b/fdbserver/workloads/ReadHotDetection.actor.cpp @@ -18,7 +18,7 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/BulkSetup.actor.h" diff --git a/fdbserver/workloads/ReadWrite.actor.cpp b/fdbserver/workloads/ReadWrite.actor.cpp index 1571e084e5..6d2e37b003 100644 --- a/fdbserver/workloads/ReadWrite.actor.cpp +++ b/fdbserver/workloads/ReadWrite.actor.cpp @@ -23,7 +23,7 @@ #include #include "fdbclient/FDBTypes.h" -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/WorkerInterface.actor.h" @@ -200,7 +200,7 @@ struct ReadWriteCommonImpl { } } ACTOR static Future logLatency(Future> f, - ContinuousSample* latencies, + DDSketch* latencies, double* totalLatency, int* latencyCount, EventMetricHandle readMetric, @@ -220,7 +220,7 @@ struct ReadWriteCommonImpl { return Void(); } ACTOR static Future logLatency(Future f, - ContinuousSample* latencies, + DDSketch* latencies, double* totalLatency, int* latencyCount, EventMetricHandle readMetric, diff --git a/fdbserver/workloads/SkewedReadWrite.actor.cpp 
b/fdbserver/workloads/SkewedReadWrite.actor.cpp index 20fcf24233..ba37e7ac42 100644 --- a/fdbserver/workloads/SkewedReadWrite.actor.cpp +++ b/fdbserver/workloads/SkewedReadWrite.actor.cpp @@ -22,7 +22,7 @@ #include #include -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/WorkerInterface.actor.h" @@ -389,4 +389,4 @@ TEST_CASE("/KVWorkload/methods/ParseKeyForIndex") { ASSERT(parse == idx); } return Void(); -} \ No newline at end of file +} diff --git a/fdbserver/workloads/SnapTest.actor.cpp b/fdbserver/workloads/SnapTest.actor.cpp index e69c5ab9c2..f1256e6f5f 100644 --- a/fdbserver/workloads/SnapTest.actor.cpp +++ b/fdbserver/workloads/SnapTest.actor.cpp @@ -23,7 +23,6 @@ #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/ReadYourWrites.h" #include "fdbclient/SystemData.h" -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/SimpleIni.h" #include "fdbserver/Status.actor.h" #include "fdbserver/TesterInterface.actor.h" diff --git a/fdbserver/workloads/StreamingRead.actor.cpp b/fdbserver/workloads/StreamingRead.actor.cpp index da8656d2b3..3031f9bf56 100644 --- a/fdbserver/workloads/StreamingRead.actor.cpp +++ b/fdbserver/workloads/StreamingRead.actor.cpp @@ -18,7 +18,7 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/workloads.actor.h" @@ -37,11 +37,11 @@ struct StreamingReadWorkload : TestWorkload { std::vector> clients; PerfIntCounter transactions, readKeys; PerfIntCounter readValueBytes; - ContinuousSample latencies; + DDSketch latencies; StreamingReadWorkload(WorkloadContext const& wcx) : TestWorkload(wcx), transactions("Transactions"), readKeys("Keys Read"), readValueBytes("Value Bytes Read"), - latencies(2000) { + latencies() { testDuration = getOption(options, "testDuration"_sr, 10.0); actorCount = getOption(options, "actorCount"_sr, 20); readsPerTransaction = getOption(options, "readsPerTransaction"_sr, 10); diff --git a/fdbserver/workloads/Throughput.actor.cpp b/fdbserver/workloads/Throughput.actor.cpp index fdee65878d..b5cde15078 100644 --- a/fdbserver/workloads/Throughput.actor.cpp +++ b/fdbserver/workloads/Throughput.actor.cpp @@ -18,7 +18,7 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/WorkerInterface.actor.h" @@ -189,12 +189,11 @@ struct MeasureSinglePeriod : IMeasurer { double delay, duration; double startT; - ContinuousSample totalLatency, grvLatency, rowReadLatency, commitLatency; + DDSketch totalLatency, grvLatency, rowReadLatency, commitLatency; ITransactor::Stats stats; // totalled over the period MeasureSinglePeriod(double delay, double duration) - : delay(delay), duration(duration), totalLatency(2000), grvLatency(2000), rowReadLatency(2000), - commitLatency(2000) {} + : delay(delay), duration(duration), totalLatency(), grvLatency(), rowReadLatency(), commitLatency() {} Future start() override { startT = now(); diff --git a/fdbserver/workloads/Unreadable.actor.cpp b/fdbserver/workloads/Unreadable.actor.cpp index 5f541e647f..3436990a30 100644 --- a/fdbserver/workloads/Unreadable.actor.cpp +++ b/fdbserver/workloads/Unreadable.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/BulkSetup.actor.h" diff --git a/fdbserver/workloads/VersionStamp.actor.cpp b/fdbserver/workloads/VersionStamp.actor.cpp index 82e7d38c47..b542e3ad04 100644 --- a/fdbserver/workloads/VersionStamp.actor.cpp +++ b/fdbserver/workloads/VersionStamp.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/ClusterConnectionMemoryRecord.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" diff --git a/fdbserver/workloads/WatchAndWait.actor.cpp b/fdbserver/workloads/WatchAndWait.actor.cpp index 4dfd852422..bea4f6be23 100644 --- a/fdbserver/workloads/WatchAndWait.actor.cpp +++ b/fdbserver/workloads/WatchAndWait.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/workloads/BulkSetup.actor.h" diff --git a/fdbserver/workloads/Watches.actor.cpp b/fdbserver/workloads/Watches.actor.cpp index f2bafae6b0..7175cf2565 100644 --- a/fdbserver/workloads/Watches.actor.cpp +++ b/fdbserver/workloads/Watches.actor.cpp @@ -18,15 +18,13 @@ * limitations under the License. */ -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "flow/DeterministicRandom.h" #include "fdbserver/workloads/workloads.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. 
-const int sampleSize = 10000; - struct WatchesWorkload : TestWorkload { static constexpr auto NAME = "Watches"; @@ -34,10 +32,10 @@ struct WatchesWorkload : TestWorkload { double testDuration; std::vector> clients; PerfIntCounter cycles; - ContinuousSample cycleLatencies; + DDSketch cycleLatencies; std::vector nodeOrder; - WatchesWorkload(WorkloadContext const& wcx) : TestWorkload(wcx), cycles("Cycles"), cycleLatencies(sampleSize) { + WatchesWorkload(WorkloadContext const& wcx) : TestWorkload(wcx), cycles("Cycles"), cycleLatencies() { testDuration = getOption(options, "testDuration"_sr, 600.0); nodes = getOption(options, "nodeCount"_sr, 100); extraPerNode = getOption(options, "extraPerNode"_sr, 1000); diff --git a/fdbserver/workloads/WatchesSameKeyCorrectness.actor.cpp b/fdbserver/workloads/WatchesSameKeyCorrectness.actor.cpp index 01e74303cd..04380a7472 100644 --- a/fdbserver/workloads/WatchesSameKeyCorrectness.actor.cpp +++ b/fdbserver/workloads/WatchesSameKeyCorrectness.actor.cpp @@ -18,7 +18,6 @@ * limitations under the License. 
*/ -#include "fdbrpc/ContinuousSample.h" #include "fdbclient/ReadYourWrites.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" diff --git a/fdbserver/workloads/WriteBandwidth.actor.cpp b/fdbserver/workloads/WriteBandwidth.actor.cpp index 4446ab676a..75bab9720c 100644 --- a/fdbserver/workloads/WriteBandwidth.actor.cpp +++ b/fdbserver/workloads/WriteBandwidth.actor.cpp @@ -20,7 +20,7 @@ #include -#include "fdbrpc/ContinuousSample.h" +#include "fdbrpc/DDSketch.h" #include "fdbclient/NativeAPI.actor.h" #include "fdbserver/TesterInterface.actor.h" #include "fdbserver/WorkerInterface.actor.h" @@ -37,11 +37,11 @@ struct WriteBandwidthWorkload : KVWorkload { std::vector> clients; PerfIntCounter transactions, retries; - ContinuousSample commitLatencies, GRVLatencies; + DDSketch commitLatencies, GRVLatencies; WriteBandwidthWorkload(WorkloadContext const& wcx) - : KVWorkload(wcx), loadTime(0.0), transactions("Transactions"), retries("Retries"), commitLatencies(2000), - GRVLatencies(2000) { + : KVWorkload(wcx), loadTime(0.0), transactions("Transactions"), retries("Retries"), commitLatencies(), + GRVLatencies() { testDuration = getOption(options, "testDuration"_sr, 10.0); keysPerTransaction = getOption(options, "keysPerTransaction"_sr, 100); valueString = std::string(maxValueBytes, '.'); diff --git a/fdbserver/workloads/WriteTagThrottling.actor.cpp b/fdbserver/workloads/WriteTagThrottling.actor.cpp index 043bae5e0f..8f53d360d3 100644 --- a/fdbserver/workloads/WriteTagThrottling.actor.cpp +++ b/fdbserver/workloads/WriteTagThrottling.actor.cpp @@ -26,7 +26,6 @@ #include "fdbclient/TagThrottle.actor.h" #include "flow/actorcompiler.h" // This must be the last #include. -constexpr int SAMPLE_SIZE = 10000; // workload description: // This workload aims to test whether we can throttling some bad clients that doing penetrating write on write hot-spot // range. There are several good clientActor just randomly do read and write ops in transaction. 
Also, some bad @@ -41,8 +40,8 @@ struct WriteTagThrottlingWorkload : KVWorkload { int badActorTrNum = 0, badActorRetries = 0, badActorTooOldRetries = 0, badActorCommitFailedRetries = 0; int goodActorThrottleRetries = 0, badActorThrottleRetries = 0; double badActorTotalLatency = 0.0, goodActorTotalLatency = 0.0; - ContinuousSample badActorReadLatency, goodActorReadLatency; - ContinuousSample badActorCommitLatency, goodActorCommitLatency; + DDSketch badActorReadLatency, goodActorReadLatency; + DDSketch badActorCommitLatency, goodActorCommitLatency; // Test configuration // KVWorkload::actorCount int goodActorPerClient, badActorPerClient; @@ -64,8 +63,8 @@ struct WriteTagThrottlingWorkload : KVWorkload { static constexpr int MIN_TRANSACTION_TAG_LENGTH = 2; WriteTagThrottlingWorkload(WorkloadContext const& wcx) - : KVWorkload(wcx), badActorReadLatency(SAMPLE_SIZE), goodActorReadLatency(SAMPLE_SIZE), - badActorCommitLatency(SAMPLE_SIZE), goodActorCommitLatency(SAMPLE_SIZE) { + : KVWorkload(wcx), badActorReadLatency(), goodActorReadLatency(), badActorCommitLatency(), + goodActorCommitLatency() { testDuration = getOption(options, "testDuration"_sr, 120.0); badOpRate = getOption(options, "badOpRate"_sr, 0.9); numWritePerTr = getOption(options, "numWritePerTr"_sr, 1); diff --git a/flow/Knobs.cpp b/flow/Knobs.cpp index 06b4e07355..08bf96e529 100644 --- a/flow/Knobs.cpp +++ b/flow/Knobs.cpp @@ -112,8 +112,7 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) { init( PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT, 3600.0 ); init( INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING, 5.0 ); init( PING_LOGGING_INTERVAL, 3.0 ); - init( PING_SAMPLE_AMOUNT, 100 ); - init( NETWORK_CONNECT_SAMPLE_AMOUNT, 100 ); + init( PING_SKETCH_ACCURACY, 0.1 ); init( TLS_CERT_REFRESH_DELAY_SECONDS, 12*60*60 ); init( TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT, 9.0 ); @@ -168,7 +167,7 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) { init( MIN_SUBMIT, 10 ); init( 
SQLITE_DISK_METRIC_LOGGING_INTERVAL, 5.0 ); init( KAIO_LATENCY_LOGGING_INTERVAL, 30.0 ); - init( KAIO_LATENCY_SAMPLE_SIZE, 30000 ); + init( KAIO_LATENCY_SKETCH_ACCURACY, 0.01 ); init( PAGE_WRITE_CHECKSUM_HISTORY, 0 ); if( randomize && BUGGIFY ) PAGE_WRITE_CHECKSUM_HISTORY = 10000000; init( DISABLE_POSIX_KERNEL_AIO, 0 ); diff --git a/flow/include/flow/Knobs.h b/flow/include/flow/Knobs.h index d0e40dd67f..0ba1e3b4ff 100644 --- a/flow/include/flow/Knobs.h +++ b/flow/include/flow/Knobs.h @@ -176,8 +176,7 @@ public: int ACCEPT_BATCH_SIZE; double INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING; double PING_LOGGING_INTERVAL; - int PING_SAMPLE_AMOUNT; - int NETWORK_CONNECT_SAMPLE_AMOUNT; + double PING_SKETCH_ACCURACY; int TLS_CERT_REFRESH_DELAY_SECONDS; double TLS_SERVER_CONNECTION_THROTTLE_TIMEOUT; @@ -231,7 +230,7 @@ public: int MIN_SUBMIT; double SQLITE_DISK_METRIC_LOGGING_INTERVAL; double KAIO_LATENCY_LOGGING_INTERVAL; - int KAIO_LATENCY_SAMPLE_SIZE; + double KAIO_LATENCY_SKETCH_ACCURACY; int PAGE_WRITE_CHECKSUM_HISTORY; int DISABLE_POSIX_KERNEL_AIO; diff --git a/flowbench/BenchSamples.cpp b/flowbench/BenchSamples.cpp index 687a371048..1ac9e034d2 100644 --- a/flowbench/BenchSamples.cpp +++ b/flowbench/BenchSamples.cpp @@ -22,8 +22,62 @@ #include "flow/IRandom.h" #include "flowbench/GlobalData.h" #include "fdbrpc/Stats.h" +#include "fdbrpc/DDSketch.h" +#include "fdbrpc/ContinuousSample.h" #include "flow/Histogram.h" +static void bench_ddsketchUnsigned(benchmark::State& state) { + DDSketchFastUnsigned dds; + InputGenerator data(1e6, []() { return deterministicRandom()->randomInt64(0, 1e9); }); + + for (auto _ : state) { + dds.addSample(data.next()); + } + + state.SetItemsProcessed(state.iterations()); +} +// DDSketchFastUnsigned has a fixed error margin (~8%) +BENCHMARK(bench_ddsketchUnsigned)->ReportAggregatesOnly(true); + +static void bench_ddsketchInt(benchmark::State& state) { + DDSketch dds((double)state.range(0) / 100); + InputGenerator data(1e6, []() { return 
deterministicRandom()->randomInt64(0, 1e9); }); + + for (auto _ : state) { + dds.addSample(data.next()); + } + + state.SetItemsProcessed(state.iterations()); +} +// Try with 10%, 5% and 1% error margins +BENCHMARK(bench_ddsketchInt)->Arg(10)->Arg(5)->Arg(1)->ReportAggregatesOnly(true); + +static void bench_ddsketchDouble(benchmark::State& state) { + DDSketch dds((double)state.range(0) / 100); + InputGenerator data(1e6, []() { return deterministicRandom()->randomInt64(0, 1e9); }); + + for (auto _ : state) { + dds.addSample(data.next()); + } + + state.SetItemsProcessed(state.iterations()); +} +// Try with 10%, 5% and 1% error margins +BENCHMARK(bench_ddsketchDouble)->Arg(10)->Arg(5)->Arg(1)->ReportAggregatesOnly(true); + +static void bench_ddsketchLatency(benchmark::State& state) { + DDSketch dds((double)state.range(0) / 100); + InputGenerator data(1e6, []() { return deterministicRandom()->random01() * 2.0; }); + + for (auto _ : state) { + dds.addSample(data.next()); + } + + state.SetItemsProcessed(state.iterations()); +} +// Try with 10%, 5% and 1% error margins +BENCHMARK(bench_ddsketchLatency)->Arg(10)->Arg(5)->Arg(1)->ReportAggregatesOnly(true); + static void bench_continuousSampleInt(benchmark::State& state) { ContinuousSample cs(state.range(0)); InputGenerator data(1e6, []() { return deterministicRandom()->randomInt64(0, 1e9); }); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a72fc06645..d62ea5fb14 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -56,6 +56,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES BlobManagerUnit.toml) add_fdb_test(TEST_FILES ConsistencyCheck.txt IGNORE) add_fdb_test(TEST_FILES DDMetricsExclude.txt IGNORE) + add_fdb_test(TEST_FILES DDSketch.txt IGNORE) add_fdb_test(TEST_FILES DataDistributionMetrics.txt IGNORE) add_fdb_test(TEST_FILES DiskDurability.txt IGNORE) add_fdb_test(TEST_FILES FileSystem.txt IGNORE) From dfc5a3a78a9ce7868366273157489adb85b21633 Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Mon, 3 Oct 
2022 12:44:41 -0700 Subject: [PATCH 30/57] Default errorGuarantee -> 1% --- fdbrpc/include/fdbrpc/DDSketch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbrpc/include/fdbrpc/DDSketch.h b/fdbrpc/include/fdbrpc/DDSketch.h index 2bbe350ab8..a5c2105da4 100644 --- a/fdbrpc/include/fdbrpc/DDSketch.h +++ b/fdbrpc/include/fdbrpc/DDSketch.h @@ -201,7 +201,7 @@ protected: template class DDSketch : public DDSketchBase, T> { public: - explicit DDSketch(double errorGuarantee = 0.1) + explicit DDSketch(double errorGuarantee = 0.01) : DDSketchBase, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)), multiplier(fastLogger::correctingFactor * log(2) / log(gamma)) { offset = getIndex(1.0 / DDSketchBase, T>::EPS); From d59b6f3f47152ddf6a457151445fd8c8888b7d1b Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Sat, 12 Nov 2022 13:59:05 -0800 Subject: [PATCH 31/57] merge errors --- fdbrpc/include/fdbrpc/Stats.h | 2 +- fdbserver/BlobWorker.actor.cpp | 1 + fdbserver/workloads/BulkSetup.actor.cpp | 74 ------------------------- 3 files changed, 2 insertions(+), 75 deletions(-) delete mode 100644 fdbserver/workloads/BulkSetup.actor.cpp diff --git a/fdbrpc/include/fdbrpc/Stats.h b/fdbrpc/include/fdbrpc/Stats.h index 46d334c6e0..897705cfbf 100644 --- a/fdbrpc/include/fdbrpc/Stats.h +++ b/fdbrpc/include/fdbrpc/Stats.h @@ -246,7 +246,7 @@ private: .detail("P95", sketch.percentile(0.95)) .detail("P99", sketch.percentile(0.99)) .detail("P99.9", sketch.percentile(0.999)) - .trackLatest(id.toString() + "/" + name); + .trackLatest(latencySampleEventHolder->trackingKey); sketch.clear(); sampleStart = now(); diff --git a/fdbserver/BlobWorker.actor.cpp b/fdbserver/BlobWorker.actor.cpp index e7a262c2b8..d88e37704b 100644 --- a/fdbserver/BlobWorker.actor.cpp +++ b/fdbserver/BlobWorker.actor.cpp @@ -1752,6 +1752,7 @@ bool granuleCanRetry(const Error& e) { case error_code_http_request_failed: case error_code_connection_failed: case error_code_lookup_failed: // 
dns + case error_code_platform_error: // injected faults return true; default: return false; diff --git a/fdbserver/workloads/BulkSetup.actor.cpp b/fdbserver/workloads/BulkSetup.actor.cpp deleted file mode 100644 index 59389cf0f7..0000000000 --- a/fdbserver/workloads/BulkSetup.actor.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * BulkSetup.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fdbclient/NativeAPI.actor.h" -#include "fdbserver/TesterInterface.actor.h" -#include "fdbserver/workloads/workloads.actor.h" -#include "fdbserver/workloads/BulkSetup.actor.h" -#include "flow/actorcompiler.h" // This must be the last #include. 
- -struct BulkSetupWorkload : TestWorkload { - - std::vector tenantNames; - int nodeCount; - double transactionsPerSecond; - Key keyPrefix; - - BulkSetupWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { - transactionsPerSecond = getOption(options, "transactionsPerSecond"_sr, 5000.0) / clientCount; - nodeCount = getOption(options, "nodeCount"_sr, transactionsPerSecond * clientCount); - keyPrefix = unprintable(getOption(options, "keyPrefix"_sr, LiteralStringRef("")).toString()); - std::vector tenants = getOption(options, "tenants"_sr, std::vector()); - for (std::string tenant : tenants) { - tenantNames.push_back(TenantName(tenant)); - } - } - - std::string description() const override { return "BulkSetup"; } - - void getMetrics(std::vector& m) override {} - - Key keyForIndex(int n) { return key(n); } - Key key(int n) { return doubleToTestKey((double)n / nodeCount, keyPrefix); } - Value value(int n) { return doubleToTestKey(n, keyPrefix); } - - Standalone operator()(int n) { return KeyValueRef(key(n), value((n + 1) % nodeCount)); } - - Future start(Database const& cx) override { - return bulkSetup(cx, - this, - nodeCount, - Promise(), - false, - 0.0, - 1e12, - std::vector(), - Promise>>(), - 0, - 0.1, - 0, - 0, - this->tenantNames); - } - - Future check(Database const& cx) override { return true; } -}; - -WorkloadFactory BulkSetupWorkloadFactory("BulkSetup"); From 7f33b0fa70d49948bf749f53105c07acc24048c2 Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Sat, 12 Nov 2022 14:09:31 -0800 Subject: [PATCH 32/57] clang-format --- fdbclient/NativeAPI.actor.cpp | 69 +++++++++++++------ .../include/fdbclient/BlobWorkerCommon.h | 5 +- fdbclient/include/fdbclient/DatabaseContext.h | 3 +- fdbserver/TLogServer.actor.cpp | 31 ++++++--- 4 files changed, 75 insertions(+), 33 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index a927f31690..70b1c261a3 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ 
-1016,7 +1016,9 @@ ACTOR static Future monitorClientDBInfoChange(DatabaseContext* cx, proxiesChangeTrigger->trigger(); } } - when(wait(actors.getResult())) { UNSTOPPABLE_ASSERT(false); } + when(wait(actors.getResult())) { + UNSTOPPABLE_ASSERT(false); + } } } } @@ -1534,17 +1536,16 @@ DatabaseContext::DatabaseContext(ReferenceSHARD_STAT_SMOOTH_AMOUNT), + feedPopsFallback("FeedPopsFallback", ccFeed), latencies(), readLatencies(), commitLatencies(), GRVLatencies(), + mutationsPerCommit(), bytesPerCommit(), outstandingWatches(0), sharedStatePtr(nullptr), lastGrvTime(0.0), + cachedReadVersion(0), lastRkBatchThrottleTime(0.0), lastRkDefaultThrottleTime(0.0), lastProxyRequestTime(0.0), + transactionTracingSample(false), taskID(taskID), clientInfo(clientInfo), clientInfoMonitor(clientInfoMonitor), + coordinator(coordinator), apiVersion(_apiVersion), mvCacheInsertLocation(0), healthMetricsLastUpdated(0), + detailedHealthMetricsLastUpdated(0), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT), specialKeySpace(std::make_unique(specialKeys.begin, specialKeys.end, /* test */ false)), connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) { @@ -3422,7 +3423,9 @@ ACTOR Future> getValue(Reference trState, std::vector{ transaction_too_old(), future_version() }); } choose { - when(wait(trState->cx->connectionFileChanged())) { throw transaction_too_old(); } + when(wait(trState->cx->connectionFileChanged())) { + throw transaction_too_old(); + } when(GetValueReply _reply = wait(loadBalance( trState->cx.getPtr(), locationInfo.locations, @@ -3569,7 +3572,9 @@ ACTOR Future getKey(Reference trState, state GetKeyReply reply; try { choose { - when(wait(trState->cx->connectionFileChanged())) { throw transaction_too_old(); } + when(wait(trState->cx->connectionFileChanged())) { + throw transaction_too_old(); + } when(GetKeyReply _reply = wait(loadBalance( trState->cx.getPtr(), locationInfo.locations, @@ -3733,7 +3738,9 @@ ACTOR Future 
watchValue(Database cx, Reference p TaskPriority::DefaultPromiseEndpoint))) { resp = r; } - when(wait(cx->connectionRecord ? cx->connectionRecord->onChange() : Never())) { wait(Never()); } + when(wait(cx->connectionRecord ? cx->connectionRecord->onChange() : Never())) { + wait(Never()); + } } if (watchValueID.present()) { g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.After"); @@ -4052,7 +4059,9 @@ Future getExactRange(Reference trState, state GetKeyValuesFamilyReply rep; try { choose { - when(wait(trState->cx->connectionFileChanged())) { throw transaction_too_old(); } + when(wait(trState->cx->connectionFileChanged())) { + throw transaction_too_old(); + } when(GetKeyValuesFamilyReply _rep = wait(loadBalance( trState->cx.getPtr(), locations[shard].locations, @@ -4951,7 +4960,9 @@ ACTOR Future getRangeStreamFragment(Reference trState, return Void(); } - when(GetKeyValuesStreamReply _rep = waitNext(replyStream.getFuture())) { rep = _rep; } + when(GetKeyValuesStreamReply _rep = waitNext(replyStream.getFuture())) { + rep = _rep; + } } ++trState->cx->transactionPhysicalReadsCompleted; } catch (Error& e) { @@ -5444,7 +5455,9 @@ ACTOR Future watch(Reference watch, loop { choose { // NativeAPI watchValue future finishes or errors - when(wait(watch->watchFuture)) { break; } + when(wait(watch->watchFuture)) { + break; + } when(wait(cx->connectionFileChanged())) { CODE_PROBE(true, "Recreated a watch after switch"); @@ -7029,7 +7042,9 @@ ACTOR Future getConsistentReadVersion(SpanContext parentSpa state Future onProxiesChanged = cx->onProxiesChanged(); choose { - when(wait(onProxiesChanged)) { onProxiesChanged = cx->onProxiesChanged(); } + when(wait(onProxiesChanged)) { + onProxiesChanged = cx->onProxiesChanged(); + } when(GetReadVersionReply v = wait(basicLoadBalance(cx->getGrvProxies(UseProvisionalProxies( flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES)), @@ -7455,7 +7470,9 @@ ACTOR Future getClusterProtocolImpl( 
needToConnect = false; } choose { - when(wait(coordinator->onChange())) { needToConnect = true; } + when(wait(coordinator->onChange())) { + needToConnect = true; + } when(ProtocolVersion pv = wait(protocolVersion)) { if (!expectedVersion.present() || expectedVersion.get() != pv) { @@ -9015,8 +9032,12 @@ ACTOR Future> getCheckpointMetaData(Database cx, } choose { - when(wait(cx->connectionFileChanged())) { cx->invalidateCache(KeyRef(), keys); } - when(wait(waitForAll(futures))) { break; } + when(wait(cx->connectionFileChanged())) { + cx->invalidateCache(KeyRef(), keys); + } + when(wait(waitForAll(futures))) { + break; + } when(wait(delay(timeout))) { TraceEvent("GetCheckpointTimeout").detail("Range", keys).detail("Version", version); } @@ -9663,8 +9684,12 @@ ACTOR Future changeFeedWhenAtLatest(Reference self, Versio // only allowed to use empty versions if you're caught up Future waitEmptyVersion = (self->notAtLatest.get() == 0) ? changeFeedWaitLatest(self, version) : Never(); choose { - when(wait(waitEmptyVersion)) { break; } - when(wait(lastReturned)) { break; } + when(wait(waitEmptyVersion)) { + break; + } + when(wait(lastReturned)) { + break; + } when(wait(self->refresh.getFuture())) {} when(wait(self->notAtLatest.onChange())) {} } diff --git a/fdbclient/include/fdbclient/BlobWorkerCommon.h b/fdbclient/include/fdbclient/BlobWorkerCommon.h index 617f167111..6f44682ec9 100644 --- a/fdbclient/include/fdbclient/BlobWorkerCommon.h +++ b/fdbclient/include/fdbclient/BlobWorkerCommon.h @@ -95,7 +95,10 @@ struct BlobWorkerStats { forceFlushCleanups("ForceFlushCleanups", cc), readDrivenCompactions("ReadDrivenCompactions", cc), numRangesAssigned(0), mutationBytesBuffered(0), activeReadRequests(0), granulesPendingSplitCheck(0), minimumCFVersion(0), cfVersionLag(0), notAtLatestChangeFeeds(0), lastResidentMemory(0), - snapshotBlobWriteLatencySample("SnapshotBlobWriteMetrics", id, sampleLoggingInterval, fileOpLatencySketchAccuracy), + 
snapshotBlobWriteLatencySample("SnapshotBlobWriteMetrics", + id, + sampleLoggingInterval, + fileOpLatencySketchAccuracy), deltaBlobWriteLatencySample("DeltaBlobWriteMetrics", id, sampleLoggingInterval, fileOpLatencySketchAccuracy), reSnapshotLatencySample("GranuleResnapshotMetrics", id, sampleLoggingInterval, fileOpLatencySketchAccuracy), readLatencySample("GranuleReadLatencyMetrics", id, sampleLoggingInterval, requestLatencySketchAccuracy), diff --git a/fdbclient/include/fdbclient/DatabaseContext.h b/fdbclient/include/fdbclient/DatabaseContext.h index b068157fbf..e7c0d4329f 100644 --- a/fdbclient/include/fdbclient/DatabaseContext.h +++ b/fdbclient/include/fdbclient/DatabaseContext.h @@ -578,8 +578,7 @@ public: Counter feedPops; Counter feedPopsFallback; - DDSketch latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, - bytesPerCommit; + DDSketch latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit; int outstandingWatches; int maxOutstandingWatches; diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 1c5ddcab0c..d9796923e3 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1445,7 +1445,9 @@ ACTOR Future updateStorage(TLogData* self) { ACTOR Future updateStorageLoop(TLogData* self) { wait(delay(0, TaskPriority::UpdateStorage)); - loop { wait(updateStorage(self)); } + loop { + wait(updateStorage(self)); + } } void commitMessages(TLogData* self, @@ -1606,7 +1608,9 @@ ACTOR Future waitForMessagesForTag(Reference self, Tag reqTag, Ve // we want the caller to finish first, otherwise the data structure it is building might not be complete wait(delay(0.0)); } - when(wait(delay(timeout))) { self->blockingPeekTimeouts += 1; } + when(wait(delay(timeout))) { + self->blockingPeekTimeouts += 1; + } } return Void(); } @@ -1824,8 +1828,11 @@ Future tLogPeekMessages(PromiseType replyPromise, if (logData->blockingPeekLatencies.find(reqTag) == 
logData->blockingPeekLatencies.end()) { UID ssID = nondeterministicRandom()->randomUniqueID(); std::string s = "BlockingPeekLatencies-" + reqTag.toString(); - logData->blockingPeekLatencies.try_emplace( - reqTag, s, ssID, SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, SERVER_KNOBS->LATENCY_SKETCH_ACCURACY); + logData->blockingPeekLatencies.try_emplace(reqTag, + s, + ssID, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY); } LatencySample& sample = logData->blockingPeekLatencies.at(reqTag); sample.addMeasurement(latency); @@ -2788,7 +2795,9 @@ ACTOR Future pullAsyncData(TLogData* self, while (!endVersion.present() || logData->version.get() < endVersion.get()) { loop { choose { - when(wait(r ? r->getMore(TaskPriority::TLogCommit) : Never())) { break; } + when(wait(r ? r->getMore(TaskPriority::TLogCommit) : Never())) { + break; + } when(wait(dbInfoChange)) { if (logData->logSystem->get()) { r = logData->logSystem->get()->peek(logData->logId, tagAt, endVersion, tags, true); @@ -3269,7 +3278,9 @@ ACTOR Future restorePersistentState(TLogData* self, choose { when(wait(updateStorage(self))) {} - when(wait(allRemoved)) { throw worker_removed(); } + when(wait(allRemoved)) { + throw worker_removed(); + } } } } else { @@ -3280,7 +3291,9 @@ ACTOR Future restorePersistentState(TLogData* self, } } } - when(wait(allRemoved)) { throw worker_removed(); } + when(wait(allRemoved)) { + throw worker_removed(); + } } } } catch (Error& e) { @@ -3626,7 +3639,9 @@ ACTOR Future tLog(IKeyValueStore* persistentData, forwardPromise(req.reply, self.tlogCache.get(req.recruitmentID)); } } - when(wait(error)) { throw internal_error(); } + when(wait(error)) { + throw internal_error(); + } when(wait(activeSharedChange)) { if (activeSharedTLog->get() == tlogId) { TraceEvent("SharedTLogNowActive", self.dbgid).detail("NowActive", activeSharedTLog->get()); From 351525c189b63db4ccdd9f2f3266a66781df6f4c Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Sat, 12 Nov 
2022 14:53:23 -0800 Subject: [PATCH 33/57] merge issue --- fdbrpc/include/fdbrpc/Stats.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fdbrpc/include/fdbrpc/Stats.h b/fdbrpc/include/fdbrpc/Stats.h index 897705cfbf..2765916b53 100644 --- a/fdbrpc/include/fdbrpc/Stats.h +++ b/fdbrpc/include/fdbrpc/Stats.h @@ -217,7 +217,8 @@ public: class LatencySample { public: LatencySample(std::string name, UID id, double loggingInterval, double accuracy) - : name(name), id(id), sampleStart(now()), sketch(accuracy) { + : name(name), id(id), sampleStart(now()), sketch(accuracy), + latencySampleEventHolder(makeReference(id.toString() + "/" + name)) { logger = recurring([this]() { logSample(); }, loggingInterval); } From 4783e9fd721fd1be432893b9acbc5a2fb6237218 Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Mon, 14 Nov 2022 09:49:58 -0800 Subject: [PATCH 34/57] fix types --- fdbclient/include/fdbclient/BlobWorkerCommon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbclient/include/fdbclient/BlobWorkerCommon.h b/fdbclient/include/fdbclient/BlobWorkerCommon.h index 6f44682ec9..9ff3b3b30e 100644 --- a/fdbclient/include/fdbclient/BlobWorkerCommon.h +++ b/fdbclient/include/fdbclient/BlobWorkerCommon.h @@ -75,8 +75,8 @@ struct BlobWorkerStats { Reference resnapshotLock, Reference deltaWritesLock, double sampleLoggingInterval, - int fileOpLatencySketchAccuracy, - int requestLatencySketchAccuracy) + double fileOpLatencySketchAccuracy, + double requestLatencySketchAccuracy) : cc("BlobWorkerStats", id.toString()), s3PutReqs("S3PutReqs", cc), s3GetReqs("S3GetReqs", cc), s3DeleteReqs("S3DeleteReqs", cc), From 92dccdd9eb1bc923dca702718731e39d7633377e Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Mon, 14 Nov 2022 09:50:13 -0800 Subject: [PATCH 35/57] experimental changes to fix out of bounds errors --- fdbrpc/include/fdbrpc/DDSketch.h | 30 ++++++++++++++++++------------ fdbrpc/include/fdbrpc/Stats.h | 4 ++++ 2 files changed, 22 insertions(+), 12 
deletions(-) diff --git a/fdbrpc/include/fdbrpc/DDSketch.h b/fdbrpc/include/fdbrpc/DDSketch.h index a5c2105da4..551c087169 100644 --- a/fdbrpc/include/fdbrpc/DDSketch.h +++ b/fdbrpc/include/fdbrpc/DDSketch.h @@ -89,9 +89,13 @@ public: if (sample <= EPS) { zeroPopulationSize++; } else { - int index = static_cast(this)->getIndex(sample); + size_t index = static_cast(this)->getIndex(sample); assert(index >= 0 && index < buckets.size()); - buckets[index]++; + try { + buckets.at(index)++; + } catch (std::out_of_range const& e) { + fmt::print(stderr, "ERROR: Invalid DDSketch bucket index ({}) at {}/{} for sample: {}\n", e.what(), index, buckets.size(), sample); + } } populationSize++; @@ -119,7 +123,7 @@ public: if (targetPercentilePopulation < zeroPopulationSize) return T(0); - int index = -1; + size_t index = 0; [[maybe_unused]] bool found = false; if (percentile <= 0.5) { // count up uint64_t count = zeroPopulationSize; @@ -152,6 +156,7 @@ public: } } assert(found); + if (!found) return -1; return static_cast(this)->getValue(index); } @@ -194,7 +199,7 @@ protected: uint64_t populationSize, zeroPopulationSize; // we need to separately count 0s std::vector buckets; T minValue, maxValue, sum; - void setBucketSize(int capacity) { buckets.resize(capacity, 0); } + void setBucketSize(size_t capacity) { buckets.resize(capacity, 0); } }; // DDSketch with fast log implementation for float numbers @@ -204,20 +209,21 @@ public: explicit DDSketch(double errorGuarantee = 0.01) : DDSketchBase, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)), multiplier(fastLogger::correctingFactor * log(2) / log(gamma)) { + assert(errorGuarantee > 0); offset = getIndex(1.0 / DDSketchBase, T>::EPS); this->setBucketSize(2 * offset); } - int getIndex(T sample) { + size_t getIndex(T sample) { static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); return ceil(fastLogger::fastlog(sample) * multiplier) + offset; } - T getValue(int 
index) { return fastLogger::reverseLog((index - offset) / multiplier) * 2.0 / (1 + gamma); } + T getValue(size_t index) { return fastLogger::reverseLog((index - offset) / multiplier) * 2.0 / (1 + gamma); } private: double gamma, multiplier; - int offset = 0; + size_t offset = 0; }; // DDSketch with log. Slow and only use this when others doesn't work. @@ -231,13 +237,13 @@ public: this->setBucketSize(2 * offset); } - int getIndex(T sample) { return ceil(log(sample) / logGamma) + offset; } + size_t getIndex(T sample) { return ceil(log(sample) / logGamma) + offset; } - T getValue(int index) { return (T)(2.0 * pow(gamma, (index - offset)) / (1 + gamma)); } + T getValue(size_t index) { return (T)(2.0 * pow(gamma, (index - offset)) / (1 + gamma)); } private: double gamma, logGamma; - int offset = 0; + size_t offset = 0; }; // DDSketch for unsigned int. Faster than the float version. Fixed accuracy. @@ -245,7 +251,7 @@ class DDSketchFastUnsigned : public DDSketchBase public: DDSketchFastUnsigned() : DDSketchBase(errorGuarantee) { this->setBucketSize(129); } - int getIndex(unsigned sample) { + size_t getIndex(unsigned sample) { __uint128_t v = sample; v *= v; v *= v; // sample^4 @@ -254,7 +260,7 @@ public: return 128 - (high == 0 ? ((low == 0 ? 
64 : __builtin_clzll(low)) + 64) : __builtin_clzll(high)); } - unsigned getValue(int index) { + unsigned getValue(size_t index) { double r = 1, g = gamma; while (index) { // quick power method for power(gamma, index) if (index & 1) diff --git a/fdbrpc/include/fdbrpc/Stats.h b/fdbrpc/include/fdbrpc/Stats.h index 2765916b53..c0564780bb 100644 --- a/fdbrpc/include/fdbrpc/Stats.h +++ b/fdbrpc/include/fdbrpc/Stats.h @@ -219,6 +219,10 @@ public: LatencySample(std::string name, UID id, double loggingInterval, double accuracy) : name(name), id(id), sampleStart(now()), sketch(accuracy), latencySampleEventHolder(makeReference(id.toString() + "/" + name)) { + assert(accuracy > 0); + if (accuracy <= 0) { + fmt::print(stderr, "ERROR: LatencySample {} has invalid accuracy ({})", name, accuracy); + } logger = recurring([this]() { logSample(); }, loggingInterval); } From 0d4915f5ea82690d5353f85cb3d22611258e4314 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Mon, 14 Nov 2022 09:57:27 -0800 Subject: [PATCH 36/57] extra error check case --- fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index de6f81b9dc..b2ac5ab39b 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -415,7 +415,7 @@ struct MetaclusterManagementWorkload : TestWorkload { found = true; } } - ASSERT(found); + ASSERT(found || checkEntry2.tenantState != checkState); } catch (Error& e) { if (e.code() != error_code_tenant_not_found) { TraceEvent(SevError, "VerifyListFilterFailure").error(e).detail("Tenant", tenant); From 34b8c5eb2b0fb7b1a35fc7633e86a0c85f683f5c Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Mon, 14 Nov 2022 10:47:45 -0800 Subject: [PATCH 37/57] ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE -> 
ENCRYPT_KEY_CACHE_LOGGING_SKETCH_ACCURACY --- fdbclient/BlobCipher.cpp | 10 +++++----- flow/Knobs.cpp | 2 +- flow/include/flow/Knobs.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fdbclient/BlobCipher.cpp b/fdbclient/BlobCipher.cpp index 24bbf3ceb6..1eb338f13c 100644 --- a/fdbclient/BlobCipher.cpp +++ b/fdbclient/BlobCipher.cpp @@ -57,11 +57,11 @@ BlobCipherMetrics::CounterSet::CounterSet(CounterCollection& cc, std::string nam getCipherKeysLatency(name + "GetCipherKeysLatency", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, - FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE), + FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SKETCH_ACCURACY), getLatestCipherKeysLatency(name + "GetLatestCipherKeysLatency", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, - FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE) {} + FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SKETCH_ACCURACY) {} BlobCipherMetrics::BlobCipherMetrics() : cc("BlobCipher"), cipherKeyCacheHit("CipherKeyCacheHit", cc), cipherKeyCacheMiss("CipherKeyCacheMiss", cc), @@ -71,15 +71,15 @@ BlobCipherMetrics::BlobCipherMetrics() getCipherKeysLatency("GetCipherKeysLatency", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, - FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE), + FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SKETCH_ACCURACY), getLatestCipherKeysLatency("GetLatestCipherKeysLatency", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, - FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE), + FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SKETCH_ACCURACY), getBlobMetadataLatency("GetBlobMetadataLatency", UID(), FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, - FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE), + FLOW_KNOBS->ENCRYPT_KEY_CACHE_LOGGING_SKETCH_ACCURACY), counterSets({ CounterSet(cc, "TLog"), CounterSet(cc, "KVMemory"), CounterSet(cc, "KVRedwood"), diff --git a/flow/Knobs.cpp b/flow/Knobs.cpp index 08bf96e529..7d6132bc6c 100644 --- a/flow/Knobs.cpp +++ b/flow/Knobs.cpp @@ -302,7 
+302,7 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) { if ( randomize && BUGGIFY) { ENCRYPT_KEY_REFRESH_INTERVAL = deterministicRandom()->randomInt(2, 10); } init( TOKEN_CACHE_SIZE, 100 ); init( ENCRYPT_KEY_CACHE_LOGGING_INTERVAL, 5.0 ); - init( ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE, 1000 ); + init( ENCRYPT_KEY_CACHE_LOGGING_SKETCH_ACCURACY, 0.01 ); // Refer to EncryptUtil::EncryptAuthTokenAlgo for more details init( ENCRYPT_HEADER_AUTH_TOKEN_ENABLED, true ); if ( randomize && BUGGIFY ) { ENCRYPT_HEADER_AUTH_TOKEN_ENABLED = !ENCRYPT_HEADER_AUTH_TOKEN_ENABLED; } init( ENCRYPT_HEADER_AUTH_TOKEN_ALGO, 1 ); if ( randomize && BUGGIFY ) { ENCRYPT_HEADER_AUTH_TOKEN_ALGO = getRandomAuthTokenAlgo(); } diff --git a/flow/include/flow/Knobs.h b/flow/include/flow/Knobs.h index 0ba1e3b4ff..ee163c1038 100644 --- a/flow/include/flow/Knobs.h +++ b/flow/include/flow/Knobs.h @@ -364,7 +364,7 @@ public: int64_t ENCRYPT_CIPHER_KEY_CACHE_TTL; int64_t ENCRYPT_KEY_REFRESH_INTERVAL; double ENCRYPT_KEY_CACHE_LOGGING_INTERVAL; - double ENCRYPT_KEY_CACHE_LOGGING_SAMPLE_SIZE; + double ENCRYPT_KEY_CACHE_LOGGING_SKETCH_ACCURACY; bool ENCRYPT_HEADER_AUTH_TOKEN_ENABLED; int ENCRYPT_HEADER_AUTH_TOKEN_ALGO; From 2c889c411a4103876b85a6148da3e73b5da36910 Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Mon, 14 Nov 2022 10:51:04 -0800 Subject: [PATCH 38/57] add assert of errorGuarantee --- fdbrpc/include/fdbrpc/DDSketch.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fdbrpc/include/fdbrpc/DDSketch.h b/fdbrpc/include/fdbrpc/DDSketch.h index 551c087169..88096b56ce 100644 --- a/fdbrpc/include/fdbrpc/DDSketch.h +++ b/fdbrpc/include/fdbrpc/DDSketch.h @@ -79,7 +79,9 @@ class DDSketchBase { public: explicit DDSketchBase(double errorGuarantee) : errorGuarantee(errorGuarantee), populationSize(0), zeroPopulationSize(0), minValue(defaultMin()), - maxValue(defaultMax()), sum(T()) {} + maxValue(defaultMax()), sum(T()) { + ASSERT(errorGuarantee > 0 && 
errorGuarantee < 1); + } DDSketchBase& addSample(T sample) { // Call it addSample for now, while it is not a sample anymore From 40fa959f8dfab3433e124eb21e8788b7ed255c75 Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Mon, 14 Nov 2022 10:51:20 -0800 Subject: [PATCH 39/57] assert -> ASSERT --- fdbrpc/include/fdbrpc/DDSketch.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fdbrpc/include/fdbrpc/DDSketch.h b/fdbrpc/include/fdbrpc/DDSketch.h index 88096b56ce..68fa63651f 100644 --- a/fdbrpc/include/fdbrpc/DDSketch.h +++ b/fdbrpc/include/fdbrpc/DDSketch.h @@ -92,7 +92,7 @@ public: zeroPopulationSize++; } else { size_t index = static_cast(this)->getIndex(sample); - assert(index >= 0 && index < buckets.size()); + ASSERT(index >= 0 && index < buckets.size()); try { buckets.at(index)++; } catch (std::out_of_range const& e) { @@ -116,7 +116,7 @@ public: T median() { return percentile(0.5); } T percentile(double percentile) { - assert(percentile >= 0 && percentile <= 1); + ASSERT(percentile >= 0 && percentile <= 1); if (populationSize == 0) return T(); @@ -157,7 +157,7 @@ public: count += *rit; } } - assert(found); + ASSERT(found); if (!found) return -1; return static_cast(this)->getValue(index); } @@ -181,7 +181,7 @@ public: DDSketchBase& mergeWith(const DDSketchBase& anotherSketch) { // Must have the same guarantee - assert(fabs(errorGuarantee - anotherSketch.errorGuarantee) < EPS && + ASSERT(fabs(errorGuarantee - anotherSketch.errorGuarantee) < EPS && anotherSketch.buckets.size() == buckets.size()); for (size_t i = 0; i < anotherSketch.buckets.size(); i++) { buckets[i] += anotherSketch.buckets[i]; @@ -211,13 +211,13 @@ public: explicit DDSketch(double errorGuarantee = 0.01) : DDSketchBase, T>(errorGuarantee), gamma((1.0 + errorGuarantee) / (1.0 - errorGuarantee)), multiplier(fastLogger::correctingFactor * log(2) / log(gamma)) { - assert(errorGuarantee > 0); + ASSERT(errorGuarantee > 0); offset = getIndex(1.0 / DDSketchBase, T>::EPS); 
this->setBucketSize(2 * offset); } size_t getIndex(T sample) { - static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); + static_ASSERT(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); return ceil(fastLogger::fastlog(sample) * multiplier) + offset; } From 4254429e73f2a1623b7785334ec54b995e3c5430 Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Mon, 14 Nov 2022 11:15:35 -0800 Subject: [PATCH 40/57] capitlization --- fdbrpc/include/fdbrpc/DDSketch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbrpc/include/fdbrpc/DDSketch.h b/fdbrpc/include/fdbrpc/DDSketch.h index 68fa63651f..d657ef9f23 100644 --- a/fdbrpc/include/fdbrpc/DDSketch.h +++ b/fdbrpc/include/fdbrpc/DDSketch.h @@ -217,7 +217,7 @@ public: } size_t getIndex(T sample) { - static_ASSERT(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); + static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); return ceil(fastLogger::fastlog(sample) * multiplier) + offset; } From 47a8cebfb3e1ad168d2128aabc2a7fadd961815a Mon Sep 17 00:00:00 2001 From: Kevin Hoxha Date: Mon, 14 Nov 2022 11:41:06 -0800 Subject: [PATCH 41/57] ddsketch: Make sure that all ctors use 0 < error < 1 --- fdbclient/NativeAPI.actor.cpp | 62 ++++++------------- fdbrpc/include/fdbrpc/DDSketch.h | 13 +++- fdbrpc/include/fdbrpc/TSSComparison.h | 6 +- .../workloads/ReadWriteWorkload.actor.h | 8 +-- 4 files changed, 35 insertions(+), 54 deletions(-) diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 70b1c261a3..8198318154 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -1016,9 +1016,7 @@ ACTOR static Future monitorClientDBInfoChange(DatabaseContext* cx, proxiesChangeTrigger->trigger(); } } - when(wait(actors.getResult())) { - UNSTOPPABLE_ASSERT(false); - } + when(wait(actors.getResult())) { 
UNSTOPPABLE_ASSERT(false); } } } } @@ -1839,13 +1837,13 @@ DatabaseContext::DatabaseContext(const Error& err) ccBG("BlobGranuleReadMetrics"), bgReadInputBytes("BGReadInputBytes", ccBG), bgReadOutputBytes("BGReadOutputBytes", ccBG), bgReadSnapshotRows("BGReadSnapshotRows", ccBG), bgReadRowsCleared("BGReadRowsCleared", ccBG), bgReadRowsInserted("BGReadRowsInserted", ccBG), - bgReadRowsUpdated("BGReadRowsUpdated", ccBG), bgLatencies(1000), bgGranulesPerRequest(1000), - usedAnyChangeFeeds(false), ccFeed("ChangeFeedClientMetrics"), feedStreamStarts("FeedStreamStarts", ccFeed), + bgReadRowsUpdated("BGReadRowsUpdated", ccBG), bgLatencies(), bgGranulesPerRequest(), usedAnyChangeFeeds(false), + ccFeed("ChangeFeedClientMetrics"), feedStreamStarts("FeedStreamStarts", ccFeed), feedMergeStreamStarts("FeedMergeStreamStarts", ccFeed), feedErrors("FeedErrors", ccFeed), feedNonRetriableErrors("FeedNonRetriableErrors", ccFeed), feedPops("FeedPops", ccFeed), - feedPopsFallback("FeedPopsFallback", ccFeed), latencies(1000), readLatencies(1000), commitLatencies(1000), - GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), sharedStatePtr(nullptr), - transactionTracingSample(false), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT), + feedPopsFallback("FeedPopsFallback", ccFeed), latencies(), readLatencies(), commitLatencies(), GRVLatencies(), + mutationsPerCommit(), bytesPerCommit(), sharedStatePtr(nullptr), transactionTracingSample(false), + smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT), connectToDatabaseEventCacheHolder(format("ConnectToDatabase/%s", dbId.toString().c_str())) {} // Static constructor used by server processes to create a DatabaseContext @@ -3423,9 +3421,7 @@ ACTOR Future> getValue(Reference trState, std::vector{ transaction_too_old(), future_version() }); } choose { - when(wait(trState->cx->connectionFileChanged())) { - throw transaction_too_old(); - } + when(wait(trState->cx->connectionFileChanged())) { throw transaction_too_old(); 
} when(GetValueReply _reply = wait(loadBalance( trState->cx.getPtr(), locationInfo.locations, @@ -3572,9 +3568,7 @@ ACTOR Future getKey(Reference trState, state GetKeyReply reply; try { choose { - when(wait(trState->cx->connectionFileChanged())) { - throw transaction_too_old(); - } + when(wait(trState->cx->connectionFileChanged())) { throw transaction_too_old(); } when(GetKeyReply _reply = wait(loadBalance( trState->cx.getPtr(), locationInfo.locations, @@ -3738,9 +3732,7 @@ ACTOR Future watchValue(Database cx, Reference p TaskPriority::DefaultPromiseEndpoint))) { resp = r; } - when(wait(cx->connectionRecord ? cx->connectionRecord->onChange() : Never())) { - wait(Never()); - } + when(wait(cx->connectionRecord ? cx->connectionRecord->onChange() : Never())) { wait(Never()); } } if (watchValueID.present()) { g_traceBatch.addEvent("WatchValueDebug", watchValueID.get().first(), "NativeAPI.watchValue.After"); @@ -4059,9 +4051,7 @@ Future getExactRange(Reference trState, state GetKeyValuesFamilyReply rep; try { choose { - when(wait(trState->cx->connectionFileChanged())) { - throw transaction_too_old(); - } + when(wait(trState->cx->connectionFileChanged())) { throw transaction_too_old(); } when(GetKeyValuesFamilyReply _rep = wait(loadBalance( trState->cx.getPtr(), locations[shard].locations, @@ -4960,9 +4950,7 @@ ACTOR Future getRangeStreamFragment(Reference trState, return Void(); } - when(GetKeyValuesStreamReply _rep = waitNext(replyStream.getFuture())) { - rep = _rep; - } + when(GetKeyValuesStreamReply _rep = waitNext(replyStream.getFuture())) { rep = _rep; } } ++trState->cx->transactionPhysicalReadsCompleted; } catch (Error& e) { @@ -5455,9 +5443,7 @@ ACTOR Future watch(Reference watch, loop { choose { // NativeAPI watchValue future finishes or errors - when(wait(watch->watchFuture)) { - break; - } + when(wait(watch->watchFuture)) { break; } when(wait(cx->connectionFileChanged())) { CODE_PROBE(true, "Recreated a watch after switch"); @@ -7042,9 +7028,7 @@ ACTOR Future 
getConsistentReadVersion(SpanContext parentSpa state Future onProxiesChanged = cx->onProxiesChanged(); choose { - when(wait(onProxiesChanged)) { - onProxiesChanged = cx->onProxiesChanged(); - } + when(wait(onProxiesChanged)) { onProxiesChanged = cx->onProxiesChanged(); } when(GetReadVersionReply v = wait(basicLoadBalance(cx->getGrvProxies(UseProvisionalProxies( flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES)), @@ -7470,9 +7454,7 @@ ACTOR Future getClusterProtocolImpl( needToConnect = false; } choose { - when(wait(coordinator->onChange())) { - needToConnect = true; - } + when(wait(coordinator->onChange())) { needToConnect = true; } when(ProtocolVersion pv = wait(protocolVersion)) { if (!expectedVersion.present() || expectedVersion.get() != pv) { @@ -9032,12 +9014,8 @@ ACTOR Future> getCheckpointMetaData(Database cx, } choose { - when(wait(cx->connectionFileChanged())) { - cx->invalidateCache(KeyRef(), keys); - } - when(wait(waitForAll(futures))) { - break; - } + when(wait(cx->connectionFileChanged())) { cx->invalidateCache(KeyRef(), keys); } + when(wait(waitForAll(futures))) { break; } when(wait(delay(timeout))) { TraceEvent("GetCheckpointTimeout").detail("Range", keys).detail("Version", version); } @@ -9684,12 +9662,8 @@ ACTOR Future changeFeedWhenAtLatest(Reference self, Versio // only allowed to use empty versions if you're caught up Future waitEmptyVersion = (self->notAtLatest.get() == 0) ? 
changeFeedWaitLatest(self, version) : Never(); choose { - when(wait(waitEmptyVersion)) { - break; - } - when(wait(lastReturned)) { - break; - } + when(wait(waitEmptyVersion)) { break; } + when(wait(lastReturned)) { break; } when(wait(self->refresh.getFuture())) {} when(wait(self->notAtLatest.onChange())) {} } diff --git a/fdbrpc/include/fdbrpc/DDSketch.h b/fdbrpc/include/fdbrpc/DDSketch.h index 68fa63651f..d17508622e 100644 --- a/fdbrpc/include/fdbrpc/DDSketch.h +++ b/fdbrpc/include/fdbrpc/DDSketch.h @@ -96,7 +96,12 @@ public: try { buckets.at(index)++; } catch (std::out_of_range const& e) { - fmt::print(stderr, "ERROR: Invalid DDSketch bucket index ({}) at {}/{} for sample: {}\n", e.what(), index, buckets.size(), sample); + fmt::print(stderr, + "ERROR: Invalid DDSketch bucket index ({}) at {}/{} for sample: {}\n", + e.what(), + index, + buckets.size(), + sample); } } @@ -158,7 +163,8 @@ public: } } ASSERT(found); - if (!found) return -1; + if (!found) + return -1; return static_cast(this)->getValue(index); } @@ -213,11 +219,12 @@ public: multiplier(fastLogger::correctingFactor * log(2) / log(gamma)) { ASSERT(errorGuarantee > 0); offset = getIndex(1.0 / DDSketchBase, T>::EPS); + ASSERT(offset > 0); this->setBucketSize(2 * offset); } size_t getIndex(T sample) { - static_ASSERT(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); + static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Do not support non-little-endian systems"); return ceil(fastLogger::fastlog(sample) * multiplier) + offset; } diff --git a/fdbrpc/include/fdbrpc/TSSComparison.h b/fdbrpc/include/fdbrpc/TSSComparison.h index 7fcc84499b..e3b20cb6e5 100644 --- a/fdbrpc/include/fdbrpc/TSSComparison.h +++ b/fdbrpc/include/fdbrpc/TSSComparison.h @@ -105,9 +105,9 @@ struct TSSMetrics : ReferenceCounted, NonCopyable { TSSMetrics() : cc("TSSClientMetrics"), requests("Requests", cc), streamComparisons("StreamComparisons", cc), ssErrors("SSErrors", cc), tssErrors("TSSErrors", 
cc), tssTimeouts("TSSTimeouts", cc), - mismatches("Mismatches", cc), SSgetValueLatency(1000), SSgetKeyLatency(1000), SSgetKeyValuesLatency(1000), - SSgetMappedKeyValuesLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000), - TSSgetKeyValuesLatency(1000), TSSgetMappedKeyValuesLatency(1000) {} + mismatches("Mismatches", cc), SSgetValueLatency(), SSgetKeyLatency(), SSgetKeyValuesLatency(), + SSgetMappedKeyValuesLatency(), TSSgetValueLatency(), TSSgetKeyLatency(), TSSgetKeyValuesLatency(), + TSSgetMappedKeyValuesLatency() {} }; template diff --git a/fdbserver/include/fdbserver/workloads/ReadWriteWorkload.actor.h b/fdbserver/include/fdbserver/workloads/ReadWriteWorkload.actor.h index 5323235795..12ff6a1844 100644 --- a/fdbserver/include/fdbserver/workloads/ReadWriteWorkload.actor.h +++ b/fdbserver/include/fdbserver/workloads/ReadWriteWorkload.actor.h @@ -47,7 +47,7 @@ DESCR struct ReadMetric { // Common ReadWrite test settings struct ReadWriteCommon : KVWorkload { - static constexpr int sampleSize = 10000; + static constexpr double sampleError = 0.01; friend struct ReadWriteCommonImpl; // general test setting @@ -88,9 +88,9 @@ struct ReadWriteCommon : KVWorkload { explicit ReadWriteCommon(WorkloadContext const& wcx) : KVWorkload(wcx), totalReadsMetric("ReadWrite.TotalReads"_sr), totalRetriesMetric("ReadWrite.TotalRetries"_sr), - aTransactions("A Transactions"), bTransactions("B Transactions"), retries("Retries"), latencies(sampleSize), - readLatencies(sampleSize), commitLatencies(sampleSize), GRVLatencies(sampleSize), fullReadLatencies(sampleSize), - readLatencyTotal(0), readLatencyCount(0), loadTime(0.0), clientBegin(0) { + aTransactions("A Transactions"), bTransactions("B Transactions"), retries("Retries"), latencies(sampleError), + readLatencies(sampleError), commitLatencies(sampleError), GRVLatencies(sampleError), + fullReadLatencies(sampleError), readLatencyTotal(0), readLatencyCount(0), loadTime(0.0), clientBegin(0) { 
transactionSuccessMetric.init("ReadWrite.SuccessfulTransaction"_sr); transactionFailureMetric.init("ReadWrite.FailedTransaction"_sr); From 458daa33925e7d908a7c1123a23deabcfc47dcf1 Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 14 Nov 2022 11:47:32 -0800 Subject: [PATCH 42/57] Apply suggestions from code review Co-authored-by: Trevor Clinkenbeard --- fdbserver/include/fdbserver/workloads/MockDDTest.h | 2 +- fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp | 2 +- fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fdbserver/include/fdbserver/workloads/MockDDTest.h b/fdbserver/include/fdbserver/workloads/MockDDTest.h index 133f0b582e..ec7a449a6f 100644 --- a/fdbserver/include/fdbserver/workloads/MockDDTest.h +++ b/fdbserver/include/fdbserver/workloads/MockDDTest.h @@ -1,5 +1,5 @@ /* - * MockDDTest.g + * MockDDTest.h * * This source file is part of the FoundationDB open source project * diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index 04b73e9a09..2e364054a7 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -261,7 +261,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { return Void(); } - void verifyServerKeyDest(MoveKeysParams& params) { + void verifyServerKeyDest(MoveKeysParams& params) const { // check destination servers for (auto& id : params.destinationTeam) { ASSERT(mgs->serverIsDestForShard(id, params.keys)); diff --git a/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp b/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp index 5988d15c64..f0d7a542bb 100644 --- a/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp +++ b/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp @@ -177,8 +177,8 @@ struct MockDDTrackerShardEvaluatorWorkload : public 
MockDDTestWorkload { } void getMetrics(std::vector& m) override { - for (auto& p : rsReasonCounts) { - m.push_back(PerfMetric(RelocateReason(p.first).toString(), p.second, Averaged::False)); + for (const auto& [reason, count] : rsReasonCounts) { + m.push_back(PerfMetric(RelocateReason(reason).toString(), count, Averaged::False)); } } }; From 410b4375d2ae1aa1483d3ddef2ca61a72a732e97 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Mon, 14 Nov 2022 13:08:56 -0800 Subject: [PATCH 43/57] change fdbcli_tests.py --- fdbcli/tests/fdbcli_tests.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fdbcli/tests/fdbcli_tests.py b/fdbcli/tests/fdbcli_tests.py index da52842d23..3ba5e54da2 100755 --- a/fdbcli/tests/fdbcli_tests.py +++ b/fdbcli/tests/fdbcli_tests.py @@ -771,7 +771,7 @@ def tenant_list(logger): output = run_fdbcli_command('tenant list') assert output == '1. tenant\n 2. tenant2' - output = run_fdbcli_command('tenant list a z 1') + output = run_fdbcli_command('tenant list a z limit=1') assert output == '1. 
tenant' output = run_fdbcli_command('tenant list a tenant2') @@ -786,9 +786,15 @@ def tenant_list(logger): output = run_fdbcli_command_and_get_error('tenant list b a') assert output == 'ERROR: end must be larger than begin' - output = run_fdbcli_command_and_get_error('tenant list a b 12x') + output = run_fdbcli_command_and_get_error('tenant list a b limit=12x') assert output == 'ERROR: invalid limit `12x\'' + output = run_fdbcli_command_and_get_error('tenant list a b offset=13y') + assert output == 'ERROR: invalid offset `13y\'' + + output = run_fdbcli_command_and_get_error('tenant list a b state=14z') + assert output == 'ERROR: unrecognized tenant state(s) `14z\'' + @enable_logging() def tenant_get(logger): setup_tenants(['tenant', 'tenant2 tenant_group=tenant_group2']) From f997e737589030ef36e954f866f29d8f24918f0e Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 14 Nov 2022 12:31:13 -0800 Subject: [PATCH 44/57] rename variable and solve some light comments --- fdbclient/FDBTypes.cpp | 2 +- fdbclient/ServerKnobs.cpp | 4 +- fdbclient/include/fdbclient/ServerKnobs.h | 2 +- .../fdbclient/StorageServerInterface.h | 18 ++++----- fdbserver/BlobManager.actor.cpp | 12 +++--- fdbserver/BlobWorker.actor.cpp | 2 +- fdbserver/DDShardTracker.actor.cpp | 40 +++++++++---------- fdbserver/DDTxnProcessor.actor.cpp | 2 +- fdbserver/DataDistribution.actor.cpp | 6 +-- fdbserver/MockGlobalState.actor.cpp | 20 +++++----- fdbserver/StorageMetrics.actor.cpp | 27 +++++++------ .../fdbserver/DataDistribution.actor.h | 1 + .../include/fdbserver/StorageMetrics.actor.h | 2 +- .../include/fdbserver/workloads/MockDDTest.h | 4 +- fdbserver/storageserver.actor.cpp | 11 ++--- .../IDDTxnProcessorApiCorrectness.actor.cpp | 12 +++--- .../MockDDTrackerShardEvaluator.actor.cpp | 4 +- 17 files changed, 88 insertions(+), 81 deletions(-) diff --git a/fdbclient/FDBTypes.cpp b/fdbclient/FDBTypes.cpp index e83630596b..9e9401df65 100644 --- a/fdbclient/FDBTypes.cpp +++ b/fdbclient/FDBTypes.cpp @@ -66,7 
+66,7 @@ Key randomKeyBetween(const KeyRangeRef& keys) { break; } } - ASSERT(pos < end.size()); // otherwise, begin >= end + ASSERT_LT(pos, end.size()); // otherwise, begin >= end // find the lowest char in range begin[pos+1, begin.size()) that is not \xff (255) int lowest = begin.size() - 1; diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index d0662bc5ce..a29efbe76a 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -224,7 +224,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi shards. The bandwidth sample maintained by the storage server needs to be accurate enough to reliably measure this minimum bandwidth. See - BYTES_WRITE_UNITS_PER_SAMPLE. If this number is too low, the storage server needs to spend more memory and time on sampling. + BYTES_WRITTEN_UNITS_PER_SAMPLE. If this number is too low, the storage server needs to spend more memory and time on sampling. */ init( SHARD_SPLIT_BYTES_PER_KSEC, 250 * 1000 * 1000 ); if( buggifySmallBandwidthSplit ) SHARD_SPLIT_BYTES_PER_KSEC = 50 * 1000 * 1000; @@ -757,7 +757,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS, 1000.0 / STORAGE_METRICS_AVERAGE_INTERVAL ); // milliHz! 
init( SPLIT_JITTER_AMOUNT, 0.05 ); if( randomize && BUGGIFY ) SPLIT_JITTER_AMOUNT = 0.2; init( IOPS_UNITS_PER_SAMPLE, 10000 * 1000 / STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS / 100 ); - init( BYTES_WRITE_UNITS_PER_SAMPLE, SHARD_MIN_BYTES_PER_KSEC / STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS / 25 ); + init( BYTES_WRITTEN_UNITS_PER_SAMPLE, SHARD_MIN_BYTES_PER_KSEC / STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS / 25 ); init( BYTES_READ_UNITS_PER_SAMPLE, 100000 ); // 100K bytes init( READ_HOT_SUB_RANGE_CHUNK_SIZE, 10000000); // 10MB init( EMPTY_READ_PENALTY, 20 ); // 20 bytes diff --git a/fdbclient/include/fdbclient/ServerKnobs.h b/fdbclient/include/fdbclient/ServerKnobs.h index 3064a09c01..6ebee8765b 100644 --- a/fdbclient/include/fdbclient/ServerKnobs.h +++ b/fdbclient/include/fdbclient/ServerKnobs.h @@ -706,7 +706,7 @@ public: double STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; double SPLIT_JITTER_AMOUNT; int64_t IOPS_UNITS_PER_SAMPLE; - int64_t BYTES_WRITE_UNITS_PER_SAMPLE; + int64_t BYTES_WRITTEN_UNITS_PER_SAMPLE; int64_t BYTES_READ_UNITS_PER_SAMPLE; int64_t READ_HOT_SUB_RANGE_CHUNK_SIZE; int64_t EMPTY_READ_PENALTY; diff --git a/fdbclient/include/fdbclient/StorageServerInterface.h b/fdbclient/include/fdbclient/StorageServerInterface.h index a1b6e0ce08..5b27b20776 100644 --- a/fdbclient/include/fdbclient/StorageServerInterface.h +++ b/fdbclient/include/fdbclient/StorageServerInterface.h @@ -634,7 +634,7 @@ struct GetShardStateRequest { struct StorageMetrics { constexpr static FileIdentifier file_identifier = 13622226; int64_t bytes = 0; // total storage - int64_t writeBytesPerKSecond = 0; // bytes write to SQ + int64_t bytesWrittenPerKSecond = 0; // bytes write to SQ // FIXME: currently, iosPerKSecond is not used in DataDistribution calculations. 
int64_t iosPerKSecond = 0; @@ -643,33 +643,33 @@ struct StorageMetrics { static const int64_t infinity = 1LL << 60; bool allLessOrEqual(const StorageMetrics& rhs) const { - return bytes <= rhs.bytes && writeBytesPerKSecond <= rhs.writeBytesPerKSecond && + return bytes <= rhs.bytes && bytesWrittenPerKSecond <= rhs.bytesWrittenPerKSecond && iosPerKSecond <= rhs.iosPerKSecond && bytesReadPerKSecond <= rhs.bytesReadPerKSecond; } void operator+=(const StorageMetrics& rhs) { bytes += rhs.bytes; - writeBytesPerKSecond += rhs.writeBytesPerKSecond; + bytesWrittenPerKSecond += rhs.bytesWrittenPerKSecond; iosPerKSecond += rhs.iosPerKSecond; bytesReadPerKSecond += rhs.bytesReadPerKSecond; } void operator-=(const StorageMetrics& rhs) { bytes -= rhs.bytes; - writeBytesPerKSecond -= rhs.writeBytesPerKSecond; + bytesWrittenPerKSecond -= rhs.bytesWrittenPerKSecond; iosPerKSecond -= rhs.iosPerKSecond; bytesReadPerKSecond -= rhs.bytesReadPerKSecond; } template void operator*=(F f) { bytes *= f; - writeBytesPerKSecond *= f; + bytesWrittenPerKSecond *= f; iosPerKSecond *= f; bytesReadPerKSecond *= f; } - bool allZero() const { return !bytes && !writeBytesPerKSecond && !iosPerKSecond && !bytesReadPerKSecond; } + bool allZero() const { return !bytes && !bytesWrittenPerKSecond && !iosPerKSecond && !bytesReadPerKSecond; } template void serialize(Ar& ar) { - serializer(ar, bytes, writeBytesPerKSecond, iosPerKSecond, bytesReadPerKSecond); + serializer(ar, bytes, bytesWrittenPerKSecond, iosPerKSecond, bytesReadPerKSecond); } void negate() { operator*=(-1.0); } @@ -697,14 +697,14 @@ struct StorageMetrics { } bool operator==(StorageMetrics const& rhs) const { - return bytes == rhs.bytes && writeBytesPerKSecond == rhs.writeBytesPerKSecond && + return bytes == rhs.bytes && bytesWrittenPerKSecond == rhs.bytesWrittenPerKSecond && iosPerKSecond == rhs.iosPerKSecond && bytesReadPerKSecond == rhs.bytesReadPerKSecond; } std::string toString() const { return format("Bytes: %lld, BWritePerKSec: %lld, 
iosPerKSec: %lld, BReadPerKSec: %lld", bytes, - writeBytesPerKSecond, + bytesWrittenPerKSecond, iosPerKSecond, bytesReadPerKSecond); } diff --git a/fdbserver/BlobManager.actor.cpp b/fdbserver/BlobManager.actor.cpp index 140705be74..243bd44ac4 100644 --- a/fdbserver/BlobManager.actor.cpp +++ b/fdbserver/BlobManager.actor.cpp @@ -636,12 +636,12 @@ ACTOR Future splitRange(Reference bmDat // only split on bytes and write rate state StorageMetrics splitMetrics; splitMetrics.bytes = SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES; - splitMetrics.writeBytesPerKSecond = SERVER_KNOBS->SHARD_SPLIT_BYTES_PER_KSEC; + splitMetrics.bytesWrittenPerKSecond = SERVER_KNOBS->SHARD_SPLIT_BYTES_PER_KSEC; if (writeHot) { - splitMetrics.writeBytesPerKSecond = - std::min(splitMetrics.writeBytesPerKSecond, estimated.writeBytesPerKSecond / 2); - splitMetrics.writeBytesPerKSecond = - std::max(splitMetrics.writeBytesPerKSecond, SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC); + splitMetrics.bytesWrittenPerKSecond = + std::min(splitMetrics.bytesWrittenPerKSecond, estimated.bytesWrittenPerKSecond / 2); + splitMetrics.bytesWrittenPerKSecond = + std::max(splitMetrics.bytesWrittenPerKSecond, SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC); } splitMetrics.iosPerKSecond = splitMetrics.infinity; splitMetrics.bytesReadPerKSecond = splitMetrics.infinity; @@ -2617,7 +2617,7 @@ ACTOR Future attemptMerges(Reference bmData, wait(bmData->db->getStorageMetrics(std::get<1>(candidates[i]), CLIENT_KNOBS->TOO_MANY)); if (metrics.bytes >= SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES || - metrics.writeBytesPerKSecond >= SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) { + metrics.bytesWrittenPerKSecond >= SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) { // This granule cannot be merged with any neighbors. 
// If current candidates up to here can be merged, merge them and skip over this one attemptStartMerge(bmData, currentCandidates); diff --git a/fdbserver/BlobWorker.actor.cpp b/fdbserver/BlobWorker.actor.cpp index 4797730254..5e81bd5e72 100644 --- a/fdbserver/BlobWorker.actor.cpp +++ b/fdbserver/BlobWorker.actor.cpp @@ -1632,7 +1632,7 @@ ACTOR Future granuleCheckMergeCandidate(Reference bwData, // FIXME: maybe separate knob and/or value for write rate? if (currentMetrics.bytes >= SERVER_KNOBS->BG_SNAPSHOT_FILE_TARGET_BYTES / 2 || - currentMetrics.writeBytesPerKSecond >= SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) { + currentMetrics.bytesWrittenPerKSecond >= SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) { wait(delayJittered(SERVER_KNOBS->BG_MERGE_CANDIDATE_THRESHOLD_SECONDS / 2.0)); CODE_PROBE(true, "wait and check later to see if granule got smaller or colder"); continue; diff --git a/fdbserver/DDShardTracker.actor.cpp b/fdbserver/DDShardTracker.actor.cpp index 7457ba81b4..d879cf7cc1 100644 --- a/fdbserver/DDShardTracker.actor.cpp +++ b/fdbserver/DDShardTracker.actor.cpp @@ -41,9 +41,9 @@ enum BandwidthStatus { BandwidthStatusLow, BandwidthStatusNormal, BandwidthStatu enum ReadBandwidthStatus { ReadBandwidthStatusNormal, ReadBandwidthStatusHigh }; BandwidthStatus getBandwidthStatus(StorageMetrics const& metrics) { - if (metrics.writeBytesPerKSecond > SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC) + if (metrics.bytesWrittenPerKSecond > SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC) return BandwidthStatusHigh; - else if (metrics.writeBytesPerKSecond < SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) + else if (metrics.bytesWrittenPerKSecond < SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC) return BandwidthStatusLow; return BandwidthStatusNormal; @@ -176,7 +176,7 @@ ShardSizeBounds getShardSizeBounds(KeyRangeRef shard, int64_t maxShardSize) { bounds.max.bytes = maxShardSize; } - bounds.max.writeBytesPerKSecond = bounds.max.infinity; + bounds.max.bytesWrittenPerKSecond = bounds.max.infinity; 
bounds.max.iosPerKSecond = bounds.max.infinity; bounds.max.bytesReadPerKSecond = bounds.max.infinity; @@ -187,14 +187,14 @@ ShardSizeBounds getShardSizeBounds(KeyRangeRef shard, int64_t maxShardSize) { bounds.min.bytes = maxShardSize / SERVER_KNOBS->SHARD_BYTES_RATIO; } - bounds.min.writeBytesPerKSecond = 0; + bounds.min.bytesWrittenPerKSecond = 0; bounds.min.iosPerKSecond = 0; bounds.min.bytesReadPerKSecond = 0; // The permitted error is 1/3 of the general-case minimum bytes (even in the special case where this is the last // shard) bounds.permittedError.bytes = bounds.max.bytes / SERVER_KNOBS->SHARD_BYTES_RATIO / 3; - bounds.permittedError.writeBytesPerKSecond = bounds.permittedError.infinity; + bounds.permittedError.bytesWrittenPerKSecond = bounds.permittedError.infinity; bounds.permittedError.iosPerKSecond = bounds.permittedError.infinity; bounds.permittedError.bytesReadPerKSecond = bounds.permittedError.infinity; @@ -222,18 +222,18 @@ ShardSizeBounds calculateShardSizeBounds(const KeyRange& keys, std::max(int64_t(bytes - (SERVER_KNOBS->MIN_SHARD_BYTES * 0.1)), (int64_t)0)); bounds.permittedError.bytes = bytes * 0.1; if (bandwidthStatus == BandwidthStatusNormal) { // Not high or low - bounds.max.writeBytesPerKSecond = SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC; - bounds.min.writeBytesPerKSecond = SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC; - bounds.permittedError.writeBytesPerKSecond = bounds.min.writeBytesPerKSecond / 4; + bounds.max.bytesWrittenPerKSecond = SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC; + bounds.min.bytesWrittenPerKSecond = SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC; + bounds.permittedError.bytesWrittenPerKSecond = bounds.min.bytesWrittenPerKSecond / 4; } else if (bandwidthStatus == BandwidthStatusHigh) { // > 10MB/sec for 100MB shard, proportionally lower // for smaller shard, > 200KB/sec no matter what - bounds.max.writeBytesPerKSecond = bounds.max.infinity; - bounds.min.writeBytesPerKSecond = SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC; - 
bounds.permittedError.writeBytesPerKSecond = bounds.min.writeBytesPerKSecond / 4; + bounds.max.bytesWrittenPerKSecond = bounds.max.infinity; + bounds.min.bytesWrittenPerKSecond = SERVER_KNOBS->SHARD_MAX_BYTES_PER_KSEC; + bounds.permittedError.bytesWrittenPerKSecond = bounds.min.bytesWrittenPerKSecond / 4; } else if (bandwidthStatus == BandwidthStatusLow) { // < 10KB/sec - bounds.max.writeBytesPerKSecond = SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC; - bounds.min.writeBytesPerKSecond = 0; - bounds.permittedError.writeBytesPerKSecond = bounds.max.writeBytesPerKSecond / 4; + bounds.max.bytesWrittenPerKSecond = SERVER_KNOBS->SHARD_MIN_BYTES_PER_KSEC; + bounds.min.bytesWrittenPerKSecond = 0; + bounds.permittedError.bytesWrittenPerKSecond = bounds.max.bytesWrittenPerKSecond / 4; } else { ASSERT(false); } @@ -306,12 +306,12 @@ ACTOR Future trackShardMetrics(DataDistributionTracker::SafeAccessor self, /*TraceEvent("ShardSizeUpdate") .detail("Keys", keys) .detail("UpdatedSize", metrics.metrics.bytes) - .detail("Bandwidth", metrics.metrics.writeBytesPerKSecond) + .detail("WriteBandwidth", metrics.metrics.bytesWrittenPerKSecond) .detail("BandwidthStatus", getBandwidthStatus(metrics)) .detail("BytesLower", bounds.min.bytes) .detail("BytesUpper", bounds.max.bytes) - .detail("BandwidthLower", bounds.min.writeBytesPerKSecond) - .detail("BandwidthUpper", bounds.max.writeBytesPerKSecond) + .detail("WriteBandwidthLower", bounds.min.bytesWrittenPerKSecond) + .detail("WriteBandwidthUpper", bounds.max.bytesWrittenPerKSecond) .detail("ShardSizePresent", shardSize->get().present()) .detail("OldShardSize", shardSize->get().present() ? shardSize->get().get().metrics.bytes : 0) .detail("TrackerID", trackerID);*/ @@ -879,7 +879,7 @@ ACTOR Future shardSplitter(DataDistributionTracker* self, StorageMetrics splitMetrics; splitMetrics.bytes = shardBounds.max.bytes / 2; - splitMetrics.writeBytesPerKSecond = + splitMetrics.bytesWrittenPerKSecond = keys.begin >= keyServersKeys.begin ? 
splitMetrics.infinity : SERVER_KNOBS->SHARD_SPLIT_BYTES_PER_KSEC; splitMetrics.iosPerKSecond = splitMetrics.infinity; splitMetrics.bytesReadPerKSecond = splitMetrics.infinity; // Don't split by readBandwidth @@ -902,7 +902,7 @@ ACTOR Future shardSplitter(DataDistributionTracker* self, bandwidthStatus == BandwidthStatusHigh ? "High" : bandwidthStatus == BandwidthStatusNormal ? "Normal" : "Low") - .detail("BytesPerKSec", metrics.writeBytesPerKSecond) + .detail("BytesWrittenPerKSec", metrics.bytesWrittenPerKSecond) .detail("NumShards", numShards); if (numShards > 1) { @@ -1203,7 +1203,7 @@ ACTOR Future shardTracker(DataDistributionTracker::SafeAccessor self, .detail("TrackerID", trackerID) .detail("MaxBytes", self()->maxShardSize->get().get()) .detail("ShardSize", shardSize->get().get().bytes) - .detail("BytesPerKSec", shardSize->get().get().writeBytesPerKSecond);*/ + .detail("BytesPerKSec", shardSize->get().get().bytesWrittenPerKSecond);*/ try { loop { diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 3382caa76b..313ba6baa2 100644 --- a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -709,7 +709,7 @@ struct DDMockTxnProcessorImpl { loop { wait(delayJittered(1.0)); DDMockTxnProcessor* selfP = self; - KeyRangeRef cloneRef; + KeyRangeRef cloneRef = range; if (std::all_of(ids.begin(), ids.end(), [selfP, cloneRef](const UID& id) { auto& server = selfP->mgs->allServers.at(id); return server.allShardStatusIn(cloneRef, { MockShardStatus::FETCHED, MockShardStatus::COMPLETED }); diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 0732dec1da..c3d008218f 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -56,12 +56,12 @@ ShardSizeBounds ShardSizeBounds::shardSizeBoundsBeforeTrack() { return ShardSizeBounds{ .max = StorageMetrics{ .bytes = -1, - .writeBytesPerKSecond = StorageMetrics::infinity, + .bytesWrittenPerKSecond = 
StorageMetrics::infinity, .iosPerKSecond = StorageMetrics::infinity, .bytesReadPerKSecond = StorageMetrics::infinity }, - .min = StorageMetrics{ .bytes = -1, .writeBytesPerKSecond = 0, .iosPerKSecond = 0, .bytesReadPerKSecond = 0 }, + .min = StorageMetrics{ .bytes = -1, .bytesWrittenPerKSecond = 0, .iosPerKSecond = 0, .bytesReadPerKSecond = 0 }, .permittedError = StorageMetrics{ .bytes = -1, - .writeBytesPerKSecond = StorageMetrics::infinity, + .bytesWrittenPerKSecond = StorageMetrics::infinity, .iosPerKSecond = StorageMetrics::infinity, .bytesReadPerKSecond = StorageMetrics::infinity } }; diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index aabe9f379c..b3b2c05d15 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -320,7 +320,7 @@ Future MockStorageServer::run() { TraceEvent("MockStorageServerStart").detail("Address", ssi.address()); addActor(serveStorageMetricsRequests(this, ssi)); - addActor(MockStorageServerImpl::serveMockStorageServer(this)); + // addActor(MockStorageServerImpl::serveMockStorageServer(this)); return actors.getResult(); } @@ -411,13 +411,15 @@ void MockStorageServer::notifyWriteMetrics(KeyRef const& key, int64_t size) { // update write bandwidth and iops as mock the cost of writing a mutation StorageMetrics s; // FIXME: remove the / 2 and double the related knobs. 
- s.writeBytesPerKSecond = mvccStorageBytes(size) / 2; + s.bytesWrittenPerKSecond = mvccStorageBytes(size) / 2; s.iosPerKSecond = 1; metrics.notify(key, s); } void MockStorageServer::signalFetchKeys(const KeyRangeRef& range, int64_t rangeTotalBytes) { - fetchKeysRequests.send({ range, rangeTotalBytes }); + if (!allShardStatusEqual(range, MockShardStatus::COMPLETED)) { + actors.add(MockStorageServerImpl::waitFetchKeysFinish(this, { range, rangeTotalBytes })); + } } Future MockStorageServer::fetchKeys(const MockStorageServer::FetchKeysParams& param) { @@ -999,22 +1001,22 @@ TEST_CASE("/MockGlobalState/MockStorageServer/DataOpsSet") { // insert { - mgs->set("a"_sr, 1 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); - mgs->set("b"_sr, 2 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); - mgs->set("c"_sr, 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE, true); + mgs->set("a"_sr, 1 * SERVER_KNOBS->BYTES_WRITTEN_UNITS_PER_SAMPLE, true); + mgs->set("b"_sr, 2 * SERVER_KNOBS->BYTES_WRITTEN_UNITS_PER_SAMPLE, true); + mgs->set("c"_sr, 3 * SERVER_KNOBS->BYTES_WRITTEN_UNITS_PER_SAMPLE, true); for (auto& server : mgs->allServers) { - ASSERT_EQ(server.second.usedDiskSpace, 3 + 6 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE); + ASSERT_EQ(server.second.usedDiskSpace, 3 + 6 * SERVER_KNOBS->BYTES_WRITTEN_UNITS_PER_SAMPLE); } ShardSizeBounds bounds = ShardSizeBounds::shardSizeBoundsBeforeTrack(); std::pair, int> res = wait( mgs->waitStorageMetrics(KeyRangeRef("a"_sr, "bc"_sr), bounds.min, bounds.max, bounds.permittedError, 1, 1)); - int64_t testSize = 2 + 3 * SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE; + int64_t testSize = 2 + 3 * SERVER_KNOBS->BYTES_WRITTEN_UNITS_PER_SAMPLE; // SOMEDAY: how to integrate with isKeyValueInSample() better? 
if (res.first.get().bytes > 0) { // If sampled ASSERT_EQ(res.first.get().bytes, testSize); - ASSERT_GT(res.first.get().writeBytesPerKSecond, 0); + ASSERT_GT(res.first.get().bytesWrittenPerKSecond, 0); } } return Void(); diff --git a/fdbserver/StorageMetrics.actor.cpp b/fdbserver/StorageMetrics.actor.cpp index ea314eff77..c947c46049 100644 --- a/fdbserver/StorageMetrics.actor.cpp +++ b/fdbserver/StorageMetrics.actor.cpp @@ -75,7 +75,7 @@ KeyRef StorageMetricSample::splitEstimate(KeyRangeRef range, int64_t offset, boo StorageMetrics StorageServerMetrics::getMetrics(KeyRangeRef const& keys) const { StorageMetrics result; result.bytes = byteSample.getEstimate(keys); - result.writeBytesPerKSecond = + result.bytesWrittenPerKSecond = bytesWriteSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; result.iosPerKSecond = iopsSample.getEstimate(keys) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; result.bytesReadPerKSecond = @@ -88,7 +88,7 @@ StorageMetrics StorageServerMetrics::getMetrics(KeyRangeRef const& keys) const { void StorageServerMetrics::notify(KeyRef key, StorageMetrics& metrics) { ASSERT(metrics.bytes == 0); // ShardNotifyMetrics if (g_network->isSimulated()) { - CODE_PROBE(metrics.writeBytesPerKSecond != 0, "ShardNotifyMetrics bytes"); + CODE_PROBE(metrics.bytesWrittenPerKSecond != 0, "ShardNotifyMetrics bytes"); CODE_PROBE(metrics.iosPerKSecond != 0, "ShardNotifyMetrics ios"); CODE_PROBE(metrics.bytesReadPerKSecond != 0, "ShardNotifyMetrics bytesRead", probe::decoration::rare); } @@ -97,9 +97,10 @@ void StorageServerMetrics::notify(KeyRef key, StorageMetrics& metrics) { StorageMetrics notifyMetrics; - if (metrics.writeBytesPerKSecond) - notifyMetrics.writeBytesPerKSecond = bytesWriteSample.addAndExpire(key, metrics.writeBytesPerKSecond, expire) * - SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; + if (metrics.bytesWrittenPerKSecond) + notifyMetrics.bytesWrittenPerKSecond = + 
bytesWriteSample.addAndExpire(key, metrics.bytesWrittenPerKSecond, expire) * + SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; if (metrics.iosPerKSecond) notifyMetrics.iosPerKSecond = iopsSample.addAndExpire(key, metrics.iosPerKSecond, expire) * SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; @@ -177,7 +178,7 @@ void StorageServerMetrics::notifyNotReadable(KeyRangeRef keys) { void StorageServerMetrics::poll() { { StorageMetrics m; - m.writeBytesPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; + m.bytesWrittenPerKSecond = SERVER_KNOBS->STORAGE_METRICS_AVERAGE_INTERVAL_PER_KSECONDS; bytesWriteSample.poll(waitMetricsMap, m); } { @@ -250,7 +251,7 @@ void StorageServerMetrics::splitMetrics(SplitMetricsRequest req) const { if (remaining.bytes < 2 * minSplitBytes) break; KeyRef key = req.keys.end; - bool hasUsed = used.bytes != 0 || used.writeBytesPerKSecond != 0 || used.iosPerKSecond != 0; + bool hasUsed = used.bytes != 0 || used.bytesWrittenPerKSecond != 0 || used.iosPerKSecond != 0; key = getSplitKey(remaining.bytes, estimated.bytes, req.limits.bytes, @@ -276,10 +277,10 @@ void StorageServerMetrics::splitMetrics(SplitMetricsRequest req) const { lastKey, key, hasUsed); - key = getSplitKey(remaining.writeBytesPerKSecond, - estimated.writeBytesPerKSecond, - req.limits.writeBytesPerKSecond, - used.writeBytesPerKSecond, + key = getSplitKey(remaining.bytesWrittenPerKSecond, + estimated.bytesWrittenPerKSecond, + req.limits.bytesWrittenPerKSecond, + used.bytesWrittenPerKSecond, req.limits.infinity, req.isLastShard, bytesWriteSample, @@ -328,12 +329,12 @@ void StorageServerMetrics::getStorageMetrics(GetStorageMetricsRequest req, rep.available.bytes = sb.available; rep.available.iosPerKSecond = 10e6; - rep.available.writeBytesPerKSecond = 100e9; + rep.available.bytesWrittenPerKSecond = 100e9; rep.available.bytesReadPerKSecond = 100e9; rep.capacity.bytes = sb.total; rep.capacity.iosPerKSecond = 10e6; - 
rep.capacity.writeBytesPerKSecond = 100e9; + rep.capacity.bytesWrittenPerKSecond = 100e9; rep.capacity.bytesReadPerKSecond = 100e9; rep.bytesInputRate = bytesInputRate; diff --git a/fdbserver/include/fdbserver/DataDistribution.actor.h b/fdbserver/include/fdbserver/DataDistribution.actor.h index 14fd6b6334..3636411a57 100644 --- a/fdbserver/include/fdbserver/DataDistribution.actor.h +++ b/fdbserver/include/fdbserver/DataDistribution.actor.h @@ -83,6 +83,7 @@ public: } operator int() const { return (int)value; } constexpr static int8_t typeCount() { return (int)__COUNT; } + bool operator<(const RelocateReason& reason) { return (int)value < (int)reason.value; } private: Value value; diff --git a/fdbserver/include/fdbserver/StorageMetrics.actor.h b/fdbserver/include/fdbserver/StorageMetrics.actor.h index 0fb2ab3fa1..69024ea27b 100644 --- a/fdbserver/include/fdbserver/StorageMetrics.actor.h +++ b/fdbserver/include/fdbserver/StorageMetrics.actor.h @@ -86,7 +86,7 @@ struct StorageServerMetrics { StorageServerMetrics() : byteSample(0), iopsSample(SERVER_KNOBS->IOPS_UNITS_PER_SAMPLE), - bytesWriteSample(SERVER_KNOBS->BYTES_WRITE_UNITS_PER_SAMPLE), + bytesWriteSample(SERVER_KNOBS->BYTES_WRITTEN_UNITS_PER_SAMPLE), bytesReadSample(SERVER_KNOBS->BYTES_READ_UNITS_PER_SAMPLE) {} StorageMetrics getMetrics(KeyRangeRef const& keys) const; diff --git a/fdbserver/include/fdbserver/workloads/MockDDTest.h b/fdbserver/include/fdbserver/workloads/MockDDTest.h index ec7a449a6f..b14f65f7e2 100644 --- a/fdbserver/include/fdbserver/workloads/MockDDTest.h +++ b/fdbserver/include/fdbserver/workloads/MockDDTest.h @@ -39,8 +39,10 @@ struct MockDDTestWorkload : public TestWorkload { Reference mock; KeyRange getRandomRange(double offset) const; - MockDDTestWorkload(WorkloadContext const& wcx); Future setup(Database const& cx) override; + +protected: + MockDDTestWorkload(WorkloadContext const& wcx); }; #endif // FOUNDATIONDB_MOCKDDTEST_H diff --git a/fdbserver/storageserver.actor.cpp 
b/fdbserver/storageserver.actor.cpp index 30656166f2..2726709f90 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -2118,7 +2118,7 @@ ACTOR Future getValueQ(StorageServer* data, GetValueRequest req) { /* StorageMetrics m; - m.writeBytesPerKSecond = req.key.size() + (v.present() ? v.get().size() : 0); + m.bytesWrittenPerKSecond = req.key.size() + (v.present() ? v.get().size() : 0); m.iosPerKSecond = 1; data->metrics.notify(req.key, m); */ @@ -5793,7 +5793,7 @@ void applyMutation(StorageServer* self, // Clear split keys are added to arena StorageMetrics metrics; // FIXME: remove the / 2 and double the related knobs. - metrics.writeBytesPerKSecond = mvccStorageBytes(m) / 2; // comparable to counter.bytesInput / 2 + metrics.bytesWrittenPerKSecond = mvccStorageBytes(m) / 2; // comparable to counter.bytesInput / 2 metrics.iosPerKSecond = 1; self->metrics.notify(m.param1, metrics); @@ -10290,12 +10290,13 @@ ACTOR Future waitMetrics(StorageServerMetrics* self, WaitMetricsRequest re // all the messages for one clear or set have been dispatched. 
/*StorageMetrics m = getMetrics( data, req.keys ); - bool b = ( m.bytes != metrics.bytes || m.writeBytesPerKSecond != metrics.writeBytesPerKSecond + bool b = ( m.bytes != metrics.bytes || m.bytesWrittenPerKSecond != + metrics.bytesWrittenPerKSecond || m.iosPerKSecond != metrics.iosPerKSecond ); if (b) { printf("keys: '%s' - '%s' @%p\n", printable(req.keys.begin).c_str(), printable(req.keys.end).c_str(), this); printf("waitMetrics: desync %d (%lld %lld %lld) != (%lld %lld %lld); +(%lld %lld %lld)\n", - b, m.bytes, m.writeBytesPerKSecond, m.iosPerKSecond, metrics.bytes, - metrics.writeBytesPerKSecond, metrics.iosPerKSecond, c.bytes, c.writeBytesPerKSecond, + b, m.bytes, m.bytesWrittenPerKSecond, m.iosPerKSecond, metrics.bytes, + metrics.bytesWrittenPerKSecond, metrics.iosPerKSecond, c.bytes, c.bytesWrittenPerKSecond, c.iosPerKSecond); }*/ diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index 2e364054a7..8b42750d3c 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -300,7 +300,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { wait(self->real->testRawFinishMovement(params, emptyTssMapping)); break; } catch (Error& e) { - if (e.code() != error_code_movekeys_conflict && e.code() != error_code_operation_failed) + if (e.code() != error_code_movekeys_conflict) throw; wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); // Keep trying to get the moveKeysLock @@ -327,12 +327,12 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { state MoveKeysLock lock = wait(takeMoveKeysLock(self->real->context(), UID())); KeyRange keys = self->getRandomKeys(); - std::vector destTeams = self->getRandomTeam(); - std::sort(destTeams.begin(), destTeams.end()); + std::vector destTeam = self->getRandomTeam(); + std::sort(destTeam.begin(), destTeam.end()); return MoveKeysParams{ 
deterministicRandom()->randomUniqueID(), keys, - destTeams, - destTeams, + destTeam, + destTeam, lock, Promise(), nullptr, @@ -365,7 +365,7 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { wait(self->real->moveKeys(params)); break; } catch (Error& e) { - if (e.code() != error_code_movekeys_conflict && e.code() != error_code_operation_failed) + if (e.code() != error_code_movekeys_conflict) throw; wait(delay(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY)); // Keep trying to get the moveKeysLock diff --git a/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp b/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp index f0d7a542bb..9d007e7419 100644 --- a/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp +++ b/fdbserver/workloads/MockDDTrackerShardEvaluator.actor.cpp @@ -37,7 +37,7 @@ struct MockDDTrackerShardEvaluatorWorkload : public MockDDTestWorkload { uint64_t mockDbSize = 0; const int keySize = 16; - std::map rsReasonCounts; + std::map rsReasonCounts; // --- test configs --- @@ -133,7 +133,7 @@ struct MockDDTrackerShardEvaluatorWorkload : public MockDDTestWorkload { ACTOR static Future relocateShardReporter(MockDDTrackerShardEvaluatorWorkload* self, FutureStream input) { loop choose { - when(RelocateShard rs = waitNext(input)) { ++self->rsReasonCounts[(int)rs.reason]; } + when(RelocateShard rs = waitNext(input)) { ++self->rsReasonCounts[rs.reason]; } } } From 89b38624979546827003f6b68d9c62297bdf0d9e Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Mon, 14 Nov 2022 13:11:48 -0800 Subject: [PATCH 45/57] fix randomKeyBetween bug --- fdbclient/FDBTypes.cpp | 7 +- fdbserver/MockGlobalState.actor.cpp | 64 +++++++++++-------- fdbserver/include/fdbserver/MockGlobalState.h | 4 -- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/fdbclient/FDBTypes.cpp b/fdbclient/FDBTypes.cpp index 9e9401df65..0fff0611e4 100644 --- a/fdbclient/FDBTypes.cpp +++ b/fdbclient/FDBTypes.cpp @@ -51,12 +51,17 @@ KeyRef keyBetween(const KeyRangeRef& keys) { } Key 
randomKeyBetween(const KeyRangeRef& keys) { + if (keys.empty() || keys.singleKeyRange()) { + return keys.end; + } + KeyRef begin = keys.begin; KeyRef end = keys.end; ASSERT(begin < end); if (begin.size() < end.size()) { // randomly append a char - uint8_t newChar = deterministicRandom()->randomInt(0, end[begin.size()] + 1); + uint8_t maxChar = end[begin.size()] > 0 ? end[begin.size()] : end[begin.size()] + 1; + uint8_t newChar = deterministicRandom()->randomInt(0, maxChar); return begin.withSuffix(StringRef(&newChar, 1)); } diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index b3b2c05d15..b4bdaf2918 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -115,36 +115,50 @@ public: return Void(); } - ACTOR static Future serveMockStorageServer(MockStorageServer* self) { - state ActorCollection actors; - loop choose { - when(MockStorageServer::FetchKeysParams params = waitNext(self->fetchKeysRequests.getFuture())) { - if (!self->allShardStatusEqual(params.keys, MockShardStatus::COMPLETED)) { - actors.add(waitFetchKeysFinish(self, params)); - } - } - when(wait(actors.getResult())) { ASSERT(false); } - } - } + // Randomly generate keys and kv size between the fetch range, updating the byte sample. + // Once the fetchKeys return, the shard status will become FETCHED. ACTOR static Future waitFetchKeysFinish(MockStorageServer* self, MockStorageServer::FetchKeysParams params) { // between each chunk delay for random time, and finally set the fetchComplete signal. ASSERT(params.totalRangeBytes > 0); state int chunkCount = std::ceil(params.totalRangeBytes * 1.0 / SERVER_KNOBS->FETCH_BLOCK_BYTES); + state int64_t currentTotal = 0; state Key lastKey = params.keys.begin; state int i = 0; - for (; i < chunkCount; ++i) { + for (; i < chunkCount && currentTotal < params.totalRangeBytes; ++i) { wait(delayJittered(0.01)); - int remainBytes = (chunkCount == 1 ? 
params.totalRangeBytes : SERVER_KNOBS->FETCH_BLOCK_BYTES); + int remainedBytes = (chunkCount == 1 ? params.totalRangeBytes : SERVER_KNOBS->FETCH_BLOCK_BYTES); - while (remainBytes >= lastKey.size()) { - int maxSize = std::min(remainBytes, 130000) + 1; + while (remainedBytes >= lastKey.size()) { + Key nextKey; + // try 10 times + for (int j = 0; j < 10; j++) { + nextKey = randomKeyBetween(KeyRangeRef(lastKey, params.keys.end)); + if (nextKey < params.keys.end) + break; + } + + // NOTE: in this case, we accumulate the bytes on lastKey on purpose (shall we?) + if (nextKey == params.keys.end) { + auto bytes = params.totalRangeBytes - currentTotal; + self->byteSampleApplySet(lastKey, bytes); + self->usedDiskSpace += bytes; + currentTotal = params.totalRangeBytes; + TraceEvent(SevWarn, "MockFetchKeysInaccurateSample") + .detail("Range", params.keys) + .detail("LastKey", lastKey) + .detail("Size", bytes); + break; // break the most outside loop + } + + int maxSize = std::min(remainedBytes, 130000) + 1; int randomSize = deterministicRandom()->randomInt(lastKey.size(), maxSize); - self->usedDiskSpace += randomSize; + currentTotal += randomSize; + self->byteSampleApplySet(lastKey, randomSize); - remainBytes -= randomSize; - lastKey = randomKeyBetween(KeyRangeRef(lastKey, params.keys.end)); + remainedBytes -= randomSize; + lastKey = nextKey; } } @@ -214,16 +228,15 @@ void MockStorageServer::setShardStatus(const KeyRangeRef& range, MockShardStatus auto oldStatus = it.value().status; if (isStatusTransitionValid(oldStatus, status)) { it.value() = ShardInfo{ status, newSize }; - } else if (oldStatus == MockShardStatus::COMPLETED && + } else if ((oldStatus == MockShardStatus::COMPLETED || oldStatus == MockShardStatus::FETCHED) && (status == MockShardStatus::INFLIGHT || status == MockShardStatus::FETCHED)) { CODE_PROBE(true, "Shard already on server"); } else { - TraceEvent(SevError, "MockShardStatusTransitionError") + TraceEvent(SevError, "MockShardStatusTransitionError", id) 
.detail("From", oldStatus) .detail("To", status) - .detail("ID", id) - .detail("KeyBegin", range.begin.toHexString()) - .detail("KeyEnd", range.begin.toHexString()); + .detail("KeyBegin", range.begin) + .detail("KeyEnd", range.begin); } } serverKeys.coalesce(range); @@ -320,7 +333,6 @@ Future MockStorageServer::run() { TraceEvent("MockStorageServerStart").detail("Address", ssi.address()); addActor(serveStorageMetricsRequests(this, ssi)); - // addActor(MockStorageServerImpl::serveMockStorageServer(this)); return actors.getResult(); } @@ -422,10 +434,6 @@ void MockStorageServer::signalFetchKeys(const KeyRangeRef& range, int64_t rangeT } } -Future MockStorageServer::fetchKeys(const MockStorageServer::FetchKeysParams& param) { - return MockStorageServerImpl::waitFetchKeysFinish(this, param); -} - void MockStorageServer::byteSampleApplySet(KeyRef const& key, int64_t kvSize) { // Update byteSample in memory and notify waiting metrics ByteSampleInfo sampleInfo = isKeyValueInSample(key, kvSize); diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index 4ea121697d..21be352e48 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -191,10 +191,6 @@ protected: // Update the storage metrics as if we write a k-v pair of `size` bytes. void notifyWriteMetrics(KeyRef const& key, int64_t size); - // Randomly generate keys and kv size between the fetch range, updating the byte sample. - // Once the fetchKeys return, the shard status will become FETCHED. 
- Future fetchKeys(const FetchKeysParams&); - // Update byte sample as if set a key value pair of which the size is kvSize void byteSampleApplySet(KeyRef const& key, int64_t kvSize); From a93eda244fdb3c41d8bcfb291137d3068cf52141 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Mon, 14 Nov 2022 16:09:31 -0800 Subject: [PATCH 46/57] sscanf on argument parsing --- fdbcli/TenantCommands.actor.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fdbcli/TenantCommands.actor.cpp b/fdbcli/TenantCommands.actor.cpp index daacb80fbd..0de598f75b 100644 --- a/fdbcli/TenantCommands.actor.cpp +++ b/fdbcli/TenantCommands.actor.cpp @@ -106,14 +106,18 @@ bool parseTenantListOptions(std::vector const& tokens, } value = token; if (tokencmp(param, "limit")) { - limit = std::stoi(value.get().toString()); - if (limit <= 0) { + int limit = 0; + int n = 0; + if (sscanf(value.get().toString().c_str(), "%d%n", &limit, &n) != 1 || n != value.get().size() || + limit <= 0) { fmt::print(stderr, "ERROR: invalid limit `{}'\n", token.toString().c_str()); return false; } } else if (tokencmp(param, "offset")) { - offset = std::stoi(value.get().toString()); - if (offset < 0) { + int offset = 0; + int n = 0; + if (sscanf(value.get().toString().c_str(), "%d%n", &offset, &n) != 1 || n != value.get().size() || + offset < 0) { fmt::print(stderr, "ERROR: invalid offset `{}'\n", token.toString().c_str()); return false; } From b699ba4c23a6681becfa91e7aa955e79980bb1b8 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Mon, 14 Nov 2022 14:25:54 -0800 Subject: [PATCH 47/57] Increase memtable and writebuffer size for rocksdb simulation test memtable and writebuffer size are too small in simulation, which causes thousands of sst files and at least 6 levels of ssts. Both make compaction slower in simulation and contribute to timeout errors.
After increasing the size, failure rate (timeout failures) when we only run rocksdb and sharded rocksdb engines in simulation drops from 10 out of 332339 tests to 10 out of 497532 tests. For apple dev who wants to look into the joshua details, before the change, joshua ensemble id is 20221111-223720-mengxudebugrocks-505ede1c55664ddf after the change, joshua ensemble id is 20221114-192042-mengxurocksdebugknobchange-1e4c047d112e9a38 --- fdbclient/ServerKnobs.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 431278ee22..af9639cf0a 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -390,19 +390,22 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // If true, do not process and store RocksDB logs init( ROCKSDB_MUTE_LOGS, true ); // Use a smaller memtable in simulation to avoid OOMs. - int64_t memtableBytes = isSimulated ? 32 * 1024 : 512 * 1024 * 1024; + // TODO: change it to bigger value. it was 32K when timeout + int64_t memtableBytes = isSimulated ? 1024 * 1024 : 512 * 1024 * 1024; init( ROCKSDB_MEMTABLE_BYTES, memtableBytes ); init( ROCKSDB_LEVEL_STYLE_COMPACTION, true ); init( ROCKSDB_UNSAFE_AUTO_FSYNC, false ); init( ROCKSDB_PERIODIC_COMPACTION_SECONDS, 0 ); init( ROCKSDB_PREFIX_LEN, 0 ); // If rocksdb block cache size is 0, the default 8MB is used. - int64_t blockCacheSize = isSimulated ? 0 : 1024 * 1024 * 1024 /* 1GB */; + int64_t blockCacheSize = isSimulated ? 16 * 1024 * 1024 : 1024 * 1024 * 1024 /* 1GB */; init( ROCKSDB_BLOCK_CACHE_SIZE, blockCacheSize ); init( ROCKSDB_METRICS_DELAY, 60.0 ); - init( ROCKSDB_READ_VALUE_TIMEOUT, isSimulated ? 5.0 : 200.0 ); - init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, isSimulated ? 5.0 : 200.0 ); - init( ROCKSDB_READ_RANGE_TIMEOUT, isSimulated ? 
5.0 : 200.0 ); + // // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have + // very high load and single read thread cannot process all the load within the timeouts. + init( ROCKSDB_READ_VALUE_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_VALUE_TIMEOUT = 5 * 60; + init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_VALUE_PREFIX_TIMEOUT = 5 * 60; + init( ROCKSDB_READ_RANGE_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_RANGE_TIMEOUT = 5 * 60; init( ROCKSDB_READ_QUEUE_WAIT, 1.0 ); init( ROCKSDB_READ_QUEUE_HARD_MAX, 1000 ); init( ROCKSDB_READ_QUEUE_SOFT_MAX, 500 ); From 68eb129c71329837934c5d82265298add723ff31 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Mon, 14 Nov 2022 16:17:49 -0800 Subject: [PATCH 48/57] RocksDB:Use knob to control readValueTimeout value in simulation --- fdbclient/ServerKnobs.cpp | 4 ++-- fdbserver/KeyValueStoreRocksDB.actor.cpp | 16 +++++----------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index af9639cf0a..8b32417aff 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -390,7 +390,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // If true, do not process and store RocksDB logs init( ROCKSDB_MUTE_LOGS, true ); // Use a smaller memtable in simulation to avoid OOMs. - // TODO: change it to bigger value. it was 32K when timeout int64_t memtableBytes = isSimulated ? 1024 * 1024 : 512 * 1024 * 1024; init( ROCKSDB_MEMTABLE_BYTES, memtableBytes ); init( ROCKSDB_LEVEL_STYLE_COMPACTION, true ); @@ -401,7 +400,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi int64_t blockCacheSize = isSimulated ? 
16 * 1024 * 1024 : 1024 * 1024 * 1024 /* 1GB */; init( ROCKSDB_BLOCK_CACHE_SIZE, blockCacheSize ); init( ROCKSDB_METRICS_DELAY, 60.0 ); - // // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have + // ROCKSDB_READ_VALUE_TIMEOUT, ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, ROCKSDB_READ_RANGE_TIMEOUT knobs: + // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have // very high load and single read thread cannot process all the load within the timeouts. init( ROCKSDB_READ_VALUE_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_VALUE_TIMEOUT = 5 * 60; init( ROCKSDB_READ_VALUE_PREFIX_TIMEOUT, 200.0 ); if (isSimulated) ROCKSDB_READ_VALUE_PREFIX_TIMEOUT = 5 * 60; diff --git a/fdbserver/KeyValueStoreRocksDB.actor.cpp b/fdbserver/KeyValueStoreRocksDB.actor.cpp index f2525b49a2..94e07b0798 100644 --- a/fdbserver/KeyValueStoreRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp @@ -1402,17 +1402,11 @@ struct RocksDBKeyValueStore : IKeyValueStore { ThreadReturnPromiseStream>* metricPromiseStream) : id(id), db(db), cf(cf), sharedState(sharedState), readIterPool(readIterPool), perfContextMetrics(perfContextMetrics), metricPromiseStream(metricPromiseStream), threadIndex(threadIndex) { - if (g_network->isSimulated()) { - // In simulation, increasing the read operation timeouts to 5 minutes, as some of the tests have - // very high load and single read thread cannot process all the load within the timeouts. 
- readValueTimeout = 5 * 60; - readValuePrefixTimeout = 5 * 60; - readRangeTimeout = 5 * 60; - } else { - readValueTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_TIMEOUT; - readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT; - readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT; - } + + readValueTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_TIMEOUT; + readValuePrefixTimeout = SERVER_KNOBS->ROCKSDB_READ_VALUE_PREFIX_TIMEOUT; + readRangeTimeout = SERVER_KNOBS->ROCKSDB_READ_RANGE_TIMEOUT; + if (SERVER_KNOBS->ROCKSDB_PERFCONTEXT_ENABLE) { // Enable perf context on the same thread with the db thread rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTimeExceptForMutex); From 2985b255fa508484c8758cc7038382386f102386 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Mon, 14 Nov 2022 16:39:13 -0800 Subject: [PATCH 49/57] remove unnecessary redeclaration --- fdbcli/TenantCommands.actor.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/fdbcli/TenantCommands.actor.cpp b/fdbcli/TenantCommands.actor.cpp index 0de598f75b..52cc94001d 100644 --- a/fdbcli/TenantCommands.actor.cpp +++ b/fdbcli/TenantCommands.actor.cpp @@ -106,7 +106,6 @@ bool parseTenantListOptions(std::vector const& tokens, } value = token; if (tokencmp(param, "limit")) { - int limit = 0; int n = 0; if (sscanf(value.get().toString().c_str(), "%d%n", &limit, &n) != 1 || n != value.get().size() || limit <= 0) { @@ -114,7 +113,6 @@ bool parseTenantListOptions(std::vector const& tokens, return false; } } else if (tokencmp(param, "offset")) { - int offset = 0; int n = 0; if (sscanf(value.get().toString().c_str(), "%d%n", &offset, &n) != 1 || n != value.get().size() || offset < 0) { From 2f53c6ebd8182f4f971a2b3559ff78a94e4ddbb1 Mon Sep 17 00:00:00 2001 From: Jon Fu Date: Tue, 15 Nov 2022 10:32:19 -0800 Subject: [PATCH 50/57] remove extra test check --- fdbcli/tests/fdbcli_tests.py | 2 +- fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 
deletions(-) diff --git a/fdbcli/tests/fdbcli_tests.py b/fdbcli/tests/fdbcli_tests.py index 3ba5e54da2..a647752123 100755 --- a/fdbcli/tests/fdbcli_tests.py +++ b/fdbcli/tests/fdbcli_tests.py @@ -793,7 +793,7 @@ def tenant_list(logger): assert output == 'ERROR: invalid offset `13y\'' output = run_fdbcli_command_and_get_error('tenant list a b state=14z') - assert output == 'ERROR: unrecognized tenant state(s) `14z\'' + assert output == 'ERROR: unrecognized tenant state(s) `14z\'.' @enable_logging() def tenant_get(logger): diff --git a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp index b2ac5ab39b..56c988243f 100644 --- a/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp +++ b/fdbserver/workloads/MetaclusterManagementWorkload.actor.cpp @@ -404,10 +404,6 @@ struct MetaclusterManagementWorkload : TestWorkload { wait(store(checkEntry2, MetaclusterAPI::getTenant(self->managementDb, tenant)) && store(tenantList, MetaclusterAPI::listTenants(self->managementDb, ""_sr, "\xff\xff"_sr, 10e6, 0, filters))); - if (tenantList.empty()) { - ASSERT(checkEntry2.tenantState != checkState); - return Void(); - } bool found = false; for (auto pair : tenantList) { ASSERT(pair.second.tenantState == checkState); From 70129c85cb25f9bbeda8da50e49a39614eb32345 Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Tue, 15 Nov 2022 12:20:28 -0800 Subject: [PATCH 51/57] formatting --- fdbrpc/include/fdbrpc/Stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbrpc/include/fdbrpc/Stats.h b/fdbrpc/include/fdbrpc/Stats.h index c0564780bb..5cbb2cccd0 100644 --- a/fdbrpc/include/fdbrpc/Stats.h +++ b/fdbrpc/include/fdbrpc/Stats.h @@ -218,7 +218,7 @@ class LatencySample { public: LatencySample(std::string name, UID id, double loggingInterval, double accuracy) : name(name), id(id), sampleStart(now()), sketch(accuracy), - latencySampleEventHolder(makeReference(id.toString() + "/" + name)) { + 
latencySampleEventHolder(makeReference(id.toString() + "/" + name)) { assert(accuracy > 0); if (accuracy <= 0) { fmt::print(stderr, "ERROR: LatencySample {} has invalid accuracy ({})", name, accuracy); From 214db4d17ee8ec38bb7b4b865dc29d8c2a7b2ddb Mon Sep 17 00:00:00 2001 From: Sam Gwydir Date: Tue, 15 Nov 2022 13:38:55 -0800 Subject: [PATCH 52/57] formatting --- fdbserver/TLogServer.actor.cpp | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index d9796923e3..f85eea5bba 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -1445,9 +1445,7 @@ ACTOR Future updateStorage(TLogData* self) { ACTOR Future updateStorageLoop(TLogData* self) { wait(delay(0, TaskPriority::UpdateStorage)); - loop { - wait(updateStorage(self)); - } + loop { wait(updateStorage(self)); } } void commitMessages(TLogData* self, @@ -1608,9 +1606,7 @@ ACTOR Future waitForMessagesForTag(Reference self, Tag reqTag, Ve // we want the caller to finish first, otherwise the data structure it is building might not be complete wait(delay(0.0)); } - when(wait(delay(timeout))) { - self->blockingPeekTimeouts += 1; - } + when(wait(delay(timeout))) { self->blockingPeekTimeouts += 1; } } return Void(); } @@ -2795,9 +2791,7 @@ ACTOR Future pullAsyncData(TLogData* self, while (!endVersion.present() || logData->version.get() < endVersion.get()) { loop { choose { - when(wait(r ? r->getMore(TaskPriority::TLogCommit) : Never())) { - break; - } + when(wait(r ? 
r->getMore(TaskPriority::TLogCommit) : Never())) { break; } when(wait(dbInfoChange)) { if (logData->logSystem->get()) { r = logData->logSystem->get()->peek(logData->logId, tagAt, endVersion, tags, true); @@ -3278,9 +3272,7 @@ ACTOR Future restorePersistentState(TLogData* self, choose { when(wait(updateStorage(self))) {} - when(wait(allRemoved)) { - throw worker_removed(); - } + when(wait(allRemoved)) { throw worker_removed(); } } } } else { @@ -3291,9 +3283,7 @@ ACTOR Future restorePersistentState(TLogData* self, } } } - when(wait(allRemoved)) { - throw worker_removed(); - } + when(wait(allRemoved)) { throw worker_removed(); } } } } catch (Error& e) { @@ -3639,9 +3629,7 @@ ACTOR Future tLog(IKeyValueStore* persistentData, forwardPromise(req.reply, self.tlogCache.get(req.recruitmentID)); } } - when(wait(error)) { - throw internal_error(); - } + when(wait(error)) { throw internal_error(); } when(wait(activeSharedChange)) { if (activeSharedTLog->get() == tlogId) { TraceEvent("SharedTLogNowActive", self.dbgid).detail("NowActive", activeSharedTLog->get()); From 8971b5907c946a9e384dedae3301f2c4031c647f Mon Sep 17 00:00:00 2001 From: Xiaoxi Wang Date: Wed, 16 Nov 2022 13:18:40 -0800 Subject: [PATCH 53/57] add comments; mark some methods const --- fdbserver/DDTxnProcessor.actor.cpp | 1 + fdbserver/MockGlobalState.actor.cpp | 4 ++-- fdbserver/include/fdbserver/MockGlobalState.h | 4 ++-- fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp | 5 ++++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fdbserver/DDTxnProcessor.actor.cpp b/fdbserver/DDTxnProcessor.actor.cpp index 313ba6baa2..9907f22784 100644 --- a/fdbserver/DDTxnProcessor.actor.cpp +++ b/fdbserver/DDTxnProcessor.actor.cpp @@ -984,6 +984,7 @@ ACTOR Future rawFinishMovement(std::shared_ptr mgs, // remove destination servers from source servers ASSERT_EQ(srcTeams.size(), 0); for (auto& id : srcTeams.front().servers) { + // the only caller moveKeys will always make sure the UID are sorted if 
(!std::binary_search(params.destinationTeam.begin(), params.destinationTeam.end(), id)) { mgs->allServers.at(id).removeShard(params.keys); } diff --git a/fdbserver/MockGlobalState.actor.cpp b/fdbserver/MockGlobalState.actor.cpp index c94207975d..0959fa77b1 100644 --- a/fdbserver/MockGlobalState.actor.cpp +++ b/fdbserver/MockGlobalState.actor.cpp @@ -167,7 +167,7 @@ public: } }; -bool MockStorageServer::allShardStatusEqual(const KeyRangeRef& range, MockShardStatus status) { +bool MockStorageServer::allShardStatusEqual(const KeyRangeRef& range, MockShardStatus status) const { auto ranges = serverKeys.intersectingRanges(range); ASSERT(!ranges.empty()); // at least the range is allKeys @@ -178,7 +178,7 @@ bool MockStorageServer::allShardStatusEqual(const KeyRangeRef& range, MockShardS return true; } -bool MockStorageServer::allShardStatusIn(const KeyRangeRef& range, const std::set& status) { +bool MockStorageServer::allShardStatusIn(const KeyRangeRef& range, const std::set& status) const { auto ranges = serverKeys.intersectingRanges(range); ASSERT(!ranges.empty()); // at least the range is allKeys diff --git a/fdbserver/include/fdbserver/MockGlobalState.h b/fdbserver/include/fdbserver/MockGlobalState.h index 21be352e48..4a4ff34fec 100644 --- a/fdbserver/include/fdbserver/MockGlobalState.h +++ b/fdbserver/include/fdbserver/MockGlobalState.h @@ -107,8 +107,8 @@ public: decltype(serverKeys)::Ranges getAllRanges() { return serverKeys.ranges(); } - bool allShardStatusEqual(const KeyRangeRef& range, MockShardStatus status); - bool allShardStatusIn(const KeyRangeRef& range, const std::set& status); + bool allShardStatusEqual(const KeyRangeRef& range, MockShardStatus status) const; + bool allShardStatusIn(const KeyRangeRef& range, const std::set& status) const; // change the status of range. This function may result in split to make the shard boundary align with range.begin // and range.end. 
In this case, if restrictSize==true, the sum of the split shard size is strictly equal to the old diff --git a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp index 5518cefa62..80248b09b4 100644 --- a/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp +++ b/fdbserver/workloads/IDDTxnProcessorApiCorrectness.actor.cpp @@ -319,7 +319,10 @@ struct IDDTxnProcessorApiWorkload : TestWorkload { verifyInitDataEqual(self->realInitDD, mockInitData); TraceEvent(SevDebug, relocateShardInterval.end(), relocateShardInterval.pairID); - self->mock->setupMockGlobalState(self->realInitDD); // in case SS remove or recruit + // The simulator have chances generating a scenario when after the first setupMockGlobalState call, there is a + // new storage server join the cluster, there's no way for mock DD to know the new storage server without + // calling setupMockGlobalState again. + self->mock->setupMockGlobalState(self->realInitDD); return Void(); } From c6ebdd8ae8037815991e4e79ed2a53b8cd38d9cc Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Wed, 16 Nov 2022 15:36:55 -0800 Subject: [PATCH 54/57] Send error when LogRouterPeekPopped happens Otherwise, the remote tlog won't get a response and the parallel peek requests will never be cleared, blocking subsequent peeks. As a result, remote tlog will no longer be able to pop the log router, which in turn can no longer peek tlogs. The whole remote side will become blocked. 
--- fdbserver/LogRouter.actor.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fdbserver/LogRouter.actor.cpp b/fdbserver/LogRouter.actor.cpp index 51796f9fc0..6309b47094 100644 --- a/fdbserver/LogRouter.actor.cpp +++ b/fdbserver/LogRouter.actor.cpp @@ -533,14 +533,19 @@ Future logRouterPeekMessages(PromiseType replyPromise, // kills logRouterPeekStream actor, otherwise that actor becomes stuck throw operation_obsolete(); } - replyPromise.send(Never()); - if (reqSequence.present()) { - auto& trackerData = self->peekTracker[peekId]; - auto& sequenceData = trackerData.sequence_version[sequence + 1]; - if (!sequenceData.isSet()) { - sequenceData.send(std::make_pair(reqBegin, reqOnlySpilled)); - } + if (std::is_same>::value) { + replyPromise.sendError(operation_obsolete()); + } else { + replyPromise.send(Never()); } + + /*if (reqSequence.present()) { + auto& trackerData = self->peekTracker[peekId]; + auto& sequenceData = trackerData.sequence_version[sequence + 1]; + if (!sequenceData.isSet()) { + sequenceData.send(std::make_pair(reqBegin, reqOnlySpilled)); + } + }*/ return Void(); } From f285a91f6c440c26d4490bfc34545e6122640de8 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Thu, 17 Nov 2022 11:36:36 -0800 Subject: [PATCH 55/57] Add more debug events --- fdbserver/LogRouter.actor.cpp | 35 +++++++++------ fdbserver/LogSystemPeekCursor.actor.cpp | 58 ++++++++++++++++++++----- fdbserver/TLogServer.actor.cpp | 3 ++ 3 files changed, 72 insertions(+), 24 deletions(-) diff --git a/fdbserver/LogRouter.actor.cpp b/fdbserver/LogRouter.actor.cpp index 6309b47094..3c0cc68a70 100644 --- a/fdbserver/LogRouter.actor.cpp +++ b/fdbserver/LogRouter.actor.cpp @@ -28,6 +28,7 @@ #include "flow/ActorCollection.h" #include "flow/Arena.h" #include "flow/Histogram.h" +#include "flow/Trace.h" #include "flow/network.h" #include "flow/DebugTrace.h" #include "flow/actorcompiler.h" // This must be the last #include. 
@@ -448,6 +449,14 @@ Future logRouterPeekMessages(PromiseType replyPromise, state int sequence = -1; state UID peekId; + DebugLogTraceEvent("LogRouterPeek0", self->dbgid) + .detail("ReturnIfBlocked", reqReturnIfBlocked) + .detail("Tag", reqTag.toString()) + .detail("Seq", reqSequence.present() ? reqSequence.get().second : -1) + .detail("SeqCursor", reqSequence.present() ? reqSequence.get().first : UID()) + .detail("Ver", self->version.get()) + .detail("Begin", reqBegin); + if (reqSequence.present()) { try { peekId = reqSequence.get().first; @@ -481,6 +490,13 @@ Future logRouterPeekMessages(PromiseType replyPromise, reqOnlySpilled = prevPeekData.second; wait(yield()); } catch (Error& e) { + DebugLogTraceEvent("LogRouterPeekError", self->dbgid) + .error(e) + .detail("Tag", reqTag.toString()) + .detail("Seq", reqSequence.present() ? reqSequence.get().second : -1) + .detail("SeqCursor", reqSequence.present() ? reqSequence.get().first : UID()) + .detail("Begin", reqBegin); + if (e.code() == error_code_timed_out || e.code() == error_code_operation_obsolete) { replyPromise.sendError(e); return Void(); @@ -490,12 +506,6 @@ Future logRouterPeekMessages(PromiseType replyPromise, } } - DebugLogTraceEvent("LogRouterPeek0", self->dbgid) - .detail("ReturnIfBlocked", reqReturnIfBlocked) - .detail("Tag", reqTag.toString()) - .detail("Ver", self->version.get()) - .detail("Begin", reqBegin); - if (reqReturnIfBlocked && self->version.get() < reqBegin) { replyPromise.sendError(end_of_stream()); if (reqSequence.present()) { @@ -528,24 +538,22 @@ Future logRouterPeekMessages(PromiseType replyPromise, TraceEvent(SevWarnAlways, "LogRouterPeekPopped", self->dbgid) .detail("Begin", reqBegin) .detail("Popped", poppedVer) + .detail("Tag", reqTag.toString()) + .detail("Seq", reqSequence.present() ? reqSequence.get().second : -1) + .detail("SeqCursor", reqSequence.present() ? 
reqSequence.get().first : UID()) .detail("Start", self->startVersion); if (std::is_same>::value) { // kills logRouterPeekStream actor, otherwise that actor becomes stuck throw operation_obsolete(); } if (std::is_same>::value) { + // Send error to avoid a race condition that the peer is really retrying, + // otherwise, the peer could be blocked forever. replyPromise.sendError(operation_obsolete()); } else { replyPromise.send(Never()); } - /*if (reqSequence.present()) { - auto& trackerData = self->peekTracker[peekId]; - auto& sequenceData = trackerData.sequence_version[sequence + 1]; - if (!sequenceData.isSet()) { - sequenceData.send(std::make_pair(reqBegin, reqOnlySpilled)); - } - }*/ return Void(); } @@ -686,6 +694,7 @@ ACTOR Future logRouterPop(LogRouterData* self, TLogPopRequest req) { if (!tagData) { tagData = self->createTagData(req.tag, req.to, req.durableKnownCommittedVersion); } else if (req.to > tagData->popped) { + DebugLogTraceEvent("LogRouterPop", self->dbgid).detail("Tag", req.tag.toString()).detail("PopVersion", req.to); tagData->popped = req.to; tagData->durableKnownCommittedVersion = req.durableKnownCommittedVersion; wait(tagData->eraseMessagesBefore(req.to, self, TaskPriority::TLogPop)); diff --git a/fdbserver/LogSystemPeekCursor.actor.cpp b/fdbserver/LogSystemPeekCursor.actor.cpp index 4a0164aac7..d636e71d1f 100644 --- a/fdbserver/LogSystemPeekCursor.actor.cpp +++ b/fdbserver/LogSystemPeekCursor.actor.cpp @@ -62,6 +62,8 @@ ILogSystem::ServerPeekCursor::ServerPeekCursor(Referenceresults.minKnownCommittedVersion = 0; DebugLogTraceEvent(SevDebug, "SPC_Starting", randomID) .detail("Tag", tag.toString()) + .detail("Parallel", parallelGetMore) + .detail("Interf", interf && interf->get().present() ? 
interf->get().id() : UID()) .detail("UsePeekStream", usePeekStream) .detail("Begin", begin) .detail("End", end); @@ -111,7 +113,9 @@ bool ILogSystem::ServerPeekCursor::hasMessage() const { } void ILogSystem::ServerPeekCursor::nextMessage() { - //TraceEvent("SPC_NextMessage", randomID).detail("MessageVersion", messageVersion.toString()); + DebugLogTraceEvent("SPC_NextMessage", randomID) + .detail("Tag", tag.toString()) + .detail("MessageVersion", messageVersion.toString()); ASSERT(hasMsg); if (rd.empty()) { messageVersion.reset(std::min(results.end, end.version)); @@ -143,11 +147,13 @@ void ILogSystem::ServerPeekCursor::nextMessage() { rd.rewind(); rd.readBytes(messageAndTags.getHeaderSize()); hasMsg = true; - //TraceEvent("SPC_NextMessageB", randomID).detail("MessageVersion", messageVersion.toString()); + DebugLogTraceEvent("SPC_NextMessageB", randomID) + .detail("Tag", tag.toString()) + .detail("MessageVersion", messageVersion.toString()); } StringRef ILogSystem::ServerPeekCursor::getMessage() { - //TraceEvent("SPC_GetMessage", randomID); + DebugLogTraceEvent("SPC_GetMessage", randomID).detail("Tag", tag.toString()); StringRef message = messageAndTags.getMessageWithoutTags(); rd.readBytes(message.size()); // Consumes the message. return message; @@ -260,6 +266,14 @@ ACTOR Future serverPeekParallelGetMore(ILogSystem::ServerPeekCursor* self, } loop { + DebugLogTraceEvent("SPC_GetMoreP", self->randomID) + .detail("Tag", self->tag.toString()) + .detail("Has", self->hasMessage()) + .detail("Begin", self->messageVersion.version) + .detail("Parallel", self->parallelGetMore) + .detail("Seq", self->sequence) + .detail("Sizes", self->futureResults.size()) + .detail("Interf", self->interf->get().present() ? 
self->interf->get().id() : UID()); state Version expectedBegin = self->messageVersion.version; try { if (self->parallelGetMore || self->onlySpilled) { @@ -294,7 +308,12 @@ ACTOR Future serverPeekParallelGetMore(ILogSystem::ServerPeekCursor* self, expectedBegin = res.end; self->futureResults.pop_front(); updateCursorWithReply(self, res); - //TraceEvent("SPC_GetMoreB", self->randomID).detail("Has", self->hasMessage()).detail("End", res.end).detail("Popped", res.popped.present() ? res.popped.get() : 0); + DebugLogTraceEvent("SPC_GetMoreReply", self->randomID) + .detail("Has", self->hasMessage()) + .detail("Tag", self->tag.toString()) + .detail("End", res.end) + .detail("Size", self->futureResults.size()) + .detail("Popped", res.popped.present() ? res.popped.get() : 0); return Void(); } when(wait(self->interfaceChanged)) { @@ -306,11 +325,17 @@ ACTOR Future serverPeekParallelGetMore(ILogSystem::ServerPeekCursor* self, } } } catch (Error& e) { + DebugLogTraceEvent("PeekCursorError", self->randomID) + .error(e) + .detail("Tag", self->tag.toString()) + .detail("Begin", self->messageVersion.version) + .detail("Interf", self->interf->get().present() ? self->interf->get().id() : UID()); + if (e.code() == error_code_end_of_stream) { self->end.reset(self->messageVersion.version); return Void(); } else if (e.code() == error_code_timed_out || e.code() == error_code_operation_obsolete) { - TraceEvent("PeekCursorTimedOut", self->randomID).error(e); + TraceEvent ev("PeekCursorTimedOut", self->randomID); // We *should* never get timed_out(), as it means the TLog got stuck while handling a parallel peek, // and thus we've likely just wasted 10min. 
// timed_out() is sent by cleanupPeekTrackers as value PEEK_TRACKER_EXPIRATION_TIME @@ -326,6 +351,11 @@ ACTOR Future serverPeekParallelGetMore(ILogSystem::ServerPeekCursor* self, self->randomID = deterministicRandom()->randomUniqueID(); self->sequence = 0; self->futureResults.clear(); + ev.error(e) + .detail("Tag", self->tag.toString()) + .detail("Begin", self->messageVersion.version) + .detail("NewID", self->randomID) + .detail("Interf", self->interf->get().present() ? self->interf->get().id() : UID()); } else { throw e; } @@ -415,7 +445,11 @@ ACTOR Future serverPeekGetMore(ILogSystem::ServerPeekCursor* self, TaskPri taskID)) : Never())) { updateCursorWithReply(self, res); - //TraceEvent("SPC_GetMoreB", self->randomID).detail("Has", self->hasMessage()).detail("End", res.end).detail("Popped", res.popped.present() ? res.popped.get() : 0); + DebugLogTraceEvent("SPC_GetMoreB", self->randomID) + .detail("Tag", self->tag.toString()) + .detail("Has", self->hasMessage()) + .detail("End", res.end) + .detail("Popped", res.popped.present() ? 
res.popped.get() : 0); return Void(); } when(wait(self->interf->onChange())) { self->onlySpilled = false; } @@ -431,11 +465,13 @@ ACTOR Future serverPeekGetMore(ILogSystem::ServerPeekCursor* self, TaskPri } Future ILogSystem::ServerPeekCursor::getMore(TaskPriority taskID) { - // TraceEvent("SPC_GetMore", randomID) - // .detail("HasMessage", hasMessage()) - // .detail("More", !more.isValid() || more.isReady()) - // .detail("MessageVersion", messageVersion.toString()) - // .detail("End", end.toString()); + DebugLogTraceEvent("SPC_GetMore", randomID) + .detail("Tag", tag.toString()) + .detail("HasMessage", hasMessage()) + .detail("More", !more.isValid() || more.isReady()) + .detail("Parallel", parallelGetMore) + .detail("MessageVersion", messageVersion.toString()) + .detail("End", end.toString()); if (hasMessage() && !parallelGetMore) return Void(); if (!more.isValid() || more.isReady()) { diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index 428ebc79a8..793f01628e 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -2187,6 +2187,9 @@ ACTOR Future doQueueCommit(TLogData* self, if (logData->logSystem->get() && (!logData->isPrimary || logData->logRouterPoppedVersion < logData->logRouterPopToVersion)) { logData->logRouterPoppedVersion = ver; + DebugLogTraceEvent("LogPop", self->dbgid) + .detail("Tag", logData->remoteTag.toString()) + .detail("Version", knownCommittedVersion); logData->logSystem->get()->pop(ver, logData->remoteTag, knownCommittedVersion, logData->locality); } From d865e77f062f6aab9458b4e229400956cbe8c2ff Mon Sep 17 00:00:00 2001 From: neethuhaneesha Date: Thu, 17 Nov 2022 15:39:22 -0800 Subject: [PATCH 56/57] RocksDB 7.7.3 version upgrade (#8858) --- cmake/CompileRocksDB.cmake | 6 +++--- fdbserver/KeyValueStoreRocksDB.actor.cpp | 9 +++------ fdbserver/KeyValueStoreShardedRocksDB.actor.cpp | 9 +++------ fdbserver/RocksDBCheckpointUtils.actor.cpp | 6 +++--- 4 files changed, 12 insertions(+), 18 
deletions(-) diff --git a/cmake/CompileRocksDB.cmake b/cmake/CompileRocksDB.cmake index 3fdea389ab..f257443c80 100644 --- a/cmake/CompileRocksDB.cmake +++ b/cmake/CompileRocksDB.cmake @@ -1,6 +1,6 @@ # FindRocksDB -find_package(RocksDB 6.27.3) +find_package(RocksDB 7.7.3) include(ExternalProject) @@ -49,8 +49,8 @@ if(ROCKSDB_FOUND) ${BINARY_DIR}/librocksdb.a) else() ExternalProject_Add(rocksdb - URL https://github.com/facebook/rocksdb/archive/refs/tags/v6.27.3.tar.gz - URL_HASH SHA256=ee29901749b9132692b26f0a6c1d693f47d1a9ed8e3771e60556afe80282bf58 + URL https://github.com/facebook/rocksdb/archive/refs/tags/v7.7.3.tar.gz + URL_HASH SHA256=b8ac9784a342b2e314c821f6d701148912215666ac5e9bdbccd93cf3767cb611 CMAKE_ARGS ${RocksDB_CMAKE_ARGS} BUILD_BYPRODUCTS /librocksdb.a INSTALL_COMMAND "" diff --git a/fdbserver/KeyValueStoreRocksDB.actor.cpp b/fdbserver/KeyValueStoreRocksDB.actor.cpp index aaaf8dd807..510dd5029b 100644 --- a/fdbserver/KeyValueStoreRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreRocksDB.actor.cpp @@ -68,12 +68,9 @@ #ifdef SSD_ROCKSDB_EXPERIMENTAL -// Enforcing rocksdb version to be 6.27.3 or greater. -static_assert(ROCKSDB_MAJOR >= 6, "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version"); -static_assert(ROCKSDB_MAJOR == 6 ? ROCKSDB_MINOR >= 27 : true, - "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version"); -static_assert((ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR == 27) ? ROCKSDB_PATCH >= 3 : true, - "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version"); +// Enforcing rocksdb version to be 7.7.3. +static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3), + "Unsupported rocksdb version. 
Update the rocksdb to 7.7.3 version"); namespace { using rocksdb::BackgroundErrorReason; diff --git a/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp b/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp index 53edcc1d95..83f99fa3ab 100644 --- a/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp +++ b/fdbserver/KeyValueStoreShardedRocksDB.actor.cpp @@ -41,12 +41,9 @@ #ifdef SSD_ROCKSDB_EXPERIMENTAL -// Enforcing rocksdb version to be 6.27.3 or greater. -static_assert(ROCKSDB_MAJOR >= 6, "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version"); -static_assert(ROCKSDB_MAJOR == 6 ? ROCKSDB_MINOR >= 27 : true, - "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version"); -static_assert((ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR == 27) ? ROCKSDB_PATCH >= 3 : true, - "Unsupported rocksdb version. Update the rocksdb to 6.27.3 version"); +// Enforcing rocksdb version to be 7.7.3. +static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3), + "Unsupported rocksdb version. Update the rocksdb to 7.7.3 version"); const std::string rocksDataFolderSuffix = "-data"; const std::string METADATA_SHARD_ID = "kvs-metadata"; diff --git a/fdbserver/RocksDBCheckpointUtils.actor.cpp b/fdbserver/RocksDBCheckpointUtils.actor.cpp index 53f41085e8..006a67aefc 100644 --- a/fdbserver/RocksDBCheckpointUtils.actor.cpp +++ b/fdbserver/RocksDBCheckpointUtils.actor.cpp @@ -43,9 +43,9 @@ #include "flow/actorcompiler.h" // has to be last include #ifdef SSD_ROCKSDB_EXPERIMENTAL -// Enforcing rocksdb version to be 6.22.1 or greater. -static_assert(ROCKSDB_MAJOR == 6 && ROCKSDB_MINOR >= 22 && ROCKSDB_PATCH >= 1, - "Unsupported rocksdb version. Update the rocksdb to at least 6.22.1 version"); +// Enforcing rocksdb version to be 7.7.3. +static_assert((ROCKSDB_MAJOR == 7 && ROCKSDB_MINOR == 7 && ROCKSDB_PATCH == 3), + "Unsupported rocksdb version. 
Update the rocksdb to 7.7.3 version"); namespace { From f6612ebd00d3391d2a675890470e54be11c97cfd Mon Sep 17 00:00:00 2001 From: Zhe Wu Date: Thu, 17 Nov 2022 10:56:50 -0800 Subject: [PATCH 57/57] When selecting physical shard and it is not available for the remote team, retry selecting a new dst physical shard --- fdbserver/DDRelocationQueue.actor.cpp | 14 ++++++++++---- fdbserver/DDShardTracker.actor.cpp | 12 +++++++----- .../include/fdbserver/DataDistribution.actor.h | 12 ++++++------ 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/fdbserver/DDRelocationQueue.actor.cpp b/fdbserver/DDRelocationQueue.actor.cpp index ea5eef4848..0716a3d86f 100644 --- a/fdbserver/DDRelocationQueue.actor.cpp +++ b/fdbserver/DDRelocationQueue.actor.cpp @@ -1548,14 +1548,20 @@ ACTOR Future dataDistributionRelocator(DDQueue* self, if (enableShardMove && tciIndex == 1) { ASSERT(physicalShardIDCandidate != UID().first() && physicalShardIDCandidate != anonymousShardId.first()); - Optional remoteTeamWithPhysicalShard = + std::pair, bool> remoteTeamWithPhysicalShard = self->physicalShardCollection->tryGetAvailableRemoteTeamWith( physicalShardIDCandidate, metrics, debugID); - // TODO: when we know that `physicalShardIDCandidate` exists, remote team must also exists. - if (remoteTeamWithPhysicalShard.present()) { + if (!remoteTeamWithPhysicalShard.second) { + // Physical shard with `physicalShardIDCandidate` is not available. Retry selecting new + // dst physical shard. 
+ self->retryFindDstReasonCount[DDQueue::RetryFindDstReason::NoAvailablePhysicalShard]++; + foundTeams = false; + break; + } + if (remoteTeamWithPhysicalShard.first.present()) { // Exists a remoteTeam in the mapping that has the physicalShardIDCandidate // use the remoteTeam with the physicalShard as the bestTeam - req = GetTeamRequest(remoteTeamWithPhysicalShard.get().servers); + req = GetTeamRequest(remoteTeamWithPhysicalShard.first.get().servers); } } diff --git a/fdbserver/DDShardTracker.actor.cpp b/fdbserver/DDShardTracker.actor.cpp index fddbf25f14..69bb6c853a 100644 --- a/fdbserver/DDShardTracker.actor.cpp +++ b/fdbserver/DDShardTracker.actor.cpp @@ -1756,7 +1756,7 @@ InOverSizePhysicalShard PhysicalShardCollection::isInOverSizePhysicalShard(KeyRa } // May return a problematic remote team -Optional PhysicalShardCollection::tryGetAvailableRemoteTeamWith( +std::pair, bool> PhysicalShardCollection::tryGetAvailableRemoteTeamWith( uint64_t inputPhysicalShardID, StorageMetrics const& moveInMetrics, uint64_t debugID) { @@ -1764,10 +1764,10 @@ Optional PhysicalShardCollection::tryGetAvail ASSERT(SERVER_KNOBS->ENABLE_DD_PHYSICAL_SHARD); ASSERT(inputPhysicalShardID != anonymousShardId.first() && inputPhysicalShardID != UID().first()); if (physicalShardInstances.count(inputPhysicalShardID) == 0) { - return Optional(); + return { Optional(), true }; } if (!checkPhysicalShardAvailable(inputPhysicalShardID, moveInMetrics)) { - return Optional(); + return { Optional(), false }; } for (auto team : physicalShardInstances[inputPhysicalShardID].teams) { if (team.primary == false) { @@ -1777,10 +1777,12 @@ Optional PhysicalShardCollection::tryGetAvail .detail("TeamSize", team.servers.size()) .detail("PhysicalShardsOfTeam", convertIDsToString(teamPhysicalShardIDs[team])) .detail("DebugID", debugID);*/ - return team; + return { team, true }; } } - UNREACHABLE(); + // In this case, the physical shard may not be populated in the remote region yet, e.g., we are making a + // 
configuration change to turn a single region cluster into HA mode. + return { Optional(), true }; } // The update of PhysicalShardToTeams, Collection, keyRangePhysicalShardIDMap should be atomic diff --git a/fdbserver/include/fdbserver/DataDistribution.actor.h b/fdbserver/include/fdbserver/DataDistribution.actor.h index 9054ab55a3..40143c3109 100644 --- a/fdbserver/include/fdbserver/DataDistribution.actor.h +++ b/fdbserver/include/fdbserver/DataDistribution.actor.h @@ -284,12 +284,12 @@ public: const std::unordered_set& excludedPhysicalShards, uint64_t debugID); - // Step 2: get a remote team which has the input physical shard - // Return empty if no such remote team - // May return a problematic remote team, and re-selection is required for this case - Optional tryGetAvailableRemoteTeamWith(uint64_t inputPhysicalShardID, - StorageMetrics const& moveInMetrics, - uint64_t debugID); + // Step 2: get a remote team which has the input physical shard. + // Second field in the returned pair indicates whether this physical shard is available or not. + // Return empty if no such remote team. + // May return a problematic remote team, and re-selection is required for this case. + std::pair, bool> + tryGetAvailableRemoteTeamWith(uint64_t inputPhysicalShardID, StorageMetrics const& moveInMetrics, uint64_t debugID); // Invariant: // (1) If forceToUseNewPhysicalShard is set, use the bestTeams selected by getTeam(), and create a new physical // shard for the teams