foundationdb/fdbclient/SpecialKeySpace.actor.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

2733 lines
117 KiB
C++
Raw Normal View History

2020-04-09 05:50:55 +08:00
/*
* SpecialKeySpace.actor.cpp
*
* This source file is part of the FoundationDB open source project
*
2022-03-22 04:36:23 +08:00
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
2020-04-09 05:50:55 +08:00
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2021-02-18 17:27:14 +08:00
#include "boost/lexical_cast.hpp"
#include "boost/algorithm/string.hpp"
#include <time.h>
#include <msgpack.hpp>
#include <exception>
#include "fdbclient/ActorLineageProfiler.h"
#include "fdbclient/ClusterConnectionMemoryRecord.h"
#include "fdbclient/FDBOptions.g.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/ProcessInterface.h"
#include "fdbclient/GlobalConfig.actor.h"
#include "fdbclient/SpecialKeySpace.actor.h"
#include "flow/Arena.h"
#include "flow/UnitTest.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/StatusClient.h"
#include "flow/actorcompiler.h" // This must be the last #include.
2020-12-04 06:06:11 +08:00
namespace {
// Names of the options that make up SpecialKeySpace::tracingOptions.
const std::string kTracingTransactionIdKey = "transaction_id";
const std::string kTracingTokenKey = "token";

// Returns true iff `key` is non-empty and every character matches [A-Za-z0-9_].
static bool isAlphaNumeric(const std::string& key) {
	static const char* const kAllowedChars = "0123456789"
	                                         "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	                                         "abcdefghijklmnopqrstuvwxyz"
	                                         "_";
	// An empty key is rejected explicitly; otherwise look for the first disallowed character
	return !key.empty() && key.find_first_not_of(kAllowedChars) == std::string::npos;
}
} // namespace
2020-12-04 06:06:11 +08:00
// Range of all keys under "tenant_map/"; it is combined with the management module prefix
// when registered as the "tenantmap" command range.
const KeyRangeRef TenantMapRangeImpl::submoduleRange("tenant_map/"_sr, "tenant_map0"_sr);
// Maps every module to the key range it owns inside the special key space.
// Modules that own a whole sub-tree use the range ["<name>/", "<name>0"); single-key modules use singleKeyRange.
std::unordered_map<SpecialKeySpace::MODULE, KeyRange> SpecialKeySpace::moduleToBoundary = {
	{ MODULE::TRANSACTION, KeyRangeRef("\xff\xff/transaction/"_sr, "\xff\xff/transaction0"_sr) },
	{ MODULE::WORKERINTERFACE, KeyRangeRef("\xff\xff/worker_interfaces/"_sr, "\xff\xff/worker_interfaces0"_sr) },
	{ MODULE::STATUSJSON, singleKeyRange("\xff\xff/status/json"_sr) },
	{ MODULE::CONNECTIONSTRING, singleKeyRange("\xff\xff/connection_string"_sr) },
	{ MODULE::CLUSTERFILEPATH, singleKeyRange("\xff\xff/cluster_file_path"_sr) },
	{ MODULE::METRICS, KeyRangeRef("\xff\xff/metrics/"_sr, "\xff\xff/metrics0"_sr) },
	{ MODULE::MANAGEMENT, KeyRangeRef("\xff\xff/management/"_sr, "\xff\xff/management0"_sr) },
	{ MODULE::ERRORMSG, singleKeyRange("\xff\xff/error_message"_sr) },
	{ MODULE::CONFIGURATION, KeyRangeRef("\xff\xff/configuration/"_sr, "\xff\xff/configuration0"_sr) },
	{ MODULE::GLOBALCONFIG, KeyRangeRef("\xff\xff/global_config/"_sr, "\xff\xff/global_config0"_sr) },
	{ MODULE::TRACING, KeyRangeRef("\xff\xff/tracing/"_sr, "\xff\xff/tracing0"_sr) },
	{ MODULE::ACTORLINEAGE, KeyRangeRef("\xff\xff/actor_lineage/"_sr, "\xff\xff/actor_lineage0"_sr) },
	{ MODULE::ACTOR_PROFILER_CONF,
	  KeyRangeRef("\xff\xff/actor_profiler_conf/"_sr, "\xff\xff/actor_profiler_conf0"_sr) }
};
std::unordered_map<std::string, KeyRange> SpecialKeySpace::managementApiCommandToRange = {
{ "exclude",
KeyRangeRef(LiteralStringRef("excluded/"), LiteralStringRef("excluded0"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "failed",
KeyRangeRef(LiteralStringRef("failed/"), LiteralStringRef("failed0"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "excludedlocality",
KeyRangeRef(LiteralStringRef("excluded_locality/"), LiteralStringRef("excluded_locality0"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "failedlocality",
KeyRangeRef(LiteralStringRef("failed_locality/"), LiteralStringRef("failed_locality0"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "lock", singleKeyRange(LiteralStringRef("db_locked")).withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "consistencycheck",
singleKeyRange(LiteralStringRef("consistency_check_suspended"))
2021-01-30 03:45:52 +08:00
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "coordinators",
KeyRangeRef(LiteralStringRef("coordinators/"), LiteralStringRef("coordinators0"))
.withPrefix(moduleToBoundary[MODULE::CONFIGURATION].begin) },
{ "advanceversion",
singleKeyRange(LiteralStringRef("min_required_commit_version"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
Add fdbcli command to read/write version epoch (#6480) * Initialize cluster version at wall-clock time Previously, new clusters would begin at version 0. After this change, clusters will initialize at a version matching wall-clock time. Instead of using the Unix epoch (or Windows epoch), FDB clusters will use a new epoch, defaulting to January 1, 2010, 01:00:00+00:00. In the future, this base epoch will be modifiable through fdbcli, allowing administrators to advance the cluster version. Basing the version off of time allows different FDB clusters to share data without running into version issues. * Send version epoch to master * Cleanup * Update fdbserver/storageserver.actor.cpp Co-authored-by: A.J. Beamon <aj.beamon@snowflake.com> * Jump directly to expected version if possible * Fix initial version issue on storage servers * Add random recovery offset to start version in simulation * Type fixes * Disable reference time by default Enable on a cluster using the fdbcli command `versionepoch add 0`. * Use correct recoveryTransactionVersion when recovering * Allow version epoch to be adjusted forwards (to decrease the version) * Set version epoch in simulation * Add quiet database check to ensure small version offset * Fix initial version issue on storage servers * Disable reference time by default Enable on a cluster using the fdbcli command `versionepoch add 0`. * Add fdbcli command to read/write version epoch * Cause recovery when version epoch is set * Handle optional version epoch key * Add ability to clear the version epoch This causes version advancement to revert to the old methodology whereas versions attempt to advance by about a million versions per second, instead of trying to match the clock. * Update transaction access * Modify version epoch to use microseconds instead of seconds * Modify fdbcli version target API Move commands from `versionepoch` to `targetversion` top level command. 
* Add fdbcli tests for * Temporarily disable targetversion cli tests * Fix version epoch fetch issue * Fix Arena issue * Reduce max version jump in simulation to 1,000,000 * Rework fdbcli API It now requires two commands to fully switch a cluster to using the version epoch. First, enable the version epoch with `versionepoch enable` or `versionepoch set <versionepoch>`. At this point, versions will be given out at a faster or slower rate in an attempt to reach the expected version. Then, run `versionepoch commit` to perform a one time jump to the expected version. This is essentially irreversible. * Temporarily disable old targetversion tests * Cleanup * Move version epoch buggify to sequencer This will cause some issues with the QuietDatabase check for the version offset - namely, it won't do anything, since the version epoch is not being written to the txnStateStore in simulation. This will get fixed in the future. Co-authored-by: A.J. Beamon <aj.beamon@snowflake.com>
2022-04-09 03:33:19 +08:00
{ "versionepoch",
singleKeyRange(LiteralStringRef("version_epoch")).withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "profile",
KeyRangeRef(LiteralStringRef("profiling/"), LiteralStringRef("profiling0"))
2021-03-27 03:24:45 +08:00
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "maintenance",
KeyRangeRef(LiteralStringRef("maintenance/"), LiteralStringRef("maintenance0"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "datadistribution",
KeyRangeRef(LiteralStringRef("data_distribution/"), LiteralStringRef("data_distribution0"))
.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) },
{ "tenantmap", TenantMapRangeImpl::submoduleRange.withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin) }
};
// Maps an actor-lineage command name to its key range inside the ACTORLINEAGE module.
std::unordered_map<std::string, KeyRange> SpecialKeySpace::actorLineageApiCommandToRange = {
	{ "state", KeyRangeRef("state/"_sr, "state0"_sr).withPrefix(moduleToBoundary[MODULE::ACTORLINEAGE].begin) },
	{ "time", KeyRangeRef("time/"_sr, "time0"_sr).withPrefix(moduleToBoundary[MODULE::ACTORLINEAGE].begin) }
};
// Option key names known to the special key space (one "force" flag per exclusion-style command).
std::set<std::string> SpecialKeySpace::options = {
	"excluded/force",
	"failed/force",
	"excluded_locality/force",
	"failed_locality/force",
};

// Option names known to the tracing module.
std::set<std::string> SpecialKeySpace::tracingOptions = {
	kTracingTransactionIdKey,
	kTracingTokenKey,
};
2021-05-04 04:14:16 +08:00
// Forward declaration; the definition is not part of this section of the file.
// Used by SKSCTestImpl::getRange below to post-process the key-value pairs read for `kr`.
RangeResult rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr, const RangeResult& res);
// This function will move the given KeySelector as far as possible to the standard form:
// orEqual == false && offset == 1 (Standard form)
// Only the keys of skrImpl's range are consulted. If the selector cannot be resolved inside that range,
// its key is moved to the edge of the keys that were read and the remaining offset is left in ks->offset,
// so that the caller can continue with the neighboring range.
// The cache object is used to cache the first read result from the rpc call during the key resolution;
// later key resolution or result filtering reads from this cache object, instead of issuing another rpc call,
// in order to have consistent results
ACTOR Future<Void> moveKeySelectorOverRangeActor(const SpecialKeyRangeReadImpl* skrImpl,
ReadYourWritesTransaction* ryw,
KeySelector* ks,
2021-05-04 04:14:16 +08:00
Optional<RangeResult>* cache) {
// should be removed before calling
ASSERT(!ks->orEqual);
// never being called if KeySelector is already normalized
ASSERT(ks->offset != 1);
// Start with the whole range of the implementation and narrow one side down to the selector's key below
state Key startKey(skrImpl->getKeyRange().begin);
state Key endKey(skrImpl->getKeyRange().end);
2021-05-04 04:14:16 +08:00
state RangeResult result;
if (ks->offset < 1) {
// less than the given key
if (skrImpl->getKeyRange().contains(ks->getKey()))
endKey = ks->getKey();
} else {
// greater than the given key
if (skrImpl->getKeyRange().contains(ks->getKey()))
startKey = ks->getKey();
}
// Note : startKey never equals endKey here
ASSERT(startKey < endKey);
DisabledTraceEvent(SevDebug, "NormalizeKeySelector")
.detail("OriginalKey", ks->getKey())
.detail("OriginalOffset", ks->offset)
.detail("SpecialKeyRangeStart", skrImpl->getKeyRange().begin)
.detail("SpecialKeyRangeEnd", skrImpl->getKeyRange().end);
// Row-count hint: `offset` rows when moving right, `1 - offset` rows when moving left
GetRangeLimits limitsHint(ks->offset >= 1 ? ks->offset : 1 - ks->offset);
if (skrImpl->isAsync()) {
// Async implementations take the cache pointer as an extra argument
const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast<const SpecialKeyRangeAsyncImpl*>(skrImpl);
RangeResult result_ = wait(ptr->getRange(ryw, KeyRangeRef(startKey, endKey), limitsHint, cache));
result = result_;
} else {
RangeResult result_ = wait(skrImpl->getRange(ryw, KeyRangeRef(startKey, endKey), limitsHint));
result = result_;
}
2020-03-24 14:55:56 +08:00
// Nothing to step over in this range: leave the selector untouched
if (result.size() == 0) {
TraceEvent(SevDebug, "ZeroElementsIntheRange").detail("Start", startKey).detail("End", endKey);
2020-03-24 14:55:56 +08:00
return Void();
}
2020-04-01 00:33:25 +08:00
// Note : KeySelector::setKey has byte limit according to the knobs, customize it if needed
if (ks->offset < 1) {
if (result.size() >= 1 - ks->offset) {
// Enough keys to the left: the selector resolves to the (1 - offset)-th key counted from the end
ks->setKey(KeyRef(ks->arena(), result[result.size() - (1 - ks->offset)].key));
ks->offset = 1;
} else {
// Not enough keys: move to the first key read and consume result.size() steps of the offset
ks->setKey(KeyRef(ks->arena(), result[0].key));
ks->offset += result.size();
}
} else {
if (result.size() >= ks->offset) {
// Enough keys to the right: the selector resolves to the offset-th key
ks->setKey(KeyRef(ks->arena(), result[ks->offset - 1].key));
ks->offset = 1;
} else {
// Not enough keys: move just past the last key read and consume result.size() steps of the offset
// TODO : the keyAfter will just return if key == \xff\xff
ks->setKey(KeyRef(ks->arena(), keyAfter(result[result.size() - 1].key)));
ks->offset -= result.size();
}
}
DisabledTraceEvent(SevDebug, "NormalizeKeySelector")
.detail("NormalizedKey", ks->getKey())
.detail("NormalizedOffset", ks->offset)
.detail("SpecialKeyRangeStart", skrImpl->getKeyRange().begin)
.detail("SpecialKeyRangeEnd", skrImpl->getKeyRange().end);
return Void();
}
// This function will normalize the given KeySelector to a standard KeySelector:
// orEqual == false && offset == 1 (Standard form)
// If the corresponding key is outside the whole space, it will move to the begin or the end
// It does have overhead here since we query all keys twice in the worst case.
// However, moving the KeySelector while handling other parameters like limits makes the code much more complex and hard
// to maintain; Thus, separate each part to make the code easy to understand and more compact
2020-06-18 02:28:52 +08:00
// Boundary is the range of the legal key space, which, by default is the range of the module
2020-06-18 03:47:54 +08:00
// And (\xff\xff, \xff\xff\xff) if SPECIAL_KEY_SPACE_RELAXED is turned on
// Outputs:
//   *actualOffset - the selector's offset after walking the ranges, before any clamping to the boundary
//   *result       - readToBegin / readThroughEnd is set when the selector had to be clamped to the boundary
ACTOR Future<Void> normalizeKeySelectorActor(SpecialKeySpace* sks,
ReadYourWritesTransaction* ryw,
KeySelector* ks,
KeyRangeRef boundary,
int* actualOffset,
2021-05-04 04:14:16 +08:00
RangeResult* result,
Optional<RangeResult>* cache) {
// If offset < 1, where we need to move left, iter points to the range containing at least one smaller key
2020-12-07 06:04:16 +08:00
// (It is a waste of time to walk through the range whose begin key is the same as ks->key)
// (rangeContainingKeyBefore itself handles the case where ks->key == Key())
// Otherwise, we only need to move right if offset > 1, iter points to the range containing the key
// Since boundary.end is always a key in the RangeMap, it is always safe to move right
state RangeMap<Key, SpecialKeyRangeReadImpl*, KeyRangeRef>::iterator iter =
ks->offset < 1 ? sks->getReadImpls().rangeContainingKeyBefore(ks->getKey())
: sks->getReadImpls().rangeContaining(ks->getKey());
// Walk range by range (left for offset < 1, right for offset > 1) until the selector is normalized
// (offset == 1) or the walk leaves the boundary
while ((ks->offset < 1 && iter->begin() >= boundary.begin) || (ks->offset > 1 && iter->begin() < boundary.end)) {
// Ranges without a registered read implementation hold no keys and are skipped
if (iter->value() != nullptr) {
wait(moveKeySelectorOverRangeActor(iter->value(), ryw, ks, cache));
}
// Check if we can still move the iterator left
if (ks->offset < 1) {
if (iter == sks->getReadImpls().ranges().begin()) {
break;
} else {
--iter;
}
} else if (ks->offset > 1) {
// Always safe to move right
++iter;
}
}
// Report the offset reached by the walk; the clamping below resets ks->offset to 1
*actualOffset = ks->offset;
if (!ks->isFirstGreaterOrEqual()) {
TraceEvent(SevDebug, "ReadToBoundary")
.detail("TerminateKey", ks->getKey())
.detail("TerminateOffset", ks->offset);
2021-01-30 03:45:52 +08:00
// If still not normalized after moving to the boundary,
// let key selector clamp up to the boundary
if (ks->offset < 1) {
result->readToBegin = true;
ks->setKey(boundary.begin);
2021-01-30 03:45:52 +08:00
} else {
result->readThroughEnd = true;
ks->setKey(boundary.end);
}
ks->offset = 1;
}
return Void();
}
2020-07-07 05:02:22 +08:00
// Creates a special key space covering [spaceStartKey, spaceEndKey) with no implementations registered.
// With testOnly set, every key maps to the TESTONLY module and the production module boundaries are not
// installed; otherwise keys start as UNKNOWN and modulesBoundaryInit() installs the boundaries.
SpecialKeySpace::SpecialKeySpace(KeyRef spaceStartKey, KeyRef spaceEndKey, bool testOnly)
  : readImpls(nullptr, spaceEndKey),
    modules(testOnly ? SpecialKeySpace::MODULE::TESTONLY : SpecialKeySpace::MODULE::UNKNOWN, spaceEndKey),
    writeImpls(nullptr, spaceEndKey), range(KeyRangeRef(spaceStartKey, spaceEndKey)) {
	// A KeyRangeMap begins at Key() by default, so insert the range to create a boundary at the start key
	readImpls.insert(range, nullptr);
	writeImpls.insert(range, nullptr);

	// testOnly is used in the correctness workload, which does not use the module boundaries
	if (testOnly) {
		return;
	}
	modulesBoundaryInit();
}
// Installs every entry of moduleToBoundary into the module map and creates matching boundaries
// (with no implementation yet) in the read and write implementation maps.
void SpecialKeySpace::modulesBoundaryInit() {
	for (const auto& [moduleId, boundary] : moduleToBoundary) {
		ASSERT(range.contains(boundary));
		// The module must not overlap any module that is already installed.
		// Note: like ranges, one module's end cannot be another module's start; relax the condition if needed
		ASSERT(modules.rangeContaining(boundary.begin) == modules.rangeContaining(boundary.end) &&
		       modules[boundary.begin] == SpecialKeySpace::MODULE::UNKNOWN);
		modules.insert(boundary, moduleId);
		// Note: because of the underlying implementation, these insertions are required for
		// cross_module_read to be handled correctly
		readImpls.insert(boundary, nullptr);
		writeImpls.insert(boundary, nullptr);
	}
}
2021-05-04 04:14:16 +08:00
// Runs getRangeAggregationActor for the given selectors and races it against ryw->resetFuture().
// Returns the aggregated range result if the read finishes first; throws internal_error() if the
// reset future fires first.
ACTOR Future<RangeResult> SpecialKeySpace::checkRYWValid(SpecialKeySpace* sks,
ReadYourWritesTransaction* ryw,
KeySelector begin,
KeySelector end,
GetRangeLimits limits,
2021-07-03 12:41:50 +08:00
Reverse reverse) {
2020-06-18 02:13:55 +08:00
ASSERT(ryw);
choose {
2021-05-04 04:14:16 +08:00
when(RangeResult result =
wait(SpecialKeySpace::getRangeAggregationActor(sks, ryw, begin, end, limits, reverse))) {
return result;
2020-05-15 14:49:57 +08:00
}
2020-06-18 02:13:55 +08:00
// The transaction was reset while the read was still in flight
when(wait(ryw->resetFuture())) { throw internal_error(); }
2020-05-06 04:07:09 +08:00
}
}
2021-05-04 04:14:16 +08:00
// Resolves both key selectors, then reads every registered range implementation that intersects
// [begin, end) and concatenates the results (in reverse order when `reverse` is set) until `limits` is reached.
// Throws special_keys_no_module_found / special_keys_cross_module_read when the read is not confined to a
// single known module and the relaxed option is not set on the transaction.
ACTOR Future<RangeResult> SpecialKeySpace::getRangeAggregationActor(SpecialKeySpace* sks,
ReadYourWritesTransaction* ryw,
KeySelector begin,
KeySelector end,
GetRangeLimits limits,
2021-07-03 12:41:50 +08:00
Reverse reverse) {
// This function handles ranges which cover more than one keyrange and aggregates all results
// KeySelector, GetRangeLimits and reverse are all handled here
2021-05-04 04:14:16 +08:00
state RangeResult result;
state RangeResult pairs;
state RangeMap<Key, SpecialKeyRangeReadImpl*, KeyRangeRef>::iterator iter;
state int actualBeginOffset;
state int actualEndOffset;
state KeyRangeRef moduleBoundary;
// used to cache result from potential first read
2021-05-04 04:14:16 +08:00
state Optional<RangeResult> cache;
2020-04-03 15:26:11 +08:00
2020-06-18 03:47:54 +08:00
// Pick the boundary the selectors may move within: the whole space when relaxed, else a single module
if (ryw->specialKeySpaceRelaxed()) {
moduleBoundary = sks->range;
} else {
auto beginIter = sks->getModules().rangeContaining(begin.getKey());
2020-06-18 03:47:54 +08:00
// Both selector keys must lie within the module that contains the begin key
if (beginIter->begin() <= end.getKey() && end.getKey() <= beginIter->end()) {
if (beginIter->value() == SpecialKeySpace::MODULE::UNKNOWN)
throw special_keys_no_module_found();
else
moduleBoundary = beginIter->range();
} else {
TraceEvent(SevInfo, "SpecialKeyCrossModuleRead")
.detail("Begin", begin)
.detail("End", end)
.detail("BoundaryBegin", beginIter->begin())
.detail("BoundaryEnd", beginIter->end());
throw special_keys_cross_module_read();
}
}
// Normalize both selectors; this may set readToBegin / readThroughEnd on `result`
wait(normalizeKeySelectorActor(sks, ryw, &begin, moduleBoundary, &actualBeginOffset, &result, &cache));
wait(normalizeKeySelectorActor(sks, ryw, &end, moduleBoundary, &actualEndOffset, &result, &cache));
// Handle all corner cases like what RYW does
// return if range inverted
if (actualBeginOffset >= actualEndOffset && begin.getKey() >= end.getKey()) {
2020-11-11 05:01:07 +08:00
TEST(true); // inverted range
return RangeResultRef(false, false);
}
2020-03-31 16:36:07 +08:00
// If touches begin or end, return with readToBegin and readThroughEnd flags
if (begin.getKey() == moduleBoundary.end || end.getKey() == moduleBoundary.begin) {
2020-11-11 05:01:07 +08:00
TEST(true); // query touches begin or end
return result;
}
state RangeMap<Key, SpecialKeyRangeReadImpl*, KeyRangeRef>::Ranges ranges =
2020-06-15 14:11:57 +08:00
sks->getReadImpls().intersectingRanges(KeyRangeRef(begin.getKey(), end.getKey()));
// TODO : workaround to write this two together to make the code compact
// The issue here is boost::iterator_range<> does not provide rbegin(), rend()
iter = reverse ? ranges.end() : ranges.begin();
if (reverse) {
// Reverse read: visit the intersecting ranges from last to first
while (iter != ranges.begin()) {
--iter;
// No implementation registered for this range
if (iter->value() == nullptr)
continue;
// Clip the requested range to this implementation's range
KeyRangeRef kr = iter->range();
KeyRef keyStart = kr.contains(begin.getKey()) ? begin.getKey() : kr.begin;
KeyRef keyEnd = kr.contains(end.getKey()) ? end.getKey() : kr.end;
// The cache-aware overload is only used once the cache has been populated
if (iter->value()->isAsync() && cache.present()) {
const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast<const SpecialKeyRangeAsyncImpl*>(iter->value());
RangeResult pairs_ = wait(ptr->getRange(ryw, KeyRangeRef(keyStart, keyEnd), limits, &cache));
pairs = pairs_;
} else {
RangeResult pairs_ = wait(iter->value()->getRange(ryw, KeyRangeRef(keyStart, keyEnd), limits));
pairs = pairs_;
}
// Entries are pushed without a deep copy, so `result` must keep the source arena alive
result.arena().dependsOn(pairs.arena());
// limits handler
for (int i = pairs.size() - 1; i >= 0; --i) {
ASSERT(iter->range().contains(pairs[i].key));
result.push_back(result.arena(), pairs[i]);
// Note : behavior here is even the last k-v pair makes total bytes larger than specified, it's still
// returned. In other words, the total size of the returned value (less the last entry) will be less
// than byteLimit
limits.decrement(pairs[i]);
if (limits.isReached()) {
// Stopped early, so the beginning of the range was not necessarily reached
result.more = true;
result.readToBegin = false;
return result;
};
}
}
} else {
// Forward read: visit the intersecting ranges from first to last
for (iter = ranges.begin(); iter != ranges.end(); ++iter) {
// No implementation registered for this range
if (iter->value() == nullptr)
continue;
// Clip the requested range to this implementation's range
KeyRangeRef kr = iter->range();
KeyRef keyStart = kr.contains(begin.getKey()) ? begin.getKey() : kr.begin;
KeyRef keyEnd = kr.contains(end.getKey()) ? end.getKey() : kr.end;
// The cache-aware overload is only used once the cache has been populated
if (iter->value()->isAsync() && cache.present()) {
const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast<const SpecialKeyRangeAsyncImpl*>(iter->value());
RangeResult pairs_ = wait(ptr->getRange(ryw, KeyRangeRef(keyStart, keyEnd), limits, &cache));
pairs = pairs_;
} else {
RangeResult pairs_ = wait(iter->value()->getRange(ryw, KeyRangeRef(keyStart, keyEnd), limits));
pairs = pairs_;
}
// Entries are pushed without a deep copy, so `result` must keep the source arena alive
result.arena().dependsOn(pairs.arena());
// limits handler
for (int i = 0; i < pairs.size(); ++i) {
ASSERT(iter->range().contains(pairs[i].key));
result.push_back(result.arena(), pairs[i]);
// Note : behavior here is even the last k-v pair makes total bytes larger than specified, it's still
// returned. In other words, the total size of the returned value (less the last entry) will be less
// than byteLimit
limits.decrement(pairs[i]);
if (limits.isReached()) {
// Stopped early, so the end of the range was not necessarily reached
result.more = true;
result.readThroughEnd = false;
return result;
};
}
}
}
return result;
}
2021-05-04 04:14:16 +08:00
// Entry point for range reads on the special key space.
// Validates the limits, short-circuits the trivial cases (limit already reached, obviously inverted range)
// and otherwise hands the read to checkRYWValid.
Future<RangeResult> SpecialKeySpace::getRange(ReadYourWritesTransaction* ryw,
                                              KeySelector begin,
                                              KeySelector end,
                                              GetRangeLimits limits,
                                              Reverse reverse) {
	// validate limits here
	if (!limits.isValid()) {
		return range_limits_invalid();
	}
	if (limits.isReached()) {
		TEST(true); // read limit 0
		return RangeResult();
	}

	// make sure orEqual == false on both selectors
	begin.removeOrEqual(begin.arena());
	end.removeOrEqual(end.arena());

	const bool invertedRange = begin.offset >= end.offset && begin.getKey() >= end.getKey();
	if (invertedRange) {
		TEST(true); // range inverted
		return RangeResult();
	}

	return checkRYWValid(this, ryw, begin, end, limits, reverse);
}
// Point read implemented on top of getRange: reads [key, keyAfter(key)) and returns the value of the
// single entry found, or an empty Optional when the key is absent.
ACTOR Future<Optional<Value>> SpecialKeySpace::getActor(SpecialKeySpace* sks,
                                                        ReadYourWritesTransaction* ryw,
                                                        KeyRef key) {
	// use getRange to workaround this
	RangeResult kvs = wait(sks->getRange(ryw,
	                                     KeySelector(firstGreaterOrEqual(key)),
	                                     KeySelector(firstGreaterOrEqual(keyAfter(key))),
	                                     GetRangeLimits(CLIENT_KNOBS->TOO_MANY),
	                                     Reverse::False));
	// The range holds at most the requested key itself
	ASSERT(kvs.size() <= 1);
	if (kvs.size() == 0) {
		return Optional<Value>();
	}
	return Optional<Value>(kvs[0].value);
}
// Reads a single special key; the work is done by getActor.
Future<Optional<Value>> SpecialKeySpace::get(ReadYourWritesTransaction* ryw, const Key& key) {
	Future<Optional<Value>> valueFuture = getActor(this, ryw, key);
	return valueFuture;
}
2020-07-07 05:02:22 +08:00
// Forwards a write of `key` to the read-write implementation registered for it.
// Throws special_keys_write_disabled when the transaction has not enabled special-key writes, and
// special_keys_no_write_module_found when no writable implementation covers the key.
void SpecialKeySpace::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	if (!ryw->specialKeySpaceChangeConfiguration()) {
		throw special_keys_write_disabled();
	}

	auto writer = writeImpls[key];
	if (writer != nullptr) {
		writer->set(ryw, key, value);
		return;
	}

	TraceEvent(SevDebug, "SpecialKeySpaceNoWriteModuleFound")
	    .detail("Key", key.toString())
	    .detail("Value", value.toString());
	throw special_keys_no_write_module_found();
}
// Forwards a range clear to the read-write implementation that covers the whole range.
// An empty range is a no-op. Throws special_keys_write_disabled when writes are not enabled on the
// transaction, special_keys_cross_module_clear when the range spans more than one implementation, and
// special_keys_no_write_module_found when no writable implementation covers it.
void SpecialKeySpace::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
	if (!ryw->specialKeySpaceChangeConfiguration()) {
		throw special_keys_write_disabled();
	}
	if (range.empty()) {
		return;
	}

	// Implementations covering the first key and the last key (the one just before range.end)
	auto firstImpl = writeImpls[range.begin];
	auto lastImpl = writeImpls.rangeContainingKeyBefore(range.end)->value();

	if (firstImpl != lastImpl) {
		TraceEvent(SevDebug, "SpecialKeySpaceCrossModuleClear").detail("Range", range);
		throw special_keys_cross_module_clear(); // ban cross module clear
	}
	if (firstImpl == nullptr) {
		TraceEvent(SevDebug, "SpecialKeySpaceNoWriteModuleFound").detail("Range", range);
		throw special_keys_no_write_module_found();
	}
	firstImpl->clear(ryw, range);
}
// Forwards a single-key clear to the read-write implementation registered for `key`.
// Throws special_keys_write_disabled when writes are not enabled on the transaction and
// special_keys_no_write_module_found when no writable implementation covers the key.
void SpecialKeySpace::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	if (!ryw->specialKeySpaceChangeConfiguration()) {
		throw special_keys_write_disabled();
	}

	auto writer = writeImpls[key];
	if (writer == nullptr) {
		throw special_keys_no_write_module_found();
	}
	writer->clear(ryw, key);
}
// Asserts that a special key follows the snake-case naming style.
// The leading \xff\xff prefix and an optional trailing \xff\xff or \x00 (the end key of a single key range)
// are ignored; every remaining byte must be a lowercase letter, a digit, '/' or '_'.
// Returns true so that the call can be wrapped in an ASSERT; violations fail the internal ASSERTs.
bool validateSnakeCaseNaming(const KeyRef& k) {
	KeyRef key(k);
	// Remove prefix \xff\xff
	ASSERT(key.startsWith(specialKeys.begin));
	key = key.removePrefix(specialKeys.begin);
	// Suffix can be \xff\xff or \x00 in single key range.
	// Strip exactly the suffix that was matched: stripping the longer specialKeys.end (\xff\xff\xff)
	// after matching specialKeys.begin would also drop the preceding byte without validating it.
	if (key.endsWith(specialKeys.begin))
		key = key.removeSuffix(specialKeys.begin);
	else if (key.endsWith(LiteralStringRef("\x00")))
		key = key.removeSuffix(LiteralStringRef("\x00"));
	for (const char& c : key.toString()) {
		// only small letters, numbers, '/', '_' is allowed
		ASSERT((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '/' || c == '_');
	}
	return true;
}
// Registers `impl` as the read implementation for `kr`, and also as the write implementation when `type`
// is READWRITE. The range must be non-empty, lie inside its module's boundary (inside normalKeys for
// TESTONLY), follow snake-case naming (non-test modules) and not overlap an already registered range.
void SpecialKeySpace::registerKeyRange(SpecialKeySpace::MODULE module,
                                       SpecialKeySpace::IMPLTYPE type,
                                       const KeyRangeRef& kr,
                                       SpecialKeyRangeReadImpl* impl) {
	// Not allowed to register an empty range
	ASSERT(!kr.empty());

	// module boundary check
	const bool isTestModule = (module == SpecialKeySpace::MODULE::TESTONLY);
	if (isTestModule) {
		ASSERT(normalKeys.contains(kr));
	} else {
		ASSERT(moduleToBoundary.at(module).contains(kr));
		// validate keys follow snake case naming style
		ASSERT(validateSnakeCaseNaming(kr.begin) && validateSnakeCaseNaming(kr.end));
	}

	// make sure the registered range is not overlapping with existing ones
	// Note: kr.end should not be the same as another range's begin, although it should work even they are the same
	// (relax the condition that the end can be another range's start, if needed)
	const auto lastRange = readImpls.rangeContaining(kr.end);
	auto iter = readImpls.rangeContaining(kr.begin);
	while (true) {
		ASSERT(iter->value() == nullptr);
		if (iter == lastRange) {
			break;
		}
		++iter;
	}
	readImpls.insert(kr, impl);

	// if rw, it means the module can do both read and write
	if (type != SpecialKeySpace::IMPLTYPE::READWRITE) {
		return;
	}
	// since write impls are always subset of read impls,
	// no need to check overlapped registration
	auto rwImpl = dynamic_cast<SpecialKeyRangeRWImpl*>(impl);
	ASSERT(rwImpl);
	writeImpls.insert(kr, rwImpl);
}
// Decodes `key` with the read-write implementation registered for it; one must exist.
Key SpecialKeySpace::decode(const KeyRef& key) {
	auto rwImpl = writeImpls[key];
	ASSERT(rwImpl != nullptr);
	return rwImpl->decode(key);
}
// Decodes both ends of `kr` with the read-write implementation that covers it.
// The whole range must be covered by one and the same registered implementation.
KeyRange SpecialKeySpace::decode(const KeyRangeRef& kr) {
	// Only allow to decode key range in the same underlying impl range
	auto firstRange = writeImpls.rangeContaining(kr.begin);
	ASSERT(firstRange->value() != nullptr);
	auto lastRange = writeImpls.rangeContainingKeyBefore(kr.end);
	ASSERT(firstRange == lastRange);
	auto rwImpl = firstRange->value();
	return KeyRangeRef(rwImpl->decode(kr.begin), rwImpl->decode(kr.end));
}
// Commits the pending special-key writes of `ryw`.
// Collects, in first-seen order and without duplicates, the read-write implementation behind every written
// range, then calls commit() on each in turn. The first implementation that reports an error message
// has it stored on the transaction and the commit fails with special_keys_api_failure.
ACTOR Future<Void> commitActor(SpecialKeySpace* sks, ReadYourWritesTransaction* ryw) {
	state RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::Ranges ranges =
	    ryw->getSpecialKeySpaceWriteMap().containedRanges(specialKeys);
	state RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::iterator iter = ranges.begin();
	state std::vector<SpecialKeyRangeRWImpl*> writeModulePtrs;
	std::unordered_set<SpecialKeyRangeRWImpl*> seenModules;
	for (; iter != ranges.end(); ++iter) {
		std::pair<bool, Optional<Value>> entry = iter->value();
		// Only entries whose flag is set contribute an implementation
		if (!entry.first) {
			continue;
		}
		auto modulePtr = sks->getRWImpls().rangeContaining(iter->begin())->value();
		const bool firstTimeSeen = seenModules.insert(modulePtr).second;
		if (firstTimeSeen) {
			writeModulePtrs.push_back(modulePtr);
		}
	}

	state std::vector<SpecialKeyRangeRWImpl*>::const_iterator moduleIter;
	for (moduleIter = writeModulePtrs.begin(); moduleIter != writeModulePtrs.end(); ++moduleIter) {
		Optional<std::string> failure = wait((*moduleIter)->commit(ryw));
		if (!failure.present()) {
			continue;
		}
		ryw->setSpecialKeySpaceErrorMsg(failure.get());
		TraceEvent(SevDebug, "SpecialKeySpaceManagementAPIError")
		    .detail("Reason", failure.get())
		    .detail("Range", (*moduleIter)->getKeyRange().toString());
		throw special_keys_api_failure();
	}
	return Void();
}
// Commits the special-key writes buffered in `ryw`; the work is done by commitActor.
Future<Void> SpecialKeySpace::commit(ReadYourWritesTransaction* ryw) {
	Future<Void> committed = commitActor(this, ryw);
	return committed;
}
// Read-write implementation over `kr`; its getRange reads back through the transaction.
SKSCTestImpl::SKSCTestImpl(KeyRangeRef kr)
  : SpecialKeyRangeRWImpl(kr) {
}
// Reads `kr` (which must lie inside this implementation's range) through the transaction itself and
// passes the key-value pairs to rywGetRange. The read is expected to complete synchronously and to
// return fewer than CLIENT_KNOBS->TOO_MANY entries. limitsHint is not used.
Future<RangeResult> SKSCTestImpl::getRange(ReadYourWritesTransaction* ryw,
                                           KeyRangeRef kr,
                                           GetRangeLimits limitsHint) const {
	ASSERT(range.contains(kr));
	auto resultFuture = ryw->getRange(kr, CLIENT_KNOBS->TOO_MANY);
	// all keys are written to RYW, since GRV is set, the read should happen locally
	ASSERT(resultFuture.isReady());
	// Take the ready value once and use it for both the sanity check and the result
	auto kvs = resultFuture.getValue();
	ASSERT(!kvs.more && kvs.size() < CLIENT_KNOBS->TOO_MANY);
	return rywGetRange(ryw, kr, kvs);
}
// Must never be called: reaching this function fails an ASSERT.
Future<Optional<std::string>> SKSCTestImpl::commit(ReadYourWritesTransaction* ryw) {
	ASSERT(false);
	// Only reached if the assertion above does not stop execution
	Optional<std::string> noError;
	return noError;
}
// Read-only implementation over `kr` that exposes the transaction's read conflict ranges.
ReadConflictRangeImpl::ReadConflictRangeImpl(KeyRangeRef kr)
  : SpecialKeyRangeReadImpl(kr) {
}
2021-05-04 04:14:16 +08:00
// Waits for the transaction's pending reads to finish, then returns the read conflict ranges
// intersecting `kr`.
ACTOR static Future<RangeResult> getReadConflictRangeImpl(ReadYourWritesTransaction* ryw, KeyRange kr) {
	wait(ryw->pendingReads());
	auto conflictRanges = ryw->getReadConflictRangeIntersecting(kr);
	return conflictRanges;
}
// Returns the read conflict ranges intersecting `kr`; limitsHint is not used.
Future<RangeResult> ReadConflictRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                                    KeyRangeRef kr,
                                                    GetRangeLimits limitsHint) const {
	Future<RangeResult> conflictRanges = getReadConflictRangeImpl(ryw, kr);
	return conflictRanges;
}
// Read-only implementation over `kr` that exposes the transaction's write conflict ranges.
WriteConflictRangeImpl::WriteConflictRangeImpl(KeyRangeRef kr)
  : SpecialKeyRangeReadImpl(kr) {
}
2020-04-29 01:34:10 +08:00
// Returns the write conflict ranges intersecting `kr`; limitsHint is not used.
Future<RangeResult> WriteConflictRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                                     KeyRangeRef kr,
                                                     GetRangeLimits limitsHint) const {
	auto conflictRanges = ryw->getWriteConflictRangeIntersecting(kr);
	return conflictRanges;
}
// Read-only implementation over `kr` that exposes the transaction's conflicting-keys map.
ConflictingKeysImpl::ConflictingKeysImpl(KeyRangeRef kr)
  : SpecialKeyRangeReadImpl(kr) {
}
2020-04-07 04:23:41 +08:00
// Returns the boundaries of the transaction's conflicting-keys map that fall inside `kr`:
// one (range begin key, range value) pair for every map range whose begin key lies in [kr.begin, kr.end).
// Returns an empty result when the transaction state has no conflicting-keys map. limitsHint is not used.
Future<RangeResult> ConflictingKeysImpl::getRange(ReadYourWritesTransaction* ryw,
                                                  KeyRangeRef kr,
                                                  GetRangeLimits limitsHint) const {
	RangeResult result;
	if (ryw->getTransactionState()->conflictingKeys) {
		auto krMapPtr = ryw->getTransactionState()->conflictingKeys.get();
		auto beginIter = krMapPtr->rangeContaining(kr.begin);
		auto endIter = krMapPtr->rangeContaining(kr.end);

		// Skip the first range when its boundary lies before kr.begin. Never advance past endIter:
		// when kr starts and ends inside the same map range, advancing would make the loop below run
		// past the end of the map.
		if (!kr.contains(beginIter->begin()) && beginIter != endIter)
			++beginIter;
		for (auto it = beginIter; it != endIter; ++it) {
			result.push_back_deep(result.arena(), KeyValueRef(it->begin(), it->value()));
		}
		// The range containing kr.end contributes its boundary only if that boundary is inside kr,
		// i.e. neither equal to kr.end nor located before kr.begin
		if (kr.contains(endIter->begin()))
			result.push_back_deep(result.arena(), KeyValueRef(endIter->begin(), endIter->value()));
	}
	return result;
}
2021-05-04 04:14:16 +08:00
// Fetches the data distribution metrics list for `kr` (with the ddStatsRange
// prefix removed) and returns one key-value pair per entry: the entry's begin
// key re-prefixed with ddStatsRange.begin, and a JSON object holding
// "shard_bytes". On error_code_dd_not_found the request is retried after a
// jittered delay; any other error is rethrown.
ACTOR Future<RangeResult> ddMetricsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	loop {
		try {
			auto keys = kr.removePrefix(ddStatsRange.begin);
			Standalone<VectorRef<DDMetricsRef>> shardMetrics = wait(
			    waitDataDistributionMetricsList(ryw->getDatabase(), keys, CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT));

			RangeResult result;
			for (const auto& shard : shardMetrics) {
				// Every begin key equals the previous end key, so only begin keys are emitted.
				KeyRef prefixedBegin = shard.beginKey.withPrefix(ddStatsRange.begin, result.arena());
				// Values are UTF-8 JSON so that more fields can be added later.
				json_spirit::mObject stats;
				stats["shard_bytes"] = shard.shardBytes;
				std::string encoded =
				    json_spirit::write_string(json_spirit::mValue(stats), json_spirit::Output_options::raw_utf8);
				ValueRef encodedRef(result.arena(), encoded);
				result.push_back(result.arena(), KeyValueRef(prefixedBegin, encodedRef));
			}
			return result;
		} catch (Error& e) {
			// Copy the error into actor state: `e` must not be used after a wait().
			state Error err(e);
			if (e.code() != error_code_dd_not_found) {
				throw err;
			}
			TraceEvent(SevWarnAlways, "DataDistributorNotPresent")
			    .detail("Operation", "DDMetricsReqestThroughSpecialKeys");
			wait(delayJittered(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY));
			// fall through to the next loop iteration and retry
		}
	}
}
// Forwards the key range `kr` to the SpecialKeyRangeAsyncImpl base.
DDStatsRangeImpl::DDStatsRangeImpl(KeyRangeRef kr) : SpecialKeyRangeAsyncImpl(kr) {}
2020-05-19 01:38:23 +08:00
// Starts ddMetricsGetRangeActor for `kr` and returns its future.
// `limitsHint` is not used.
Future<RangeResult> DDStatsRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                               KeyRangeRef kr,
                                               GetRangeLimits limitsHint) const {
	Future<RangeResult> metrics = ddMetricsGetRangeActor(ryw, kr);
	return metrics;
}
// Builds the special key "<management module begin>options/<command>/<option>".
// Asserts that "<command>/<option>" is a member of `options`.
Key SpecialKeySpace::getManagementApiCommandOptionSpecialKey(const std::string& command, const std::string& option) {
	Key optionsPrefix = LiteralStringRef("options/").withPrefix(moduleToBoundary[MODULE::MANAGEMENT].begin);
	const std::string commandOption = command + "/" + option;
	// Only registered "<command>/<option>" pairs may be turned into a key.
	ASSERT(options.find(commandOption) != options.end());
	return optionsPrefix.withSuffix(commandOption);
}
2020-07-17 09:15:35 +08:00
// Forwards the key range `kr` to the SpecialKeyRangeRWImpl base.
ManagementCommandsOptionsImpl::ManagementCommandsOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Lists the management option keys inside `kr` that this transaction has set.
// Each known option is probed in the special-key-space write map; an option is
// reported (with an empty value) only when its entry is flagged and holds a
// value. `limitsHint` is not used.
Future<RangeResult> ManagementCommandsOptionsImpl::getRange(ReadYourWritesTransaction* ryw,
                                                            KeyRangeRef kr,
                                                            GetRangeLimits limitsHint) const {
	RangeResult result;
	// The option set is small, so probing every option is sufficient.
	const auto& knownOptions = SpecialKeySpace::getManagementApiOptionsSet();
	for (const auto& option : knownOptions) {
		auto optionKey = getKeyRange().begin.withSuffix(option);
		auto written = ryw->getSpecialKeySpaceWriteMap()[optionKey];
		const bool isSet = written.first && written.second.present();
		// skip options outside the queried range or not set by this transaction
		if (!kr.contains(optionKey) || !isSet)
			continue;
		result.push_back(result.arena(), KeyValueRef(optionKey, ValueRef()));
		// optionKey is referenced, not copied, so keep its memory alive
		result.arena().dependsOn(optionKey.arena());
	}
	return result;
}
// Records `value` for the option named by `key` in the special-key-space write
// map. Keys whose suffix is not a known management option are silently ignored.
void ManagementCommandsOptionsImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	const std::string option = key.removePrefix(getKeyRange().begin).toString();
	const auto& knownOptions = SpecialKeySpace::getManagementApiOptionsSet();
	if (knownOptions.find(option) == knownOptions.end())
		return; // not a valid option key: ignore

	TraceEvent(SevDebug, "ManagementApiOption").detail("Option", option).detail("Key", key);
	ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>(value)));
}
// Erases `range` from the special-key-space write map of `ryw`.
void ManagementCommandsOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
	auto& writeMap = ryw->getSpecialKeySpaceWriteMap();
	writeMap.rawErase(range);
}
// Erases the single option key `key` from the special-key-space write map.
// Keys whose suffix is not a known management option are silently ignored.
void ManagementCommandsOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	const std::string option = key.removePrefix(getKeyRange().begin).toString();
	const auto& knownOptions = SpecialKeySpace::getManagementApiOptionsSet();
	if (knownOptions.find(option) == knownOptions.end())
		return; // not a valid option key: ignore

	ryw->getSpecialKeySpaceWriteMap().rawErase(singleKeyRange(key));
}
// Commit hook for the options range. It performs no work and returns an empty
// Optional (no error message).
Future<Optional<std::string>> ManagementCommandsOptionsImpl::commit(ReadYourWritesTransaction* ryw) {
// Nothing to do, keys should be used by other impls' commit callback
return Optional<std::string>();
}
2021-05-04 04:14:16 +08:00
// Overlays this transaction's special-key-space writes on top of `res`.
//
// `res` is the read result without any of the transaction's own writes. When
// read-your-writes is disabled it is returned unchanged. Otherwise `res` and
// the write-map ranges contained in `kr` are merged in key order:
//   - a flagged write entry with a value contributes (range begin, value);
//   - a flagged write entry (set or clear) hides every `res` key in its range;
//   - an unflagged write entry leaves `res` keys untouched.
RangeResult rywGetRange(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr, const RangeResult& res) {
	if (ryw->readYourWritesDisabled())
		return res;

	using WriteMap = RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>;

	RangeResult merged;
	WriteMap::Ranges writes = ryw->getSpecialKeySpaceWriteMap().containedRanges(kr);
	WriteMap::iterator writeIt = writes.begin();
	auto readIt = res.begin();
	// Entries copied from `res` are referenced, not deep-copied.
	merged.arena().dependsOn(res.arena());

	// Pass the current read entry through unchanged and advance.
	auto passRead = [&]() {
		merged.push_back(merged.arena(), KeyValueRef(readIt->key, readIt->value));
		++readIt;
	};
	// Emit the current write entry if it is a set, then advance.
	auto emitWrite = [&]() {
		std::pair<bool, Optional<Value>> entry = writeIt->value();
		if (entry.first && entry.second.present()) {
			merged.push_back_deep(merged.arena(), KeyValueRef(writeIt->begin(), entry.second.get()));
		}
		++writeIt;
	};

	while (writeIt != writes.end() || readIt != res.end()) {
		if (writeIt == writes.end()) {
			passRead();
			continue;
		}
		if (readIt == res.end()) {
			emitWrite();
			continue;
		}

		if (writeIt->range().contains(readIt->key)) {
			std::pair<bool, Optional<Value>> entry = writeIt->value();
			// A flagged range (set or clear) overrides every read key inside it.
			if (entry.first) {
				if (entry.second.present())
					merged.push_back_deep(merged.arena(), KeyValueRef(writeIt->begin(), entry.second.get()));
				// move the read cursor outside the range
				while (readIt != res.end() && writeIt->range().contains(readIt->key))
					++readIt;
			}
			++writeIt;
		} else if (writeIt->begin() > readIt->key) {
			// the read key sorts before the write range
			passRead();
		} else if (writeIt->end() <= readIt->key) {
			// the write range sorts entirely before the read key
			emitWrite();
		}
	}
	return merged;
}
2020-06-15 13:39:20 +08:00
// read from those readwrite modules in which special keys have one-to-one mapping with real persisted keys
2021-05-04 04:14:16 +08:00
// Reads the persisted keys that `kr` decodes to, re-encodes each key with
// `impl->encode`, and overlays the transaction's own writes via rywGetRange.
// Asserts that the underlying read was complete and returned fewer than
// CLIENT_KNOBS->TOO_MANY entries.
ACTOR Future<RangeResult> rwModuleWithMappingGetRangeActor(ReadYourWritesTransaction* ryw,
                                                           const SpecialKeyRangeRWImpl* impl,
                                                           KeyRangeRef kr) {
	RangeResult persisted =
	    wait(ryw->getTransaction().getRange(ryw->getDatabase()->specialKeySpace->decode(kr), CLIENT_KNOBS->TOO_MANY));
	ASSERT(!persisted.more && persisted.size() < CLIENT_KNOBS->TOO_MANY);

	RangeResult encoded;
	for (const KeyValueRef& entry : persisted) {
		encoded.push_back_deep(encoded.arena(), KeyValueRef(impl->encode(entry.key), entry.value));
	}
	return rywGetRange(ryw, kr, encoded);
}
// Forwards the key range `kr` to the SpecialKeyRangeRWImpl base.
ExcludeServersRangeImpl::ExcludeServersRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
2020-06-15 13:39:20 +08:00
// Sets RAW_ACCESS on the transaction, then reads `kr` through
// rwModuleWithMappingGetRangeActor using this impl for key encoding.
// `limitsHint` is not used.
Future<RangeResult> ExcludeServersRangeImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
return rwModuleWithMappingGetRangeActor(ryw, this, kr);
}
// Records `key` as set in the special-key-space write map. The supplied
// `value` is ignored; an empty value is stored instead.
void ExcludeServersRangeImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	auto setMarker = std::make_pair(true, Optional<Value>(ValueRef()));
	ryw->getSpecialKeySpaceWriteMap().insert(key, setMarker);
}
// Maps a special key to its persisted key: the management module prefix is
// stripped and "\xff/conf/" is prepended.
Key ExcludeServersRangeImpl::decode(const KeyRef& key) const {
	const auto& managementRange = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT);
	const KeyRef suffix = key.removePrefix(managementRange.begin);
	return suffix.withPrefix(LiteralStringRef("\xff/conf/"));
}
// Maps a persisted key to its special key: the "\xff/conf/" prefix is stripped
// and the management module prefix is prepended.
Key ExcludeServersRangeImpl::encode(const KeyRef& key) const {
	const auto& managementRange = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT);
	const KeyRef suffix = key.removePrefix(LiteralStringRef("\xff/conf/"));
	return suffix.withPrefix(managementRange.begin);
}
// Parses the addresses this transaction has set under the "failed" (when
// `failed` is true) or "exclude" management range.
//
// Every set entry whose key suffix parses as a valid AddressExclusion is
// appended to `addresses` and inserted into `exclusions`. Clear entries are
// not examined. On the first invalid address, `msg` receives a JSON error
// string and false is returned; otherwise true is returned.
bool parseNetWorkAddrFromKeys(ReadYourWritesTransaction* ryw,
                              bool failed,
                              std::vector<AddressExclusion>& addresses,
                              std::set<AddressExclusion>& exclusions,
                              Optional<std::string>& msg) {
	KeyRangeRef range = failed ? SpecialKeySpace::getManagementApiCommandRange("failed")
	                           : SpecialKeySpace::getManagementApiCommandRange("exclude");
	auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(range);
	for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
		auto entry = iter->value();
		TraceEvent(SevDebug, "ParseNetworkAddress")
		    .detail("Valid", entry.first)
		    .detail("Set", entry.second.present())
		    .detail("Key", iter->begin().toString());
		// only check for exclude(set) operation, include(clear) are not checked
		if (!entry.first || !entry.second.present())
			continue;

		Key address = iter->begin().removePrefix(range.begin);
		auto a = AddressExclusion::parse(address);
		if (!a.isValid()) {
			std::string error = "ERROR: \'" + address.toString() + "\' is not a valid network endpoint address\n";
			if (address.toString().find(":tls") != std::string::npos)
				error += "        Do not include the `:tls' suffix when naming a process\n";
			// Only set entries reach this point, so the command is always an
			// exclude variant (the former "include" alternative was unreachable).
			msg = ManagementAPIError::toJsonString(false, failed ? "exclude failed" : "exclude", error);
			return false;
		}
		addresses.push_back(a);
		exclusions.insert(a);
	}
	return true;
}
// Decides whether excluding `addresses` is safe.
//
// When `markFailed` is true, checkSafeExclusions is consulted first; an error
// from it (other than cancellation) is traced and treated as "unsafe". Then the
// cluster status document is fetched and the free-space impact of the
// exclusion on the remaining storage servers is estimated.
//
// Returns true when the exclusion may proceed. Otherwise returns false and
// stores a JSON error string in `*msg`. The error text names the option key
// that excludeCommitActor reads to skip this check
// ("options/excluded/force" or "options/failed/force").
ACTOR Future<bool> checkExclusion(Database db,
                                  std::vector<AddressExclusion>* addresses,
                                  std::set<AddressExclusion>* exclusions,
                                  bool markFailed,
                                  Optional<std::string>* msg) {
	// Command name attached to every error produced below.
	state std::string command = markFailed ? "exclude failed" : "exclude";
	// Option key which, once set, makes the commit skip this whole check.
	state std::string forceOptionKey =
	    std::string("0xff0xff/management/options/") + (markFailed ? "failed" : "excluded") + "/force";

	if (markFailed) {
		state bool safe;
		try {
			bool _safe = wait(checkSafeExclusions(db, *addresses));
			safe = _safe;
		} catch (Error& e) {
			if (e.code() == error_code_actor_cancelled)
				throw;
			TraceEvent("CheckSafeExclusionsError").error(e);
			safe = false;
		}
		if (!safe) {
			std::string temp = "ERROR: It is unsafe to exclude the specified servers at this time.\n"
			                   "Please check that this exclusion does not bring down an entire storage team.\n"
			                   "Please also ensure that the exclusion will keep a majority of coordinators alive.\n"
			                   "You may add more storage processes or coordinators to make the operation safe.\n"
			                   "Call set(\"" +
			                   forceOptionKey + "\", ...) first to exclude without performing safety checks.\n";
			*msg = ManagementAPIError::toJsonString(false, command, temp);
			return false;
		}
	}

	StatusObject status = wait(StatusClient::statusFetcher(db));
	state std::string errorString =
	    "ERROR: Could not calculate the impact of this exclude on the total free space in the cluster.\n"
	    "Please try the exclude again in 30 seconds.\n"
	    "Call set(\"" +
	    forceOptionKey + "\", ...) first to exclude without checking free space.\n";

	StatusObjectReader statusObj(status);

	StatusObjectReader statusObjCluster;
	if (!statusObj.get("cluster", statusObjCluster)) {
		*msg = ManagementAPIError::toJsonString(false, command, errorString);
		return false;
	}

	StatusObjectReader processesMap;
	if (!statusObjCluster.get("processes", processesMap)) {
		*msg = ManagementAPIError::toJsonString(false, command, errorString);
		return false;
	}

	state int ssTotalCount = 0;
	state int ssExcludedCount = 0;
	state double worstFreeSpaceRatio = 1.0;
	try {
		for (auto proc : processesMap.obj()) {
			bool storageServer = false;
			StatusArray rolesArray = proc.second.get_obj()["roles"].get_array();
			for (StatusObjectReader role : rolesArray) {
				if (role["role"].get_str() == "storage") {
					storageServer = true;
					break;
				}
			}
			// Skip non-storage servers in free space calculation
			if (!storageServer)
				continue;

			StatusObjectReader process(proc.second);
			std::string addrStr;
			if (!process.get("address", addrStr)) {
				*msg = ManagementAPIError::toJsonString(false, command, errorString);
				return false;
			}
			NetworkAddress addr = NetworkAddress::parse(addrStr);
			// A server counts as excluded if status already says so or if it
			// matches one of the exclusions being committed.
			bool excluded =
			    (process.has("excluded") && process.last().get_bool()) || addressExcluded(*exclusions, addr);
			ssTotalCount++;
			if (excluded)
				ssExcludedCount++;

			if (!excluded) {
				StatusObjectReader disk;
				if (!process.get("disk", disk)) {
					*msg = ManagementAPIError::toJsonString(false, command, errorString);
					return false;
				}

				int64_t total_bytes;
				if (!disk.get("total_bytes", total_bytes)) {
					*msg = ManagementAPIError::toJsonString(false, command, errorString);
					return false;
				}

				int64_t free_bytes;
				if (!disk.get("free_bytes", free_bytes)) {
					*msg = ManagementAPIError::toJsonString(false, command, errorString);
					return false;
				}

				worstFreeSpaceRatio = std::min(worstFreeSpaceRatio, double(free_bytes) / total_bytes);
			}
		}
	} catch (...) // std::exception
	{
		// Any failure while walking the status document means the impact
		// cannot be computed.
		*msg = ManagementAPIError::toJsonString(false, command, errorString);
		return false;
	}

	// Reject when no storage server would remain, or when the estimated usage
	// of the remaining servers exceeds 90%. The first test also guards the
	// division below against a zero denominator.
	if (ssExcludedCount == ssTotalCount ||
	    (1 - worstFreeSpaceRatio) * ssTotalCount / (ssTotalCount - ssExcludedCount) > 0.9) {
		std::string temp = "ERROR: This exclude may cause the total free space in the cluster to drop below 10%.\n"
		                   "Call set(\"" +
		                   forceOptionKey + "\", ...) first to exclude without checking free space.\n";
		*msg = ManagementAPIError::toJsonString(false, command, temp);
		return false;
	}
	return true;
}
// Applies the "include" (clear) operations this transaction recorded under the
// "exclude" and "failed" management ranges.
//
// For every flagged clear entry the matching version key is added as a read
// conflict and rewritten with a fresh random id, and the decoded persisted
// range is cleared on the underlying Transaction.
void includeServers(ReadYourWritesTransaction* ryw) {
	ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
	ryw->setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
	// includeServers might be used in an emergency transaction, so make sure it is retry-self-conflicting and
	// CAUSAL_WRITE_RISKY
	ryw->setOption(FDBTransactionOptions::CAUSAL_WRITE_RISKY);

	std::string versionKey = deterministicRandom()->randomUniqueID().toString();
	Transaction& tr = ryw->getTransaction();

	// Replays the clear entries of one management command range.
	auto includeClearedRanges = [&](const char* command, const KeyRef& serversVersionKey) {
		auto ranges =
		    ryw->getSpecialKeySpaceWriteMap().containedRanges(SpecialKeySpace::getManagementApiCommandRange(command));
		for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
			auto entry = iter->value();
			// only flagged clear entries are include operations
			if (!entry.first || entry.second.present())
				continue;
			tr.addReadConflictRange(singleKeyRange(serversVersionKey));
			tr.set(serversVersionKey, versionKey);
			tr.clear(ryw->getDatabase()->specialKeySpace->decode(iter->range()));
		}
	};

	// excluded servers first, then failed servers
	includeClearedRanges("exclude", excludedServersVersionKey);
	includeClearedRanges("failed", failedServersVersionKey);
}
2020-07-17 09:15:35 +08:00
// Commit logic shared by the "exclude" and "failed" ranges.
//
// Parses the addresses set by the transaction, runs checkExclusion unless
// there is nothing to exclude or the matching force option key is set, then
// applies the exclusions and any include (clear) operations. Returns an error
// message when parsing or the safety check fails, otherwise an empty Optional.
ACTOR Future<Optional<std::string>> excludeCommitActor(ReadYourWritesTransaction* ryw, bool failed) {
	state Optional<std::string> result;
	state std::vector<AddressExclusion> addresses;
	state std::set<AddressExclusion> exclusions;

	// parse network addresses
	if (!parseNetWorkAddrFromKeys(ryw, failed, addresses, exclusions, result)) {
		return result;
	}

	// The safety check is needed only when servers are being excluded and the
	// force option key has not been set.
	Key forceKey = SpecialKeySpace::getManagementApiCommandOptionSpecialKey(failed ? "failed" : "excluded", "force");
	auto forceEntry = ryw->getSpecialKeySpaceWriteMap()[forceKey];
	bool forced = forceEntry.first && forceEntry.second.present();
	if (!addresses.empty() && !forced) {
		bool safe = wait(checkExclusion(ryw->getDatabase(), &addresses, &exclusions, failed, &result));
		if (!safe) {
			return result;
		}
	}

	excludeServers(ryw->getTransaction(), addresses, failed);
	includeServers(ryw);
	return result;
}
// Commits the exclude range by running excludeCommitActor with failed=false.
Future<Optional<std::string>> ExcludeServersRangeImpl::commit(ReadYourWritesTransaction* ryw) {
	const bool markFailed = false;
	return excludeCommitActor(ryw, markFailed);
}
2020-07-07 06:40:21 +08:00
// Forwards the key range `kr` to the SpecialKeyRangeRWImpl base.
FailedServersRangeImpl::FailedServersRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Sets RAW_ACCESS on the transaction, then reads `kr` through
// rwModuleWithMappingGetRangeActor using this impl for key encoding.
// `limitsHint` is not used.
Future<RangeResult> FailedServersRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                                     KeyRangeRef kr,
                                                     GetRangeLimits limitsHint) const {
	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
	Future<RangeResult> mapped = rwModuleWithMappingGetRangeActor(ryw, this, kr);
	return mapped;
}
// Records `key` as set in the special-key-space write map. The supplied
// `value` is ignored; an empty value is stored instead.
void FailedServersRangeImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	auto setMarker = std::make_pair(true, Optional<Value>(ValueRef()));
	ryw->getSpecialKeySpaceWriteMap().insert(key, setMarker);
}
// Maps a special key to its persisted key: the management module prefix is
// stripped and "\xff/conf/" is prepended.
Key FailedServersRangeImpl::decode(const KeyRef& key) const {
	const auto& managementRange = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT);
	const KeyRef suffix = key.removePrefix(managementRange.begin);
	return suffix.withPrefix(LiteralStringRef("\xff/conf/"));
}
// Maps a persisted key to its special key: the "\xff/conf/" prefix is stripped
// and the management module prefix is prepended.
Key FailedServersRangeImpl::encode(const KeyRef& key) const {
	const auto& managementRange = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT);
	const KeyRef suffix = key.removePrefix(LiteralStringRef("\xff/conf/"));
	return suffix.withPrefix(managementRange.begin);
}
2020-07-07 06:40:21 +08:00
// Commits the failed range by running excludeCommitActor with failed=true.
Future<Optional<std::string>> FailedServersRangeImpl::commit(ReadYourWritesTransaction* ryw) {
	const bool markFailed = true;
	return excludeCommitActor(ryw, markFailed);
}
2021-05-04 04:14:16 +08:00
// Lists the excluded addresses that are still in use.
//
// An address is reported when it is excluded and still appears in the server
// list (primary or secondary address), or when it belongs to a log entry that
// is excluded or has a default-constructed address. Each address is formatted
// as ip:port, de-duplicated and sorted as a string, appended to `prefix`, and
// returned with an empty value if the resulting key lies inside `kr`.
ACTOR Future<RangeResult> ExclusionInProgressActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) {
	state RangeResult result;
	state Transaction& tr = ryw->getTransaction();
	tr.setOption(FDBTransactionOptions::RAW_ACCESS);
	tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); // necessary?
	tr.setOption(FDBTransactionOptions::LOCK_AWARE);

	state std::vector<AddressExclusion> excl = wait(getExcludedServers(&tr));
	state std::set<AddressExclusion> exclusions(excl.begin(), excl.end());
	state std::set<NetworkAddress> inProgressExclusion;

	// Just getting a consistent read version proves that a set of tlogs satisfying the exclusions has completed
	// recovery. Check that there aren't any storage servers with addresses violating the exclusions.
	state RangeResult serverList = wait(tr.getRange(serverListKeys, CLIENT_KNOBS->TOO_MANY));
	ASSERT(!serverList.more && serverList.size() < CLIENT_KNOBS->TOO_MANY);

	for (auto& server : serverList) {
		auto addresses = decodeServerListValue(server.value).getKeyValues.getEndpoint().addresses;
		if (addressExcluded(exclusions, addresses.address)) {
			inProgressExclusion.insert(addresses.address);
		}
		if (addresses.secondaryAddress.present()) {
			const NetworkAddress& secondary = addresses.secondaryAddress.get();
			if (addressExcluded(exclusions, secondary)) {
				inProgressExclusion.insert(secondary);
			}
		}
	}

	Optional<Standalone<StringRef>> value = wait(tr.get(logsKey));
	ASSERT(value.present());
	auto logs = decodeLogsValue(value.get());
	for (auto const& log : logs.first) {
		const NetworkAddress& logAddress = log.second;
		if (logAddress == NetworkAddress() || addressExcluded(exclusions, logAddress)) {
			inProgressExclusion.insert(logAddress);
		}
	}
	for (auto const& log : logs.second) {
		const NetworkAddress& logAddress = log.second;
		if (logAddress == NetworkAddress() || addressExcluded(exclusions, logAddress)) {
			inProgressExclusion.insert(logAddress);
		}
	}

	// sort and remove :tls
	std::set<std::string> formattedAddresses;
	for (auto const& address : inProgressExclusion) {
		formattedAddresses.insert(formatIpPort(address.ip, address.port));
	}

	for (auto const& formatted : formattedAddresses) {
		Key addrKey = prefix.withSuffix(formatted);
		if (!kr.contains(addrKey))
			continue;
		result.push_back(result.arena(), KeyValueRef(addrKey, ValueRef()));
		// addrKey is referenced, not copied, so keep its memory alive
		result.arena().dependsOn(addrKey.arena());
	}
	return result;
}
// Forwards the key range `kr` to the SpecialKeyRangeAsyncImpl base.
ExclusionInProgressRangeImpl::ExclusionInProgressRangeImpl(KeyRangeRef kr) : SpecialKeyRangeAsyncImpl(kr) {}
// Starts ExclusionInProgressActor with this impl's range begin as the key
// prefix and returns its future. `limitsHint` is not used.
Future<RangeResult> ExclusionInProgressRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                                           KeyRangeRef kr,
                                                           GetRangeLimits limitsHint) const {
	Future<RangeResult> inProgress = ExclusionInProgressActor(ryw, getKeyRange().begin, kr);
	return inProgress;
}
2021-05-04 04:14:16 +08:00
// Lists the process class of every worker whose key (`prefix` + ip:port) lies
// inside `kr`, then overlays the transaction's own writes via rywGetRange.
// Workers are ordered by their formatted ip:port string.
ACTOR Future<RangeResult> getProcessClassActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) {
	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
	std::vector<ProcessData> _workers = wait(getWorkers(&ryw->getTransaction()));
	auto workers = _workers; // strip const

	// Note : the sort by string is anti intuition, ex. 1.1.1.1:11 < 1.1.1.1:5
	std::sort(workers.begin(), workers.end(), [](const ProcessData& a, const ProcessData& b) {
		return formatIpPort(a.address.ip, a.address.port) < formatIpPort(b.address.ip, b.address.port);
	});

	RangeResult result;
	for (auto& worker : workers) {
		// exclude :tls in keys even if the network address is TLS
		KeyRef workerKey(prefix.withSuffix(formatIpPort(worker.address.ip, worker.address.port), result.arena()));
		if (!kr.contains(workerKey))
			continue;
		ValueRef classValue(result.arena(), worker.processClass.toString());
		result.push_back(result.arena(), KeyValueRef(workerKey, classValue));
	}
	return rywGetRange(ryw, kr, result);
}
// Applies the process class changes this transaction set inside `range`.
//
// For each set entry, the key suffix is parsed as an address and the value as
// a process class. Every worker matched by the address gets its process class
// key set, or cleared when the class is InvalidClass. If any worker matched,
// processClassChangeKey is rewritten with a fresh random id. Always returns an
// empty Optional.
ACTOR Future<Optional<std::string>> processClassCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef range) {
	// enable related options
	ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
	ryw->setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
	// make sure we use the Transaction object to avoid used_during_commit()
	std::vector<ProcessData> workers = wait(getWorkers(&ryw->getTransaction()));

	auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(range);
	for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
		auto entry = iter->value();
		// only loop through (set) operation, (clear) not exist
		if (!entry.first || !entry.second.present())
			continue;

		// parse network address
		Key address = iter->begin().removePrefix(range.begin);
		AddressExclusion addr = AddressExclusion::parse(address);
		// parse class type
		ValueRef processClassType = entry.second.get();
		ProcessClass processClass(processClassType.toString(), ProcessClass::DBSource);

		// make sure we use the underlying Transaction object to avoid used_during_commit()
		bool foundChange = false;
		for (const auto& worker : workers) {
			if (!addr.excludes(worker.address))
				continue;
			if (processClass.classType() != ProcessClass::InvalidClass) {
				ryw->getTransaction().set(processClassKeyFor(worker.locality.processId().get()),
				                          processClassValue(processClass));
			} else {
				ryw->getTransaction().clear(processClassKeyFor(worker.locality.processId().get()));
			}
			foundChange = true;
		}
		if (foundChange) {
			ryw->getTransaction().set(processClassChangeKey, deterministicRandom()->randomUniqueID().toString());
		}
	}
	return Optional<std::string>();
}
// Forwards the key range `kr` to the SpecialKeyRangeRWImpl base.
ProcessClassRangeImpl::ProcessClassRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Starts getProcessClassActor with this impl's range begin as the key prefix
// and returns its future. `limitsHint` is not used.
Future<RangeResult> ProcessClassRangeImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
return getProcessClassActor(ryw, getKeyRange().begin, kr);
}
// Validates every set entry in this impl's range before committing.
//
// Returns a JSON "setclass" error message for the first entry whose key suffix
// is not a valid network address, or whose value is neither a valid process
// class nor the literal "default". When all entries pass, the work is handed
// to processClassCommitActor.
Future<Optional<std::string>> ProcessClassRangeImpl::commit(ReadYourWritesTransaction* ryw) {
	auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(getKeyRange());
	for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
		auto entry = iter->value();
		// only check for setclass(set) operation, (clear) are forbidden thus not exist
		if (!entry.first || !entry.second.present())
			continue;

		// validate network address
		Key address = iter->begin().removePrefix(range.begin);
		AddressExclusion addr = AddressExclusion::parse(address);
		if (!addr.isValid()) {
			std::string error = "ERROR: \'" + address.toString() + "\' is not a valid network endpoint address\n";
			if (address.toString().find(":tls") != std::string::npos)
				error += "        Do not include the `:tls' suffix when naming a process\n";
			return Optional<std::string>(ManagementAPIError::toJsonString(false, "setclass", error));
		}

		// validate class type
		ValueRef processClassType = entry.second.get();
		ProcessClass processClass(processClassType.toString(), ProcessClass::DBSource);
		const bool invalidClass = processClass.classType() == ProcessClass::InvalidClass;
		if (invalidClass && processClassType != LiteralStringRef("default")) {
			std::string error = "ERROR: \'" + processClassType.toString() + "\' is not a valid process class\n";
			return Optional<std::string>(ManagementAPIError::toJsonString(false, "setclass", error));
		}
	}
	return processClassCommitActor(ryw, getKeyRange());
}
// Formats `command` and `message` as a ManagementAPIError JSON string, stores
// it on `ryw` as the special-key-space error message, and throws
// special_keys_api_failure. This function never returns normally.
void throwSpecialKeyApiFailure(ReadYourWritesTransaction* ryw, std::string command, std::string message) {
auto msg = ManagementAPIError::toJsonString(false, command, message);
ryw->setSpecialKeySpaceErrorMsg(msg);
throw special_keys_api_failure();
}
2020-08-26 05:59:43 +08:00
// Range clears are rejected for setclass: stores an error message on `ryw` and
// throws special_keys_api_failure via throwSpecialKeyApiFailure.
void ProcessClassRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
	// This is the range overload, so the message names the range operation.
	return throwSpecialKeyApiFailure(
	    ryw, "setclass", "Clear range operation is meaningless thus forbidden for setclass");
}
2020-08-26 05:59:43 +08:00
// Single-key clears are rejected for setclass: stores an error message on
// `ryw` and throws special_keys_api_failure via throwSpecialKeyApiFailure.
void ProcessClassRangeImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	// This is the single-key overload, so the message names the plain clear operation.
	return throwSpecialKeyApiFailure(ryw, "setclass", "Clear operation is meaningless thus forbidden for setclass");
}
2021-05-04 04:14:16 +08:00
// Lists the process class source string of every worker whose key
// (`prefix` + ip:port) lies inside `kr`. Workers are ordered by their
// formatted ip:port string.
ACTOR Future<RangeResult> getProcessClassSourceActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) {
	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
	std::vector<ProcessData> _workers = wait(getWorkers(&ryw->getTransaction()));
	auto workers = _workers; // strip const

	// Note : the sort by string is anti intuition, ex. 1.1.1.1:11 < 1.1.1.1:5
	std::sort(workers.begin(), workers.end(), [](const ProcessData& a, const ProcessData& b) {
		return formatIpPort(a.address.ip, a.address.port) < formatIpPort(b.address.ip, b.address.port);
	});

	RangeResult result;
	for (auto& worker : workers) {
		// exclude :tls in keys even if the network address is TLS
		Key workerKey(prefix.withSuffix(formatIpPort(worker.address.ip, worker.address.port)));
		if (!kr.contains(workerKey))
			continue;
		Value source(worker.processClass.sourceString());
		result.push_back(result.arena(), KeyValueRef(workerKey, source));
		// key and value are referenced, not copied, so keep their memory alive
		result.arena().dependsOn(workerKey.arena());
		result.arena().dependsOn(source.arena());
	}
	return result;
}
// Forwards the key range `kr` to the SpecialKeyRangeReadImpl base.
ProcessClassSourceRangeImpl::ProcessClassSourceRangeImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
// Starts getProcessClassSourceActor with this impl's range begin as the key
// prefix and returns its future. `limitsHint` is not used.
Future<RangeResult> ProcessClassSourceRangeImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
return getProcessClassSourceActor(ryw, getKeyRange().begin, kr);
}
2021-05-04 04:14:16 +08:00
// Reads databaseLockedKey and, when it is present, returns a single pair
// (kr.begin, lock UID as a string). Returns an empty result when the key is
// absent. The UID is decoded from the stored value after skipping its first
// 10 bytes.
ACTOR Future<RangeResult> getLockedKeyActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	Optional<Value> val = wait(ryw->getTransaction().get(databaseLockedKey));

	RangeResult result;
	if (!val.present()) {
		return result;
	}
	UID stored = BinaryReader::fromStringRef<UID>(val.get().substr(10), Unversioned());
	UID uid = UID::fromString(stored.toString());
	result.push_back_deep(result.arena(), KeyValueRef(kr.begin, Value(uid.toString())));
	return result;
}
// Forwards the key range `kr` to the SpecialKeyRangeRWImpl base.
LockDatabaseImpl::LockDatabaseImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Reads the lock special key. `kr` must equal this impl's key range.
//
// With read-your-writes enabled and a flagged write to the lock key in this
// transaction, the written value (if any) is returned without reading the
// database. Otherwise the persisted lock is read through getLockedKeyActor.
// `limitsHint` is not used.
Future<RangeResult> LockDatabaseImpl::getRange(ReadYourWritesTransaction* ryw,
                                               KeyRangeRef kr,
                                               GetRangeLimits limitsHint) const {
	// single key range, the queried range should always be the same as the underlying range
	ASSERT(kr == getKeyRange());
	auto lockEntry = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("lock")];
	if (ryw->readYourWritesDisabled() || !lockEntry.first) {
		// no local write to honor: read the persisted state
		return getLockedKeyActor(ryw, kr);
	}

	// ryw enabled and we have written to the special key
	RangeResult result;
	if (lockEntry.second.present()) {
		result.push_back_deep(result.arena(), KeyValueRef(kr.begin, lockEntry.second.get()));
	}
	return result;
}
// Locks the database with `uid`.
//
// If databaseLockedKey is absent, it is written with a versionstamped value
// holding `uid` and a write conflict range over normalKeys is added. If it is
// present with a different UID, database_locked is thrown. If it is present
// with the same UID, nothing is written. Always returns an empty Optional.
ACTOR Future<Optional<std::string>> lockDatabaseCommitActor(ReadYourWritesTransaction* ryw, UID uid) {
	state Optional<std::string> msg;
	ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	Optional<Value> val = wait(ryw->getTransaction().get(databaseLockedKey));

	if (!val.present()) {
		// lock database
		ryw->getTransaction().atomicOp(databaseLockedKey,
		                               BinaryWriter::toValue(uid, Unversioned())
		                                   .withPrefix(LiteralStringRef("0123456789"))
		                                   .withSuffix(LiteralStringRef("\x00\x00\x00\x00")),
		                               MutationRef::SetVersionstampedValue);
		ryw->getTransaction().addWriteConflictRange(normalKeys);
	} else if (BinaryReader::fromStringRef<UID>(val.get().substr(10), Unversioned()) != uid) {
		// already locked under a different UID
		throw database_locked();
	}
	return msg;
}
// Unlocks the database: clears databaseLockedKey if it is currently present.
// Always returns an empty Optional.
ACTOR Future<Optional<std::string>> unlockDatabaseCommitActor(ReadYourWritesTransaction* ryw) {
	ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	Optional<Value> currentLock = wait(ryw->getTransaction().get(databaseLockedKey));
	if (currentLock.present()) {
		ryw->getTransaction().clear(singleKeyRange(databaseLockedKey));
	}
	return Optional<std::string>();
}
// Commits the lock special key. When the write-map entry holds a value, it is
// parsed as a UID and the database is locked with it; a value that fails to
// parse yields a JSON "lock" error message. When the entry holds no value, the
// database is unlocked.
Future<Optional<std::string>> LockDatabaseImpl::commit(ReadYourWritesTransaction* ryw) {
	auto lockId = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("lock")].second;
	if (!lockId.present()) {
		return unlockDatabaseCommitActor(ryw);
	}

	std::string uidStr = lockId.get().toString();
	UID uid;
	try {
		uid = UID::fromString(uidStr);
	} catch (Error& e) {
		return Optional<std::string>(
		    ManagementAPIError::toJsonString(false, "lock", "Invalid UID hex string: " + uidStr));
	}
	return lockDatabaseCommitActor(ryw, uid);
}
2021-05-04 04:14:16 +08:00
// Reads fdbShouldConsistencyCheckBeSuspended and returns a single pair
// (kr.begin, empty value) when the stored boolean is true. Returns an empty
// result when the key is absent or false.
ACTOR Future<RangeResult> getConsistencyCheckKeyActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	ryw->getTransaction().setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
	Optional<Value> val = wait(ryw->getTransaction().get(fdbShouldConsistencyCheckBeSuspended));

	// an absent key means "not suspended"
	bool suspended = val.present() && BinaryReader::fromStringRef<bool>(val.get(), Unversioned());
	RangeResult result;
	if (suspended) {
		result.push_back_deep(result.arena(), KeyValueRef(kr.begin, ValueRef()));
	}
	return result;
}
// Forwards the key range `kr` to the SpecialKeyRangeRWImpl base.
ConsistencyCheckImpl::ConsistencyCheckImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Reads the consistencycheck special key. `kr` must equal this impl's key
// range.
//
// With read-your-writes enabled and a flagged write to the key in this
// transaction, the written value (if any) is returned without reading the
// database. Otherwise the persisted setting is read through
// getConsistencyCheckKeyActor. `limitsHint` is not used.
Future<RangeResult> ConsistencyCheckImpl::getRange(ReadYourWritesTransaction* ryw,
                                                   KeyRangeRef kr,
                                                   GetRangeLimits limitsHint) const {
	// single key range, the queried range should always be the same as the underlying range
	ASSERT(kr == getKeyRange());
	auto entry = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("consistencycheck")];
	if (ryw->readYourWritesDisabled() || !entry.first) {
		// no local write to honor: read the persisted state
		return getConsistencyCheckKeyActor(ryw, kr);
	}

	// ryw enabled and we have written to the special key
	RangeResult result;
	if (entry.second.present()) {
		result.push_back_deep(result.arena(), KeyValueRef(kr.begin, entry.second.get()));
	}
	return result;
}
// Commit hook for the "consistencycheck" management key: writes a boolean to
// fdbShouldConsistencyCheckBeSuspended that is true when the write map holds a value for the
// key and false when it does not. Always returns an empty Optional (no error message).
Future<Optional<std::string>> ConsistencyCheckImpl::commit(ReadYourWritesTransaction* ryw) {
	const auto pendingValue =
	    ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("consistencycheck")].second;
	const bool suspend = pendingValue.present();

	Transaction& tr = ryw->getTransaction();
	tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
	tr.setOption(FDBTransactionOptions::LOCK_AWARE);
	tr.setOption(FDBTransactionOptions::RAW_ACCESS);
	tr.set(fdbShouldConsistencyCheckBeSuspended, BinaryWriter::toValue(suspend, Unversioned()));
	return Optional<std::string>();
}
// Constructs the global-configuration module over the special-key range `kr`; the range is
// forwarded unchanged to the SpecialKeyRangeRWImpl base class.
GlobalConfigImpl::GlobalConfigImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
2021-03-20 08:37:01 +08:00
// Lists the global configuration entries that fall inside `kr`. The queried range is translated
// by stripping this module's prefix and looked up in the database's GlobalConfig. Every entry
// that holds a value is returned under its prefixed key with the value rendered as a string,
// regardless of its stored type (StringRef, int64_t, bool, float or double). This read path is
// meant for informational purposes only; limitsHint is not consulted.
Future<RangeResult> GlobalConfigImpl::getRange(ReadYourWritesTransaction* ryw,
                                               KeyRangeRef kr,
                                               GetRangeLimits limitsHint) const {
	RangeResult result;
	KeyRangeRef modified =
	    KeyRangeRef(kr.begin.removePrefix(getKeyRange().begin), kr.end.removePrefix(getKeyRange().begin));
	std::map<KeyRef, Reference<ConfigValue>> values = ryw->getDatabase()->globalConfig->get(modified);

	for (const auto& [key, config] : values) {
		// Entries without a usable value are not reported.
		if (!config.isValid() || !config->value.has_value()) {
			continue;
		}

		const auto& storedType = config->value.type();
		std::string rendered;
		if (storedType == typeid(StringRef)) {
			rendered = std::any_cast<StringRef>(config->value).toString();
		} else if (storedType == typeid(int64_t)) {
			rendered = std::to_string(std::any_cast<int64_t>(config->value));
		} else if (storedType == typeid(bool)) {
			rendered = std::to_string(std::any_cast<bool>(config->value));
		} else if (storedType == typeid(float)) {
			rendered = std::to_string(std::any_cast<float>(config->value));
		} else if (storedType == typeid(double)) {
			rendered = std::to_string(std::any_cast<double>(config->value));
		} else {
			// Unknown stored type: nothing is emitted for this entry.
			ASSERT(false);
			continue;
		}

		Key prefixedKey = key.withPrefix(getKeyRange().begin);
		result.push_back_deep(result.arena(), KeyValueRef(prefixedKey, rendered));
	}
	return result;
}
2021-03-20 08:37:01 +08:00
// Records a pending insertion of `key` -> `value` in the special-key-space write map. Nothing
// is written to the database here; the entry is picked up when the transaction commits.
void GlobalConfigImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	Optional<Value> pendingValue(value);
	ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, pendingValue));
}
2021-03-20 08:37:01 +08:00
// Writes global configuration changes to durable memory. Also writes the
// changes made in the transaction to a recent history set, and updates the
// latest version which the global configuration was updated at.
ACTOR Future<Optional<std::string>> globalConfigCommitActor(GlobalConfigImpl* globalConfig,
ReadYourWritesTransaction* ryw) {
state Transaction& tr = ryw->getTransaction();
ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
// History should only contain three most recent updates. If it currently
// has three items, remove the oldest to make room for a new item.
2021-05-04 04:14:16 +08:00
RangeResult history = wait(tr.getRange(globalConfigHistoryKeys, CLIENT_KNOBS->TOO_MANY));
constexpr int kGlobalConfigMaxHistorySize = 3;
if (history.size() > kGlobalConfigMaxHistorySize - 1) {
for (int i = 0; i < history.size() - (kGlobalConfigMaxHistorySize - 1); ++i) {
tr.clear(history[i].key);
}
}
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
Standalone<VectorRef<KeyValueRef>> insertions;
Standalone<VectorRef<KeyRangeRef>> clears;
// Transform writes from the special-key-space (\xff\xff/global_config/) to
// the system key space (\xff/globalConfig/), and writes mutations to
// latest version history.
state RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::Ranges ranges =
ryw->getSpecialKeySpaceWriteMap().containedRanges(specialKeys);
state RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::iterator iter = ranges.begin();
while (iter != ranges.end()) {
std::pair<bool, Optional<Value>> entry = iter->value();
if (entry.first) {
if (entry.second.present() && iter->begin().startsWith(globalConfig->getKeyRange().begin)) {
2021-02-25 03:49:25 +08:00
Key bareKey = iter->begin().removePrefix(globalConfig->getKeyRange().begin);
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
insertions.push_back_deep(insertions.arena(), KeyValueRef(bareKey, entry.second.get()));
} else if (!entry.second.present() && iter->range().begin.startsWith(globalConfig->getKeyRange().begin) &&
iter->range().end.startsWith(globalConfig->getKeyRange().begin)) {
2021-02-25 03:49:25 +08:00
KeyRef bareRangeBegin = iter->range().begin.removePrefix(globalConfig->getKeyRange().begin);
KeyRef bareRangeEnd = iter->range().end.removePrefix(globalConfig->getKeyRange().begin);
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
clears.push_back_deep(clears.arena(), KeyRangeRef(bareRangeBegin, bareRangeEnd));
}
}
++iter;
}
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
GlobalConfig::applyChanges(tr, insertions, clears);
return Optional<std::string>();
}
2021-03-20 08:37:01 +08:00
// Commit hook invoked when a transaction has touched keys in the global configuration
// special-key-space range. The work itself is done by globalConfigCommitActor.
Future<Optional<std::string>> GlobalConfigImpl::commit(ReadYourWritesTransaction* ryw) {
	Future<Optional<std::string>> commitResult = globalConfigCommitActor(this, ryw);
	return commitResult;
}
2021-03-20 08:37:01 +08:00
// Records a pending deletion of `range` in the special-key-space write map: the range is
// flagged as modified with no value attached.
void GlobalConfigImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
	const std::pair<bool, Optional<Value>> clearMarker(true, Optional<Value>());
	ryw->getSpecialKeySpaceWriteMap().insert(range, clearMarker);
}
2021-03-20 08:37:01 +08:00
// Records a pending deletion of the single `key` in the special-key-space write map: the key
// is flagged as modified with no value attached.
void GlobalConfigImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	const std::pair<bool, Optional<Value>> clearMarker(true, Optional<Value>());
	ryw->getSpecialKeySpaceWriteMap().insert(key, clearMarker);
}
2021-02-20 16:43:54 +08:00
// Constructs the tracing-options module over the special-key range `kr`; the range is
// forwarded unchanged to the SpecialKeyRangeRWImpl base class.
TracingOptionsImpl::TracingOptionsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Reports the tracing options of the current transaction. For every option name returned by
// SpecialKeySpace::getTracingOptions() whose full key lies inside `kr`, one entry is emitted:
// the "transaction_id" key carries the span context's trace ID and the "token" key carries the
// span ID in decimal. limitsHint is not consulted.
Future<RangeResult> TracingOptionsImpl::getRange(ReadYourWritesTransaction* ryw,
                                                 KeyRangeRef kr,
                                                 GetRangeLimits limitsHint) const {
	RangeResult result;
	for (const auto& optionName : SpecialKeySpace::getTracingOptions()) {
		auto optionKey = getKeyRange().begin.withSuffix(optionName);
		if (kr.contains(optionKey)) {
			if (optionKey.endsWith(kTracingTransactionIdKey)) {
				result.push_back_deep(
				    result.arena(),
				    KeyValueRef(optionKey, ryw->getTransactionState()->spanContext.traceID.toString()));
			} else if (optionKey.endsWith(kTracingTokenKey)) {
				result.push_back_deep(
				    result.arena(),
				    KeyValueRef(optionKey, std::to_string(ryw->getTransactionState()->spanContext.spanID)));
			}
		}
	}
	return result;
}
// Applies a tracing option to the transaction.
//   - If the transaction's approximate size is already non-zero, the option is rejected: an
//     error message is recorded and the key is inserted into the write map without a value.
//   - "transaction_id": the value is parsed as a UID and installed as the transaction ID.
//   - "token": "true" installs a random 64-bit token, "false" installs 0, and any other value
//     records an error message and throws special_keys_api_failure.
// Keys matching neither option name are ignored.
void TracingOptionsImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	if (ryw->getApproximateSize() > 0) {
		ryw->setSpecialKeySpaceErrorMsg("tracing options must be set first");
		ryw->getSpecialKeySpaceWriteMap().insert(key, std::make_pair(true, Optional<Value>()));
		return;
	}

	if (key.endsWith(kTracingTransactionIdKey)) {
		ryw->setTransactionID(UID::fromString(value.toString()));
		return;
	}
	if (!key.endsWith(kTracingTokenKey)) {
		return;
	}

	const std::string requested = value.toString();
	if (requested == "true") {
		ryw->setToken(deterministicRandom()->randomUInt64());
	} else if (requested == "false") {
		ryw->setToken(0);
	} else {
		ryw->setSpecialKeySpaceErrorMsg("token must be set to true/false");
		throw special_keys_api_failure();
	}
}
// Commit hook for tracing options. Throws special_keys_api_failure when the special-key-space
// write map reports a non-zero size; otherwise returns an empty Optional (no error message).
Future<Optional<std::string>> TracingOptionsImpl::commit(ReadYourWritesTransaction* ryw) {
	const bool hasPendingWrites = ryw->getSpecialKeySpaceWriteMap().size() > 0;
	if (hasPendingWrites) {
		throw special_keys_api_failure();
	}
	return Optional<std::string>();
}
// Range clears are not supported for tracing options: records the error message and fails the
// operation with special_keys_api_failure.
void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
	const std::string reason = "clear range disabled";
	ryw->setSpecialKeySpaceErrorMsg(reason);
	throw special_keys_api_failure();
}
// Single-key clears are not supported for tracing options: records the error message and fails
// the operation with special_keys_api_failure.
void TracingOptionsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	const std::string reason = "clear disabled";
	ryw->setSpecialKeySpaceErrorMsg(reason);
	throw special_keys_api_failure();
}
2021-01-30 03:45:52 +08:00
// Constructs the coordinators module over the special-key range `kr`; the range is forwarded
// unchanged to the SpecialKeyRangeRWImpl base class.
CoordinatorsImpl::CoordinatorsImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
2022-04-28 12:54:13 +08:00
// Builds the readable view of the coordinators module from the database's current connection
// string. Two keys may be produced, each only if it lies inside `kr`:
//   <prefix>cluster_description : the cluster key name of the connection string
//   <prefix>processes           : comma-separated resolved coordinator addresses
// The collected entries are passed through rywGetRange together with `ryw` and `kr`.
ACTOR Future<RangeResult> coordinatorsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRef prefix, KeyRangeRef kr) {
	state ClusterConnectionString cs = ryw->getDatabase()->getConnectionRecord()->getConnectionString();
	state std::vector<NetworkAddress> coordinator_processes = wait(cs.tryResolveHostnames());
	RangeResult result;

	Key descriptionKey = prefix.withSuffix(LiteralStringRef("cluster_description"));
	if (kr.contains(descriptionKey)) {
		result.push_back_deep(result.arena(), KeyValueRef(descriptionKey, cs.clusterKeyName()));
	}

	// Addresses are ordered by their string form, which is not numeric order:
	// e.g. "1.1.1.1:11" sorts before "1.1.1.1:5".
	// The string form includes ":tls" when the network address is TLS.
	std::sort(coordinator_processes.begin(),
	          coordinator_processes.end(),
	          [](const NetworkAddress& lhs, const NetworkAddress& rhs) { return lhs.toString() < rhs.toString(); });
	std::string joinedAddresses;
	for (const auto& address : coordinator_processes) {
		if (!joinedAddresses.empty()) {
			joinedAddresses += ",";
		}
		joinedAddresses += address.toString();
	}

	Key processesKey = prefix.withSuffix(LiteralStringRef("processes"));
	if (kr.contains(processesKey)) {
		result.push_back_deep(result.arena(), KeyValueRef(processesKey, Value(joinedAddresses)));
	}
	return rywGetRange(ryw, kr, result);
}
2022-04-28 12:54:13 +08:00
// Serves reads of the coordinators module by delegating to coordinatorsGetRangeActor, using the
// start of this module's key range as the key prefix. limitsHint is not consulted.
Future<RangeResult> CoordinatorsImpl::getRange(ReadYourWritesTransaction* ryw,
                                               KeyRangeRef kr,
                                               GetRangeLimits limitsHint) const {
	return coordinatorsGetRangeActor(ryw, getKeyRange().begin, kr);
}
2021-01-30 03:45:52 +08:00
// Validates and applies a pending coordinators change recorded in the special-key-space write
// map for the module rooted at `kr.begin`.
//
//   <kr.begin>processes           : comma-separated list of "IP:PORT" or "HOSTNAME:PORT" entries
//   <kr.begin>cluster_description : new cluster description, must match [A-Za-z0-9_]+
//
// Each written key is validated first; a validation failure returns a non-retriable
// ManagementAPIError JSON string without contacting the cluster. Otherwise the new connection
// string and description are passed to changeQuorumChecker. If the checker reports a result, it
// is converted to an error message (retriable only for COORDINATOR_UNREACHABLE); an empty
// Optional means no error.
ACTOR static Future<Optional<std::string>> coordinatorsCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	state ClusterConnectionString conn; // We don't care about the Key here.
	state std::vector<std::string> process_address_or_hostname_strs;
	state Optional<std::string> msg;
	state int index;
	state bool parse_error = false;

	// check update for coordinators
	Key processes_key = LiteralStringRef("processes").withPrefix(kr.begin);
	auto processes_entry = ryw->getSpecialKeySpaceWriteMap()[processes_key];
	if (processes_entry.first) {
		ASSERT(processes_entry.second.present()); // no clear should be seen here
		auto processesStr = processes_entry.second.get().toString();
		// The emptiness check has to look at the raw string: boost::split produces a single
		// empty token for an empty input, so the token vector is never empty after the split.
		if (processesStr.empty()) {
			return ManagementAPIError::toJsonString(
			    false,
			    "coordinators",
			    "New coordinators\' processes are empty, please specify new processes\' network addresses with format "
			    "\"IP:PORT,IP:PORT,...,IP:PORT\" or \"HOSTNAME:PORT,HOSTNAME:PORT,...,HOSTNAME:PORT\"");
		}
		boost::split(process_address_or_hostname_strs, processesStr, [](char c) { return c == ','; });
		for (index = 0; index < process_address_or_hostname_strs.size(); index++) {
			try {
				if (Hostname::isHostname(process_address_or_hostname_strs[index])) {
					conn.hostnames.push_back(Hostname::parse(process_address_or_hostname_strs[index]));
				} else {
					NetworkAddress a = NetworkAddress::parse(process_address_or_hostname_strs[index]);
					if (!a.isValid()) {
						parse_error = true;
					} else {
						conn.coords.push_back(a);
					}
				}
			} catch (Error& e) {
				TraceEvent(SevDebug, "SpecialKeysNetworkParseError").error(e);
				parse_error = true;
			}

			// Stop at the first entry that is neither a valid hostname nor a valid address.
			if (parse_error) {
				std::string error = "ERROR: \'" + process_address_or_hostname_strs[index] +
				                    "\' is not a valid network endpoint address\n";
				return ManagementAPIError::toJsonString(false, "coordinators", error);
			}
		}
	}

	std::string newName;
	// check update for cluster_description
	Key cluster_decription_key = LiteralStringRef("cluster_description").withPrefix(kr.begin);
	auto entry = ryw->getSpecialKeySpaceWriteMap()[cluster_decription_key];
	if (entry.first) {
		// check valid description [a-zA-Z0-9_]+
		if (entry.second.present() && isAlphaNumeric(entry.second.get().toString())) {
			// do the name change
			newName = entry.second.get().toString();
		} else {
			// report the invalid description to the client
			return ManagementAPIError::toJsonString(
			    false, "coordinators", "Cluster description must match [A-Za-z0-9_]+");
		}
	}

	TraceEvent(SevDebug, "SKSChangeCoordinatorsStart")
	    .detail("NewConnectionString", conn.toString())
	    .detail("Description", entry.first ? entry.second.get().toString() : "");

	Optional<CoordinatorsResult> r = wait(changeQuorumChecker(&ryw->getTransaction(), &conn, newName));

	TraceEvent(SevDebug, "SKSChangeCoordinatorsFinish")
	    .detail("Result", r.present() ? static_cast<int>(r.get()) : -1); // -1 means success
	if (r.present()) {
		auto res = r.get();
		bool retriable = false;
		if (res == CoordinatorsResult::COORDINATOR_UNREACHABLE) {
			retriable = true;
		} else if (res == CoordinatorsResult::SUCCESS) {
			// A present result is expected to describe a failure.
			TraceEvent(SevError, "SpecialKeysForCoordinators").detail("UnexpectedSuccessfulResult", "");
			ASSERT(false);
		}
		msg = ManagementAPIError::toJsonString(retriable, "coordinators", ManagementAPI::generateErrorMessage(res));
	}
	return msg;
}
// Commit hook for the coordinators module: delegates to coordinatorsCommitActor, passing this
// module's own key range.
Future<Optional<std::string>> CoordinatorsImpl::commit(ReadYourWritesTransaction* ryw) {
return coordinatorsCommitActor(ryw, getKeyRange());
}
// Range clears are rejected for the coordinators module; the failure is reported through
// throwSpecialKeyApiFailure under the "coordinators" command name.
void CoordinatorsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
	throwSpecialKeyApiFailure(
	    ryw, "coordinators", "Clear range is meaningless thus forbidden for coordinators");
}
// Single-key clears are rejected for the coordinators module; the failure is reported through
// throwSpecialKeyApiFailure under the "coordinators" command name.
void CoordinatorsImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	throwSpecialKeyApiFailure(
	    ryw, "coordinators", "Clear operation is meaningless thus forbidden for coordinators");
}
// Constructs the auto-coordinators module over the special-key range `kr`; the range is
// forwarded unchanged to the SpecialKeyRangeReadImpl base class.
CoordinatorsAutoImpl::CoordinatorsAutoImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
2021-05-04 04:14:16 +08:00
// Computes the value served under `kr.begin` for the auto_coordinators key: a comma-separated
// list of coordinators.
//   - Reads coordinatorsKey; if it is absent, records a non-retriable error message and throws
//     special_keys_api_failure.
//   - Asks autoQuorumChange() for the desired coordinators given the current ones.
//   - NOT_ENOUGH_MACHINES records a retriable error message and throws special_keys_api_failure.
//   - SAME_NETWORK_ADDRESSES returns the current hostnames followed by the current addresses.
//   - Any other result returns the desired addresses.
ACTOR static Future<RangeResult> CoordinatorsAutoImplActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	state RangeResult res;
	state std::string autoCoordinatorsKey;
	state Transaction& tr = ryw->getTransaction();

	tr.setOption(FDBTransactionOptions::LOCK_AWARE);
	tr.setOption(FDBTransactionOptions::RAW_ACCESS);
	tr.setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
	tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
	Optional<Value> currentKey = wait(tr.get(coordinatorsKey));

	if (!currentKey.present()) {
		ryw->setSpecialKeySpaceErrorMsg(
		    ManagementAPIError::toJsonString(false, "auto_coordinators", "The coordinator key does not exist"));
		throw special_keys_api_failure();
	}
	state ClusterConnectionString old(currentKey.get().toString());
	state CoordinatorsResult result = CoordinatorsResult::SUCCESS;

	std::vector<NetworkAddress> oldCoordinators = wait(old.tryResolveHostnames());
	std::vector<NetworkAddress> desiredCoordinators = wait(autoQuorumChange()->getDesiredCoordinators(
	    &tr,
	    oldCoordinators,
	    Reference<ClusterConnectionMemoryRecord>(new ClusterConnectionMemoryRecord(old)),
	    result));

	if (result == CoordinatorsResult::NOT_ENOUGH_MACHINES) {
		// not_enough_machines can be seen if the database is observed while the cluster
		// controller is updating the worker list, so the error is reported as retriable
		// rather than as a definitive failure.
		ryw->setSpecialKeySpaceErrorMsg(ManagementAPIError::toJsonString(
		    true,
		    "auto_coordinators",
		    "Too few fdbserver machines to provide coordination at the current redundancy level"));
		throw special_keys_api_failure();
	}

	if (result != CoordinatorsResult::SAME_NETWORK_ADDRESSES) {
		for (const auto& address : desiredCoordinators) {
			if (!autoCoordinatorsKey.empty()) {
				autoCoordinatorsKey += ",";
			}
			autoCoordinatorsKey += address.toString();
		}
	} else {
		// Nothing would change: echo the current configuration, hostnames first.
		for (const auto& host : old.hostnames) {
			if (!autoCoordinatorsKey.empty()) {
				autoCoordinatorsKey += ",";
			}
			autoCoordinatorsKey += host.toString();
		}
		for (const auto& coord : old.coords) {
			if (!autoCoordinatorsKey.empty()) {
				autoCoordinatorsKey += ",";
			}
			autoCoordinatorsKey += coord.toString();
		}
	}
	res.push_back_deep(res.arena(), KeyValueRef(kr.begin, Value(autoCoordinatorsKey)));
	return res;
}
// Serves the single key of the auto-coordinators module by delegating to
// CoordinatorsAutoImplActor. limitsHint is not consulted.
Future<RangeResult> CoordinatorsAutoImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
// single key range, the queried range should always be the same as the underlying range
ASSERT(kr == getKeyRange());
return CoordinatorsAutoImplActor(ryw, kr);
}
2021-05-04 04:14:16 +08:00
// Reads minRequiredCommitVersionKey through the underlying transaction (lock-aware, raw
// access). When the key is present its stored Version is returned as a decimal string under
// `kr.begin`; when it is absent the result is empty.
ACTOR static Future<RangeResult> getMinCommitVersionActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	Optional<Value> storedVersion = wait(ryw->getTransaction().get(minRequiredCommitVersionKey));

	RangeResult result;
	if (!storedVersion.present()) {
		return result;
	}

	Version minRequiredCommitVersion = BinaryReader::fromStringRef<Version>(storedVersion.get(), Unversioned());
	ValueRef versionText(result.arena(), boost::lexical_cast<std::string>(minRequiredCommitVersion));
	result.push_back_deep(result.arena(), KeyValueRef(kr.begin, versionText));
	return result;
}
// Constructs the advance-version module over the special-key range `kr`; the range is
// forwarded unchanged to the SpecialKeyRangeRWImpl base class.
AdvanceVersionImpl::AdvanceVersionImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Serves the single "advanceversion" management key. When read-your-writes is enabled and this
// transaction has already written the key, the answer comes from the write map (one entry if a
// value was set, none if it was cleared). Otherwise the stored minimum required commit version
// is read through getMinCommitVersionActor. limitsHint is not consulted.
Future<RangeResult> AdvanceVersionImpl::getRange(ReadYourWritesTransaction* ryw,
                                                 KeyRangeRef kr,
                                                 GetRangeLimits limitsHint) const {
	// This module covers exactly one key, so the queried range must equal the module range.
	ASSERT(kr == getKeyRange());

	auto pending =
	    ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("advanceversion")];
	const bool servedFromWrites = !ryw->readYourWritesDisabled() && pending.first;
	if (!servedFromWrites) {
		return getMinCommitVersionActor(ryw, kr);
	}

	RangeResult result;
	if (pending.second.present()) {
		result.push_back_deep(result.arena(), KeyValueRef(kr.begin, pending.second.get()));
	}
	return result;
}
// Applies an "advanceversion" request for version `v` by storing v + 1 under
// minRequiredCommitVersionKey. The request is rejected with a non-retriable ManagementAPIError
// JSON string when:
//   - versionEpochKey is present,
//   - v exceeds the maximum allowed version, or
//   - the transaction's read version is already greater than v.
// On success an empty Optional is returned.
ACTOR static Future<Optional<std::string>> advanceVersionCommitActor(ReadYourWritesTransaction* ryw, Version v) {
	Optional<Standalone<StringRef>> versionEpochValue = wait(ryw->getTransaction().get(versionEpochKey));
	if (versionEpochValue.present()) {
		return ManagementAPIError::toJsonString(
		    false, "advanceversion", "Illegal to modify the version while the version epoch is enabled");
	}

	// Upper bound for minRequiredCommitVersionKey: leaves 1000 years' worth of versions of
	// headroom below the int64_t maximum, so the cluster can keep running after the recovery.
	static const Version maxAllowedVersion =
	    std::numeric_limits<int64_t>::max() - 1 - CLIENT_KNOBS->VERSIONS_PER_SECOND * 3600 * 24 * 365 * 1000;

	ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	TraceEvent(SevDebug, "AdvanceVersion").detail("MaxAllowedVersion", maxAllowedVersion);
	if (v > maxAllowedVersion) {
		return ManagementAPIError::toJsonString(
		    false,
		    "advanceversion",
		    "The given version is larger than the maximum allowed value(2**63-1-version_per_second*3600*24*365*1000)");
	}

	Version rv = wait(ryw->getTransaction().getReadVersion());
	if (rv > v) {
		return ManagementAPIError::toJsonString(
		    false, "advanceversion", "Current read version is larger than the given version");
	}
	ryw->getTransaction().set(minRequiredCommitVersionKey, BinaryWriter::toValue(v + 1, Unversioned()));
	return Optional<std::string>();
}
// Commit hook for the "advanceversion" management key.
//   - No pending value: clears minRequiredCommitVersionKey and returns an empty Optional.
//   - Pending value: parses it as an int64_t version and hands off to
//     advanceVersionCommitActor; a value that does not parse yields a non-retriable
//     ManagementAPIError JSON string.
Future<Optional<std::string>> AdvanceVersionImpl::commit(ReadYourWritesTransaction* ryw) {
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	auto requestedVersion =
	    ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("advanceversion")].second;
	if (!requestedVersion.present()) {
		ryw->getTransaction().clear(minRequiredCommitVersionKey);
		return Optional<std::string>();
	}

	const std::string versionText = requestedVersion.get().toString();
	try {
		// Version is int64_t
		Version v = boost::lexical_cast<int64_t>(versionText);
		return advanceVersionCommitActor(ryw, v);
	} catch (boost::bad_lexical_cast& e) {
		return Optional<std::string>(ManagementAPIError::toJsonString(
		    false, "advanceversion", "Invalid version(int64_t) argument: " + versionText));
	}
}
Add fdbcli command to read/write version epoch (#6480) * Initialize cluster version at wall-clock time Previously, new clusters would begin at version 0. After this change, clusters will initialize at a version matching wall-clock time. Instead of using the Unix epoch (or Windows epoch), FDB clusters will use a new epoch, defaulting to January 1, 2010, 01:00:00+00:00. In the future, this base epoch will be modifiable through fdbcli, allowing administrators to advance the cluster version. Basing the version off of time allows different FDB clusters to share data without running into version issues. * Send version epoch to master * Cleanup * Update fdbserver/storageserver.actor.cpp Co-authored-by: A.J. Beamon <aj.beamon@snowflake.com> * Jump directly to expected version if possible * Fix initial version issue on storage servers * Add random recovery offset to start version in simulation * Type fixes * Disable reference time by default Enable on a cluster using the fdbcli command `versionepoch add 0`. * Use correct recoveryTransactionVersion when recovering * Allow version epoch to be adjusted forwards (to decrease the version) * Set version epoch in simulation * Add quiet database check to ensure small version offset * Fix initial version issue on storage servers * Disable reference time by default Enable on a cluster using the fdbcli command `versionepoch add 0`. * Add fdbcli command to read/write version epoch * Cause recovery when version epoch is set * Handle optional version epoch key * Add ability to clear the version epoch This causes version advancement to revert to the old methodology whereas versions attempt to advance by about a million versions per second, instead of trying to match the clock. * Update transaction access * Modify version epoch to use microseconds instead of seconds * Modify fdbcli version target API Move commands from `versionepoch` to `targetversion` top level command. 
* Add fdbcli tests for * Temporarily disable targetversion cli tests * Fix version epoch fetch issue * Fix Arena issue * Reduce max version jump in simulation to 1,000,000 * Rework fdbcli API It now requires two commands to fully switch a cluster to using the version epoch. First, enable the version epoch with `versionepoch enable` or `versionepoch set <versionepoch>`. At this point, versions will be given out at a faster or slower rate in an attempt to reach the expected version. Then, run `versionepoch commit` to perform a one time jump to the expected version. This is essentially irreversible. * Temporarily disable old targetversion tests * Cleanup * Move version epoch buggify to sequencer This will cause some issues with the QuietDatabase check for the version offset - namely, it won't do anything, since the version epoch is not being written to the txnStateStore in simulation. This will get fixed in the future. Co-authored-by: A.J. Beamon <aj.beamon@snowflake.com>
2022-04-09 03:33:19 +08:00
// Reads the version epoch for a special-key range read.
//
// Fetches versionEpochKey through the underlying transaction with LOCK_AWARE and
// RAW_ACCESS set. When the key is present its value is decoded as an int64_t and
// returned as one row keyed by kr.begin whose value is the decimal text of the epoch;
// when it is absent the result is empty.
ACTOR static Future<RangeResult> getVersionEpochActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	Optional<Value> storedEpoch = wait(ryw->getTransaction().get(versionEpochKey));
	RangeResult rows;
	if (!storedEpoch.present()) {
		return rows;
	}
	int64_t epoch = BinaryReader::fromStringRef<int64_t>(storedEpoch.get(), Unversioned());
	ValueRef epochText(rows.arena(), boost::lexical_cast<std::string>(epoch));
	rows.push_back_deep(rows.arena(), KeyValueRef(kr.begin, epochText));
	return rows;
}
// Constructs the version-epoch special-key module; the key range is forwarded to the
// SpecialKeyRangeRWImpl base.
VersionEpochImpl::VersionEpochImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Range read for the version-epoch special key. Only a read of exactly this module's key
// range is accepted (asserted); the work is delegated to getVersionEpochActor.
// limitsHint is not used.
Future<RangeResult> VersionEpochImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
ASSERT(kr == getKeyRange());
return getVersionEpochActor(ryw, kr);
}
// Commit hook for the "versionepoch" management command.
//
// If the special-key write map holds a value for the command key, the value is decoded as
// a binary int64_t and written to versionEpochKey (with LOCK_AWARE and RAW_ACCESS set on
// the underlying transaction). If no value is present, versionEpochKey is cleared.
// Always returns an empty Optional (no error message).
Future<Optional<std::string>> VersionEpochImpl::commit(ReadYourWritesTransaction* ryw) {
	auto pendingEpoch =
	    ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandPrefix("versionepoch")].second;
	if (!pendingEpoch.present()) {
		ryw->getTransaction().clear(versionEpochKey);
		return Optional<std::string>();
	}

	int64_t decodedEpoch = BinaryReader::fromStringRef<int64_t>(pendingEpoch.get(), Unversioned());
	ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
	ryw->getTransaction().set(versionEpochKey, BinaryWriter::toValue(decodedEpoch, Unversioned()));
	return Optional<std::string>();
}
// Constructs the client-profiling special-key module; the key range is forwarded to the
// SpecialKeyRangeRWImpl base.
ClientProfilingImpl::ClientProfilingImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
Future<RangeResult> ClientProfilingImpl::getRange(ReadYourWritesTransaction* ryw,
KeyRangeRef kr,
GetRangeLimits limitsHint) const {
KeyRef prefix = getKeyRange().begin;
RangeResult result = RangeResult();
// client_txn_sample_rate
Key sampleRateKey = LiteralStringRef("client_txn_sample_rate").withPrefix(prefix);
ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
if (kr.contains(sampleRateKey)) {
auto entry = ryw->getSpecialKeySpaceWriteMap()[sampleRateKey];
if (!ryw->readYourWritesDisabled() && entry.first) {
// clear is forbidden
ASSERT(entry.second.present());
2021-02-26 05:13:34 +08:00
result.push_back_deep(result.arena(), KeyValueRef(sampleRateKey, entry.second.get()));
} else {
std::string sampleRateStr = "default";
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
const double sampleRateDbl = ryw->getDatabase()->globalConfig->get<double>(
fdbClientInfoTxnSampleRate, std::numeric_limits<double>::infinity());
if (!std::isinf(sampleRateDbl)) {
sampleRateStr = std::to_string(sampleRateDbl);
}
result.push_back_deep(result.arena(), KeyValueRef(sampleRateKey, Value(sampleRateStr)));
}
}
// client_txn_size_limit
Key txnSizeLimitKey = LiteralStringRef("client_txn_size_limit").withPrefix(prefix);
if (kr.contains(txnSizeLimitKey)) {
auto entry = ryw->getSpecialKeySpaceWriteMap()[txnSizeLimitKey];
if (!ryw->readYourWritesDisabled() && entry.first) {
// clear is forbidden
ASSERT(entry.second.present());
2021-02-26 05:13:34 +08:00
result.push_back_deep(result.arena(), KeyValueRef(txnSizeLimitKey, entry.second.get()));
} else {
std::string sizeLimitStr = "default";
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
const int64_t sizeLimit = ryw->getDatabase()->globalConfig->get<int64_t>(fdbClientInfoTxnSizeLimit, -1);
if (sizeLimit != -1) {
sizeLimitStr = boost::lexical_cast<std::string>(sizeLimit);
}
result.push_back_deep(result.arena(), KeyValueRef(txnSizeLimitKey, Value(sizeLimitStr)));
}
}
return result;
}
Future<Optional<std::string>> ClientProfilingImpl::commit(ReadYourWritesTransaction* ryw) {
ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
Standalone<VectorRef<KeyValueRef>> insertions;
Standalone<VectorRef<KeyRangeRef>> clears;
// client_txn_sample_rate
Key sampleRateKey = LiteralStringRef("client_txn_sample_rate").withPrefix(getKeyRange().begin);
auto rateEntry = ryw->getSpecialKeySpaceWriteMap()[sampleRateKey];
if (rateEntry.first && rateEntry.second.present()) {
std::string sampleRateStr = rateEntry.second.get().toString();
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
if (sampleRateStr == "default") {
clears.push_back_deep(clears.arena(),
KeyRangeRef(fdbClientInfoTxnSampleRate, keyAfter(fdbClientInfoTxnSampleRate)));
} else {
try {
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
double sampleRate = boost::lexical_cast<double>(sampleRateStr);
Tuple rate = Tuple().appendDouble(sampleRate);
insertions.push_back_deep(insertions.arena(), KeyValueRef(fdbClientInfoTxnSampleRate, rate.pack()));
} catch (boost::bad_lexical_cast& e) {
return Optional<std::string>(ManagementAPIError::toJsonString(
false, "profile", "Invalid transaction sample rate(double): " + sampleRateStr));
}
}
}
// client_txn_size_limit
Key txnSizeLimitKey = LiteralStringRef("client_txn_size_limit").withPrefix(getKeyRange().begin);
auto sizeLimitEntry = ryw->getSpecialKeySpaceWriteMap()[txnSizeLimitKey];
if (sizeLimitEntry.first && sizeLimitEntry.second.present()) {
std::string sizeLimitStr = sizeLimitEntry.second.get().toString();
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
if (sizeLimitStr == "default") {
clears.push_back_deep(clears.arena(),
KeyRangeRef(fdbClientInfoTxnSizeLimit, keyAfter(fdbClientInfoTxnSizeLimit)));
} else {
try {
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
int64_t sizeLimit = boost::lexical_cast<int64_t>(sizeLimitStr);
Tuple size = Tuple().append(sizeLimit);
insertions.push_back_deep(insertions.arena(), KeyValueRef(fdbClientInfoTxnSizeLimit, size.pack()));
} catch (boost::bad_lexical_cast& e) {
return Optional<std::string>(ManagementAPIError::toJsonString(
false, "profile", "Invalid transaction size limit(int64_t): " + sizeLimitStr));
}
}
}
Refactor profiling special keys to use GlobalConfig The special keys `\xff\xff/management/profiling/client_txn_sample_rate` and `\xff\xff/management/profiling/client_txn_size_limit` are deprecated in FDB 7.2. However, GlobalConfig was introduced in 7.0, and reading and writing these keys through the special key space was broken in 7.0+. This change modifies the profiling special keys to use GlobalConfig behind the scenes, fixing the broken special keys. The following Python script was used to make sure both GlobalConfig and the profiling special key can be used to read/write/clear profiling data: ``` import fdb import time fdb.api_version(710) @fdb.transactional def set_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to write the key #tr[b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate'] = fdb.tuple.pack((5.0,)) tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = '5.0' @fdb.transactional def clear_sample_rate(tr): tr.options.set_special_key_space_enable_writes() # Alternative way to clear the key #tr.clear(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate') tr[b'\xff\xff/management/profiling/client_txn_sample_rate'] = 'default' @fdb.transactional def get_sample_rate(tr): print(tr.get(b'\xff\xff/global_config/config/fdb_client_info/client_txn_sample_rate')) # Alternative way to read the key #print(tr.get(b'\xff\xff/management/profiling/client_txn_sample_rate')) fdb.options.set_trace_enable() fdb.options.set_trace_format('json') db = fdb.open() get_sample_rate(db) # None (or 'default') set_sample_rate(db) time.sleep(1) # Allow time for global config changes to propagate get_sample_rate(db) # 5.0 clear_sample_rate(db) time.sleep(1) get_sample_rate(db) # None (or 'default') ``` It can be run with `PYTHONPATH=./bindings/python/ python profiling.py`, and reads the `fdb.cluster` file in the current directory. ``` $ PYTHONPATH=./bindings/python/ python sps.py None 5.000000 None ```
2022-04-28 03:45:24 +08:00
GlobalConfig::applyChanges(ryw->getTransaction(), insertions, clears);
return Optional<std::string>();
}
// Range clears are not supported for the client profiling keys: every call is turned into
// a special-key API failure for the "profile" command via throwSpecialKeyApiFailure.
// `range` is not inspected.
void ClientProfilingImpl::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& range) {
return throwSpecialKeyApiFailure(
ryw, "profile", "Clear range is forbidden for profile client. You can set it to default to disable profiling.");
}
// Single-key clears are not supported for the client profiling keys: every call is turned
// into a special-key API failure for the "profile" command via throwSpecialKeyApiFailure.
// `key` is not inspected.
void ClientProfilingImpl::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
return throwSpecialKeyApiFailure(
ryw,
"profile",
"Clear operation is forbidden for profile client. You can set it to default to disable profiling.");
}
// Constructs the actor-lineage special-key module; the key range is forwarded to the
// SpecialKeyRangeReadImpl base.
ActorLineageImpl::ActorLineageImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
// Parses a key token as an int using std::stoi; std::stoi's exceptions propagate to the
// caller when the token is not a valid integer.
void parse(StringRef& val, int& i) {
	const std::string token = val.toString();
	i = std::stoi(token);
}
// Parses a key token as a double using std::stod; std::stod's exceptions propagate to the
// caller when the token is not a valid number.
void parse(StringRef& val, double& d) {
	const std::string token = val.toString();
	d = std::stod(token);
}
// Parses a key token as a WaitState.
//
// Accepts "disk", "network" and "running", each either all lower-case or with a capital
// first letter. Any other token throws std::range_error.
void parse(StringRef& val, WaitState& w) {
	if (val == LiteralStringRef("disk") || val == LiteralStringRef("Disk")) {
		w = WaitState::Disk;
	} else if (val == LiteralStringRef("network") || val == LiteralStringRef("Network")) {
		w = WaitState::Network;
	} else if (val == LiteralStringRef("running") || val == LiteralStringRef("Running")) {
		w = WaitState::Running;
	} else {
		throw std::range_error("failed to parse run state");
	}
}
// Parses a key token as a timestamp in the "%FT%T%z" format (date, 'T', time, numeric
// UTC offset) and stores the result in `t`.
//
// The broken-down time is converted with timegm and the UTC offset is then subtracted.
// Throws std::invalid_argument when the token does not match the format and
// std::runtime_error when the conversion fails.
void parse(StringRef& val, time_t& t) {
	// Zero-initialize so fields the format does not set are not left indeterminate.
	struct tm tm = {};
#ifdef _WIN32
	std::istringstream s(val.toString());
	s.imbue(std::locale(setlocale(LC_TIME, nullptr)));
	s >> std::get_time(&tm, "%FT%T%z");
	if (s.fail()) {
		throw std::invalid_argument("failed to parse ISO 8601 datetime");
	}
	long timezone;
	if (_get_timezone(&timezone) != 0) {
		throw std::runtime_error("failed to convert ISO 8601 datetime");
	}
	timezone = -timezone;
#else
	if (strptime(val.toString().c_str(), "%FT%T%z", &tm) == nullptr) {
		throw std::invalid_argument("failed to parse ISO 8601 datetime");
	}
	long timezone = tm.tm_gmtoff;
#endif
	// The conversion below is shared by both platforms. Previously it lived inside the
	// non-Windows branch, so on Windows `t` was never assigned.
	// NOTE(review): assumes timegm is available in the Windows build (e.g. via a platform
	// shim) -- confirm.
	t = timegm(&tm);
	if (t == -1) {
		throw std::runtime_error("failed to convert ISO 8601 datetime");
	}
	t -= timezone;
}
// Parses a key token as a NetworkAddress via NetworkAddress::parse and stores it in `a`.
// Throws std::invalid_argument when the parsed address reports itself as not valid;
// `a` is left untouched in that case.
void parse(StringRef& val, NetworkAddress& a) {
	const std::string token = val.toString();
	auto parsed = NetworkAddress::parse(token);
	if (parsed.isValid()) {
		a = parsed;
		return;
	}
	throw std::invalid_argument("invalid host");
}
// Terminal overload of the variadic token parser: handles the final output parameter.
// If a token remains it is parsed into t1; if the tokens are exhausted t1 keeps its
// current value.
template <typename T>
void parse(std::vector<StringRef>::iterator it, std::vector<StringRef>::iterator end, T& t1) {
	if (it != end) {
		parse(*it, t1);
	}
}
// Given an iterator into a vector of string tokens, an iterator to the end of
// the search space in the vector (exclusive), and a list of references to
// types, parses each token in the vector into the associated type according to
// the order of the arguments.
//
// For example, given the vector ["1", "1.5", "127.0.0.1:4000"] and the
// argument list int a, double b, NetworkAddress c, after this function returns
// each parameter passed in will hold the parsed value from the token list.
//
// The appropriate parsing function must be implemented for the type you wish
// to parse. See the existing parsing functions above, and add your own if
// necessary.
//
// Exceptions thrown by the per-type parsing functions propagate to the caller
// unchanged. (An earlier version caught std::exception& and re-threw it with
// `throw e;`, which throws a sliced std::exception copy and loses the derived
// type and message; the catch blocks did nothing else, so they were removed.)
template <typename T, typename... Types>
void parse(std::vector<StringRef>::iterator it, std::vector<StringRef>::iterator end, T& t1, Types&... remaining) {
	// Return as soon as all tokens have been parsed. This allows parameters
	// passed at the end to act as optional parameters -- they will only be set
	// if the value exists.
	if (it == end) {
		return;
	}

	parse(*it, t1);
	parse(++it, end, remaining...);
}
// Serves a range read under the actor_lineage special-key prefix.
//
// The begin and end keys (with `prefix` removed) are split on '/' into tokens. The first
// token selects the index ("state" or "time"); the remaining tokens are parsed, in index
// order, into host, wait state, time and sequence number. Tokens that are missing keep
// the defaults set below. Both ends of the range must name the same host.
//
// The process at `host` is then asked for its actor lineage samples within the wait-state
// and time bounds, and every sample is turned into rows:
//   - one row per (wait state, data) entry, whose value is the msgpack payload rendered
//     as text;
//   - a single "Running" row with value "{}" when the sample carries no data.
// The sequence number counts samples that share the same timestamp. The sequence bounds
// are applied here, not sent in the request, and only filter the per-entry rows.
//
// Throws special_keys_api_failure (after recording a message on `ryw`) when required
// tokens are missing, the index is unknown, an Error is raised while parsing, or the two
// hosts differ.
ACTOR static Future<RangeResult> actorLineageGetRangeActor(ReadYourWritesTransaction* ryw,
                                                           KeyRef prefix,
                                                           KeyRangeRef kr) {
	state RangeResult result;

	// Set default values for all fields. The default will be used if the field
	// is missing in the key.
	state NetworkAddress host;
	state WaitState waitStateStart = WaitState{ 0 };
	state WaitState waitStateEnd = WaitState{ 2 };
	state time_t timeStart = 0;
	state time_t timeEnd = std::numeric_limits<time_t>::max();
	state int seqStart = 0;
	state int seqEnd = std::numeric_limits<int>::max();

	state std::vector<StringRef> beginValues = kr.begin.removePrefix(prefix).splitAny("/"_sr);
	state std::vector<StringRef> endValues = kr.end.removePrefix(prefix).splitAny("/"_sr);
	// Require index (either "state" or "time") and address:port.
	if (beginValues.size() < 2 || endValues.size() < 2) {
		ryw->setSpecialKeySpaceErrorMsg("missing required parameters (index, host)");
		throw special_keys_api_failure();
	}

	state NetworkAddress endRangeHost;
	try {
		if (SpecialKeySpace::getActorLineageApiCommandRange("state").contains(kr)) {
			// For the range \xff\xff/actor_lineage/state/ip:port/wait-state/time/seq
			parse(beginValues.begin() + 1, beginValues.end(), host, waitStateStart, timeStart, seqStart);
			if (kr.begin != kr.end) {
				parse(endValues.begin() + 1, endValues.end(), endRangeHost, waitStateEnd, timeEnd, seqEnd);
			}
		} else if (SpecialKeySpace::getActorLineageApiCommandRange("time").contains(kr)) {
			// For the range \xff\xff/actor_lineage/time/ip:port/time/wait-state/seq
			parse(beginValues.begin() + 1, beginValues.end(), host, timeStart, waitStateStart, seqStart);
			if (kr.begin != kr.end) {
				parse(endValues.begin() + 1, endValues.end(), endRangeHost, timeEnd, waitStateEnd, seqEnd);
			}
		} else {
			ryw->setSpecialKeySpaceErrorMsg("invalid index in actor_lineage");
			throw special_keys_api_failure();
		}
	} catch (Error& e) {
		// Any Error other than our own API failure is reported as a key parse failure.
		if (e.code() != special_keys_api_failure().code()) {
			ryw->setSpecialKeySpaceErrorMsg("failed to parse key");
			throw special_keys_api_failure();
		} else {
			throw e;
		}
	}

	if (kr.begin != kr.end && host != endRangeHost) {
		// The client doesn't know about all the hosts, so a get range covering
		// multiple hosts has no way of knowing which IP:port combos to use.
		ryw->setSpecialKeySpaceErrorMsg("the host must remain the same on both ends of the range");
		throw special_keys_api_failure();
	}

	// Open endpoint to target process on each call. This can be optimized at
	// some point...
	state ProcessInterface process;
	process.getInterface = RequestStream<GetProcessInterfaceRequest>(Endpoint::wellKnown({ host }, WLTOKEN_PROCESS));
	ProcessInterface p = wait(retryBrokenPromise(process.getInterface, GetProcessInterfaceRequest{}));
	process = p;

	ActorLineageRequest actorLineageRequest;
	actorLineageRequest.waitStateStart = waitStateStart;
	actorLineageRequest.waitStateEnd = waitStateEnd;
	actorLineageRequest.timeStart = timeStart;
	actorLineageRequest.timeEnd = timeEnd;
	ActorLineageReply reply = wait(process.actorLineage.getReply(actorLineageRequest));

	// `dt` is the timestamp of the previous sample; `seq` numbers consecutive samples that
	// share a timestamp, restarting at 0 whenever the timestamp changes.
	time_t dt = 0;
	int seq = -1;
	for (const auto& sample : reply.samples) {
		time_t datetime = (time_t)sample.time;
		char buf[50];
		struct tm* tm;
		tm = localtime(&datetime);
		size_t size = strftime(buf, 50, "%FT%T%z", tm);
		std::string date(buf, size);

		seq = dt == datetime ? seq + 1 : 0;
		dt = datetime;

		for (const auto& [waitState, data] : sample.data) {
			if (seq < seqStart) {
				continue;
			} else if (seq >= seqEnd) {
				break;
			}

			// Key layout follows the index that was queried: state/host/wait-state/time/seq
			// or time/host/time/wait-state/seq.
			std::ostringstream streamKey;
			if (SpecialKeySpace::getActorLineageApiCommandRange("state").contains(kr)) {
				streamKey << SpecialKeySpace::getActorLineageApiCommandPrefix("state").toString() << host.toString()
				          << "/" << to_string(waitState) << "/" << date;
			} else if (SpecialKeySpace::getActorLineageApiCommandRange("time").contains(kr)) {
				streamKey << SpecialKeySpace::getActorLineageApiCommandPrefix("time").toString() << host.toString()
				          << "/" << date << "/" << to_string(waitState);
			} else {
				ASSERT(false);
			}
			streamKey << "/" << seq;

			// Render the msgpack payload as text for the value.
			msgpack::object_handle oh = msgpack::unpack(data.data(), data.size());
			msgpack::object deserialized = oh.get();

			std::ostringstream stream;
			stream << deserialized;

			result.push_back_deep(result.arena(), KeyValueRef(streamKey.str(), stream.str()));
		}

		// A sample without any data is reported as a single "Running" row with an empty object.
		if (sample.data.size() == 0) {
			std::ostringstream streamKey;
			if (SpecialKeySpace::getActorLineageApiCommandRange("state").contains(kr)) {
				streamKey << SpecialKeySpace::getActorLineageApiCommandPrefix("state").toString() << host.toString()
				          << "/Running/" << date;
			} else if (SpecialKeySpace::getActorLineageApiCommandRange("time").contains(kr)) {
				streamKey << SpecialKeySpace::getActorLineageApiCommandPrefix("time").toString() << host.toString()
				          << "/" << date << "/Running";
			} else {
				ASSERT(false);
			}
			streamKey << "/" << seq;
			result.push_back_deep(result.arena(), KeyValueRef(streamKey.str(), "{}"_sr));
		}
	}

	return result;
}
// Serves a range read over this module by delegating to actorLineageGetRangeActor, passing the start of
// this module's key range as the prefix. `limitsHint` is not consulted.
Future<RangeResult> ActorLineageImpl::getRange(ReadYourWritesTransaction* ryw,
                                               KeyRangeRef kr,
                                               GetRangeLimits limitsHint) const {
	const KeyRef modulePrefix = getKeyRange().begin;
	return actorLineageGetRangeActor(ryw, modulePrefix, kr);
}
namespace {
// Exposes the bytes referenced by `sr` as a std::string_view. No copy is made: the view covers
// sr.size() bytes starting at sr.begin(), so it is only valid while that memory is.
std::string_view to_string_view(StringRef sr) {
	const char* const firstByte = reinterpret_cast<const char*>(sr.begin());
	return std::string_view(firstByte, sr.size());
}
} // namespace
// Forwards `kr` to SpecialKeyRangeRWImpl and initializes `config` from
// ProfilerConfig::instance().getConfig().
ActorProfilerConf::ActorProfilerConf(KeyRangeRef kr)
: SpecialKeyRangeRWImpl(kr), config(ProfilerConfig::instance().getConfig()) {}
// Returns the entries of `config` whose keys fall inside `kr`.
//
// `config` is keyed by the part of the special key that follows this module's prefix (range.begin), so both
// bounds of `kr` are stripped of that prefix before comparing, and the same prefix is re-attached to every
// key that is returned. `kr` is treated as the half-open interval [kr.begin, kr.end): an entry equal to
// kr.begin is included and an entry equal to kr.end is not.
// `ryw` and `limitsHint` are not consulted.
Future<RangeResult> ActorProfilerConf::getRange(ReadYourWritesTransaction* ryw,
                                                KeyRangeRef kr,
                                                GetRangeLimits limitsHint) const {
	RangeResult res;
	const std::string_view begin = to_string_view(kr.begin.removePrefix(range.begin));
	const std::string_view end = to_string_view(kr.end.removePrefix(range.begin));
	for (const auto& [name, value] : config) {
		if (name >= end) {
			// entries are visited in key order, so nothing past the exclusive end can match
			break;
		}
		if (name < begin) {
			continue;
		}
		KeyValueRef kv;
		// Re-attach the module prefix (not kr.begin, which may already contain part of the key)
		kv.key = StringRef(res.arena(), name).withPrefix(range.begin, res.arena());
		kv.value = StringRef(res.arena(), value);
		res.push_back(res.arena(), kv);
	}
	return res;
}
// Stages a write of `value` to `key`:
//  - stores the value in the local `config` copy, keyed by `key` with this module's prefix removed;
//  - records the write in the transaction's special-key-space write map;
//  - sets `didWrite`.
void ActorProfilerConf::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	const std::string name = key.removePrefix(range.begin).toString();
	config[name] = value.toString();

	auto& writeMap = ryw->getSpecialKeySpaceWriteMap();
	writeMap.insert(key, std::make_pair(true, Optional<Value>(value)));
	didWrite = true;
}
// Removes from the local `config` copy every entry whose key lies in the half-open range
// [kr.begin, kr.end), after stripping this module's prefix from both bounds.
// `didWrite` is set whenever at least one entry at or after kr.begin exists; `ryw` is not consulted.
void ActorProfilerConf::clear(ReadYourWritesTransaction* ryw, const KeyRangeRef& kr) {
	const std::string begin = kr.begin.removePrefix(range.begin).toString();
	const std::string end = kr.end.removePrefix(range.begin).toString();
	const auto first = config.lower_bound(begin);
	if (first == config.end()) {
		// nothing to clear
		return;
	}
	didWrite = true;
	// kr.end is exclusive, so stop at the first entry >= end; upper_bound would also erase an entry
	// whose key is exactly `end`.
	const auto last = config.lower_bound(end);
	config.erase(first, last);
}
// Removes the single entry for `key` (with this module's prefix stripped) from the local `config` copy,
// if there is one, and sets `didWrite` unconditionally. `ryw` is not consulted.
void ActorProfilerConf::clear(ReadYourWritesTransaction* ryw, const KeyRef& key) {
	// erasing by key is a no-op when the key is absent
	config.erase(key.removePrefix(range.begin).toString());
	didWrite = true;
}
// Applies the locally staged configuration through ProfilerConfig::instance().reset() when `didWrite` is
// set. Returns an empty Optional on success (or when nothing was written) and the description of the
// ConfigError when the reset throws one. `ryw` is not consulted.
Future<Optional<std::string>> ActorProfilerConf::commit(ReadYourWritesTransaction* ryw) {
	if (!didWrite) {
		return Optional<std::string>{};
	}
	try {
		ProfilerConfig::instance().reset(config);
	} catch (ConfigError& err) {
		return Optional<std::string>{ err.description };
	}
	return Optional<std::string>{};
}
// Constructs a MaintenanceImpl; `kr` is forwarded unchanged to SpecialKeyRangeRWImpl.
MaintenanceImpl::MaintenanceImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
2021-04-24 02:13:08 +08:00
// Used to read the healthZoneKey
// If the key is persisted and the delayed read version is still larger than current read version,
// we will calculate the remaining time(truncated to integer, the same as fdbcli) and return back as the value
// If the zoneId is the special one `ignoreSSFailuresZoneString`,
// value will be 0 (same as fdbcli)
2021-05-04 04:14:16 +08:00
ACTOR static Future<RangeResult> MaintenanceGetRangeActor(ReadYourWritesTransaction* ryw,
KeyRef prefix,
KeyRangeRef kr) {
state RangeResult result;
// zoneId
ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
Optional<Value> val = wait(ryw->getTransaction().get(healthyZoneKey));
if (val.present()) {
auto healthyZone = decodeHealthyZoneValue(val.get());
if ((healthyZone.first == ignoreSSFailuresZoneString) ||
(healthyZone.second > ryw->getTransaction().getReadVersion().get())) {
Key zone_key = healthyZone.first.withPrefix(prefix);
double seconds = healthyZone.first == ignoreSSFailuresZoneString
? 0
: (healthyZone.second - ryw->getTransaction().getReadVersion().get()) /
CLIENT_KNOBS->CORE_VERSIONSPERSECOND;
if (kr.contains(zone_key)) {
result.push_back_deep(result.arena(),
KeyValueRef(zone_key, Value(boost::lexical_cast<std::string>(seconds))));
}
}
}
return rywGetRange(ryw, kr, result);
}
// Serves a range read over this module by delegating to MaintenanceGetRangeActor, passing the start of
// this module's key range as the prefix. `limitsHint` is not consulted.
Future<RangeResult> MaintenanceImpl::getRange(ReadYourWritesTransaction* ryw,
                                              KeyRangeRef kr,
                                              GetRangeLimits limitsHint) const {
	const KeyRef modulePrefix = getKeyRange().begin;
	return MaintenanceGetRangeActor(ryw, modulePrefix, kr);
}
2021-04-24 02:13:08 +08:00
// Commit the change to healthZoneKey
// We do not allow more than one zone to be set in maintenance in one transaction
// In addition, if the zoneId now is 'ignoreSSFailuresZoneString',
// which means the data distribution is disabled for storage failures.
// Only clear this specific key is allowed, any other operations will throw error
ACTOR static Future<Optional<std::string>> maintenanceCommitActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
// read
ryw->getTransaction().setOption(FDBTransactionOptions::LOCK_AWARE);
ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
ryw->getTransaction().setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
Optional<Value> val = wait(ryw->getTransaction().get(healthyZoneKey));
Optional<std::pair<Key, Version>> healthyZone =
val.present() ? decodeHealthyZoneValue(val.get()) : Optional<std::pair<Key, Version>>();
state RangeMap<Key, std::pair<bool, Optional<Value>>, KeyRangeRef>::Ranges ranges =
ryw->getSpecialKeySpaceWriteMap().containedRanges(kr);
Key zoneId;
double seconds;
bool isSet = false;
// Since maintenance only allows one zone at the same time,
// if a transaction has more than one set operation on different zone keys,
// the commit will throw an error
for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
2021-03-27 03:24:45 +08:00
if (!iter->value().first)
continue;
if (iter->value().second.present()) {
if (isSet)
return Optional<std::string>(ManagementAPIError::toJsonString(
false, "maintenance", "Multiple zones given for maintenance, only one allowed at the same time"));
isSet = true;
zoneId = iter->begin().removePrefix(kr.begin);
seconds = boost::lexical_cast<double>(iter->value().second.get().toString());
} else {
// if we already have set operation, then all clear operations will be meaningless, thus skip
if (!isSet && healthyZone.present() && iter.range().contains(healthyZone.get().first.withPrefix(kr.begin)))
ryw->getTransaction().clear(healthyZoneKey);
}
}
if (isSet) {
if (healthyZone.present() && healthyZone.get().first == ignoreSSFailuresZoneString) {
std::string msg = "Maintenance mode cannot be used while data distribution is disabled for storage "
"server failures.";
return Optional<std::string>(ManagementAPIError::toJsonString(false, "maintenance", msg));
} else if (seconds < 0) {
std::string msg =
"The specified maintenance time " + boost::lexical_cast<std::string>(seconds) + " is a negative value";
return Optional<std::string>(ManagementAPIError::toJsonString(false, "maintenance", msg));
} else {
TraceEvent(SevDebug, "SKSMaintenanceSet").detail("ZoneId", zoneId.toString());
ryw->getTransaction().set(healthyZoneKey,
2021-03-27 03:24:45 +08:00
healthyZoneValue(zoneId,
ryw->getTransaction().getReadVersion().get() +
(seconds * CLIENT_KNOBS->CORE_VERSIONSPERSECOND)));
}
}
return Optional<std::string>();
}
// Commits the writes staged under this module by running maintenanceCommitActor over the module's
// whole key range.
Future<Optional<std::string>> MaintenanceImpl::commit(ReadYourWritesTransaction* ryw) {
	const KeyRangeRef moduleRange = getKeyRange();
	return maintenanceCommitActor(ryw, moduleRange);
}
// Constructs a DataDistributionImpl; `kr` is forwarded unchanged to SpecialKeyRangeRWImpl.
DataDistributionImpl::DataDistributionImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
2021-04-24 02:13:08 +08:00
// Read the system keys dataDistributionModeKey and rebalanceDDIgnoreKey
2021-05-04 04:14:16 +08:00
ACTOR static Future<RangeResult> DataDistributionGetRangeActor(ReadYourWritesTransaction* ryw,
KeyRef prefix,
KeyRangeRef kr) {
state RangeResult result;
// dataDistributionModeKey
state Key modeKey = LiteralStringRef("mode").withPrefix(prefix);
ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);
if (kr.contains(modeKey)) {
auto entry = ryw->getSpecialKeySpaceWriteMap()[modeKey];
if (ryw->readYourWritesDisabled() || !entry.first) {
Optional<Value> f = wait(ryw->getTransaction().get(dataDistributionModeKey));
int mode = -1;
if (f.present()) {
mode = BinaryReader::fromStringRef<int>(f.get(), Unversioned());
}
result.push_back_deep(result.arena(), KeyValueRef(modeKey, Value(boost::lexical_cast<std::string>(mode))));
}
}
// rebalanceDDIgnoreKey
state Key rebalanceIgnoredKey = LiteralStringRef("rebalance_ignored").withPrefix(prefix);
if (kr.contains(rebalanceIgnoredKey)) {
auto entry = ryw->getSpecialKeySpaceWriteMap()[rebalanceIgnoredKey];
if (ryw->readYourWritesDisabled() || !entry.first) {
Optional<Value> f = wait(ryw->getTransaction().get(rebalanceDDIgnoreKey));
if (f.present()) {
result.push_back_deep(result.arena(), KeyValueRef(rebalanceIgnoredKey, Value()));
}
}
}
return rywGetRange(ryw, kr, result);
}
// Serves a range read over this module by delegating to DataDistributionGetRangeActor, passing the start
// of this module's key range as the prefix. `limitsHint` is not consulted.
Future<RangeResult> DataDistributionImpl::getRange(ReadYourWritesTransaction* ryw,
                                                   KeyRangeRef kr,
                                                   GetRangeLimits limitsHint) const {
	const KeyRef modulePrefix = getKeyRange().begin;
	return DataDistributionGetRangeActor(ryw, modulePrefix, kr);
}
// Applies the writes staged under this module to the underlying system keys.
// There are two valid keys in the range:
//   <prefix>/mode              -> dataDistributionModeKey, only "0"(disable) or "1"(enable) may be set
//   <prefix>/rebalance_ignored -> rebalanceDDIgnoreKey, the value is an int option or empty
// Returns an empty Optional when every staged write was acceptable; otherwise the ManagementAPIError JSON
// message of the last rejected write (an invalid mode value or a set on any other key). Clears covering
// either key are forwarded to the corresponding system key.
Future<Optional<std::string>> DataDistributionImpl::commit(ReadYourWritesTransaction* ryw) {
	ryw->getTransaction().setOption(FDBTransactionOptions::RAW_ACCESS);

	Optional<std::string> msg;
	KeyRangeRef kr = getKeyRange();
	Key modeKey = LiteralStringRef("mode").withPrefix(kr.begin);
	Key rebalanceIgnoredKey = LiteralStringRef("rebalance_ignored").withPrefix(kr.begin);
	auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(kr);
	for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
		if (!iter->value().first)
			continue;
		if (iter->value().second.present()) {
			if (iter->range() == singleKeyRange(modeKey)) {
				try {
					int mode = boost::lexical_cast<int>(iter->value().second.get().toString());
					if (mode == 0 || mode == 1) {
						// Whenever configuration changes or DD related system keyspace is changed,
						// actor must grab the moveKeysLockOwnerKey and update moveKeysLockWriteKey.
						// This prevents concurrent write to the same system keyspace.
						// When the owner of the DD related system keyspace changes, DD will reboot
						BinaryWriter wrMyOwner(Unversioned());
						wrMyOwner << dataDistributionModeLock;
						ryw->getTransaction().set(moveKeysLockOwnerKey, wrMyOwner.toValue());
						BinaryWriter wrLastWrite(Unversioned());
						wrLastWrite << deterministicRandom()->randomUniqueID();
						ryw->getTransaction().set(moveKeysLockWriteKey, wrLastWrite.toValue());
						// set mode
						Value modeVal = BinaryWriter::toValue(mode, Unversioned());
						ryw->getTransaction().set(dataDistributionModeKey, modeVal);
					} else {
						msg = ManagementAPIError::toJsonString(false,
						                                       "datadistribution",
						                                       "Please set the value of the data_distribution/mode to "
						                                       "0(disable) or 1(enable), other values are not allowed");
					}
				} catch (const boost::bad_lexical_cast&) {
					msg = ManagementAPIError::toJsonString(false,
					                                       "datadistribution",
					                                       "Invalid datadistribution mode(int): " +
					                                           iter->value().second.get().toString());
				}
			} else if (iter->range() == singleKeyRange(rebalanceIgnoredKey)) {
				// The option must be an int or empty; anything that does not parse as an int is stored
				// as the empty value. The sanitized `val` (not the raw input) is what gets written.
				ValueRef val = iter->value().second.get();
				try {
					boost::lexical_cast<int>(iter->value().second.get().toString());
				} catch (const boost::bad_lexical_cast&) {
					val = ""_sr;
				}
				ryw->getTransaction().set(rebalanceDDIgnoreKey, val);
			} else {
				msg = ManagementAPIError::toJsonString(
				    false,
				    "datadistribution",
				    "Changing invalid keys, please read the documentation to check valid keys in the range");
			}
		} else {
			// clear
			if (iter->range().contains(modeKey))
				ryw->getTransaction().clear(dataDistributionModeKey);
			else if (iter->range().contains(rebalanceIgnoredKey))
				ryw->getTransaction().clear(rebalanceDDIgnoreKey);
		}
	}
	return msg;
}
// Clears the special management api keys excludeLocality and failedLocality.
// For every clear staged in the write map under the "excludedlocality" and "failedlocality" command
// ranges, the matching version key gets a read conflict range and a fresh random id, and the decoded
// (system keyspace) range is cleared on the underlying transaction.
void includeLocalities(ReadYourWritesTransaction* ryw) {
	ryw->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
	ryw->setOption(FDBTransactionOptions::LOCK_AWARE);
	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
	ryw->setOption(FDBTransactionOptions::USE_PROVISIONAL_PROXIES);
	// includeLocalities might be used in an emergency transaction, so make sure it is retry-self-conflicting and
	// CAUSAL_WRITE_RISKY
	ryw->setOption(FDBTransactionOptions::CAUSAL_WRITE_RISKY);

	const std::string versionKey = deterministicRandom()->randomUniqueID().toString();
	Transaction& tr = ryw->getTransaction();

	// Forwards every staged clear under `command` to the system keyspace and bumps `versionKeyName`.
	auto forwardClears = [&](const std::string& command, const KeyRef& versionKeyName) {
		auto ranges =
		    ryw->getSpecialKeySpaceWriteMap().containedRanges(SpecialKeySpace::getManagementApiCommandRange(command));
		for (auto& iter : ranges) {
			const auto& entry = iter.value();
			const bool isClear = entry.first && !entry.second.present();
			if (!isClear) {
				continue;
			}
			tr.addReadConflictRange(singleKeyRange(versionKeyName));
			tr.set(versionKeyName, versionKey);
			tr.clear(ryw->getDatabase()->specialKeySpace->decode(iter.range()));
		}
	};

	// for excluded localities
	forwardClears("excludedlocality", excludedLocalityVersionKey);
	// for failed localities
	forwardClears("failedlocality", failedLocalityVersionKey);
}
2022-03-13 21:02:11 +08:00
// Reads the excludedlocality (or, when `failed` is true, the failedlocality) keys staged through the
// management api, parses them and returns the list.
//
// Only exclude (set) operations are inspected; include (clear) operations are skipped. For each staged
// locality that starts with LocalityData::ExcludeLocalityPrefix and contains a ':':
//  - the locality string is added to `localities`;
//  - the addresses getAddressesByLocality() finds for it in `workers` are appended to `addresses` and
//    inserted into `exclusions`.
// Returns true when every staged locality was valid. On the first invalid one, `msg` is set to a
// ManagementAPIError JSON message and false is returned; outputs collected so far are left as they are.
bool parseLocalitiesFromKeys(ReadYourWritesTransaction* ryw,
                             bool failed,
                             std::unordered_set<std::string>& localities,
                             std::vector<AddressExclusion>& addresses,
                             std::set<AddressExclusion>& exclusions,
                             std::vector<ProcessData>& workers,
                             Optional<std::string>& msg) {
	KeyRangeRef range = failed ? SpecialKeySpace::getManagementApiCommandRange("failedlocality")
	                           : SpecialKeySpace::getManagementApiCommandRange("excludedlocality");
	auto ranges = ryw->getSpecialKeySpaceWriteMap().containedRanges(range);
	for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
		const auto& entry = iter->value();
		// only check for exclude(set) operation, include(clear) are not checked
		TraceEvent(SevDebug, "ParseLocalities")
		    .detail("Valid", entry.first)
		    .detail("Set", entry.second.present())
		    .detail("Key", iter->begin().toString());
		if (!entry.first || !entry.second.present()) {
			continue;
		}

		Key locality = iter->begin().removePrefix(range.begin);
		// Convert once; the string form is needed for validation, lookup, bookkeeping and error text.
		const std::string localityStr = locality.toString();
		const bool valid = locality.startsWith(LocalityData::ExcludeLocalityPrefix) &&
		                   localityStr.find(':') != std::string::npos;
		if (!valid) {
			std::string error = "ERROR: \'" + localityStr + "\' is not a valid locality\n";
			// Only set operations reach this point, so the label is always one of the exclude variants.
			msg = ManagementAPIError::toJsonString(false, failed ? "exclude failed" : "exclude", error);
			return false;
		}

		std::set<AddressExclusion> localityAddresses = getAddressesByLocality(workers, localityStr);
		addresses.insert(addresses.end(), localityAddresses.begin(), localityAddresses.end());
		exclusions.insert(localityAddresses.begin(), localityAddresses.end());
		localities.insert(localityStr);
	}
	return true;
}
// On commit, parses the special exclusion keys and get the localities to be excluded, check for exclusions
// and add them to the exclusion list. Also, clears the special management api keys with includeLocalities.
// `failed` selects the failed-locality keys instead of the excluded-locality keys.
// Returns an empty Optional on success; otherwise the message filled in by parseLocalitiesFromKeys or
// checkExclusion.
ACTOR Future<Optional<std::string>> excludeLocalityCommitActor(ReadYourWritesTransaction* ryw, bool failed) {
	state Optional<std::string> result;
	state std::unordered_set<std::string> localities;
	state std::vector<AddressExclusion> addresses;
	state std::set<AddressExclusion> exclusions;

	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);

	state std::vector<ProcessData> workers = wait(getWorkers(&ryw->getTransaction()));
	const bool parsed = parseLocalitiesFromKeys(ryw, failed, localities, addresses, exclusions, workers, result);
	if (!parsed)
		return result;

	// If force option is not set, we need to do safety check
	auto forceOption = ryw->getSpecialKeySpaceWriteMap()[SpecialKeySpace::getManagementApiCommandOptionSpecialKey(
	    failed ? "failed_locality" : "excluded_locality", "force")];
	const bool forced = forceOption.first && forceOption.second.present();
	// only do safety check when we have localities to be excluded and the force option key is not set
	if (!localities.empty() && !forced) {
		bool safe = wait(checkExclusion(ryw->getDatabase(), &addresses, &exclusions, failed, &result));
		if (!safe)
			return result;
	}

	excludeLocalities(ryw->getTransaction(), localities, failed);
	includeLocalities(ryw);
	return result;
}
// Constructs an ExcludedLocalitiesRangeImpl; `kr` is forwarded unchanged to SpecialKeyRangeRWImpl.
ExcludedLocalitiesRangeImpl::ExcludedLocalitiesRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Enables RAW_ACCESS on the transaction and serves the read through rwModuleWithMappingGetRangeActor,
// with this object passed as the module. `limitsHint` is not consulted.
Future<RangeResult> ExcludedLocalitiesRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                                          KeyRangeRef kr,
                                                          GetRangeLimits limitsHint) const {
	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
	Future<RangeResult> mappedRead = rwModuleWithMappingGetRangeActor(ryw, this, kr);
	return mappedRead;
}
// Stages a set of `key` in the transaction's special-key-space write map. The supplied `value` is
// ignored: an empty value is always recorded.
void ExcludedLocalitiesRangeImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	auto& writeMap = ryw->getSpecialKeySpaceWriteMap();
	writeMap.insert(key, std::make_pair(true, Optional<Value>(ValueRef())));
}
// Translates a special key into its system-keyspace counterpart: the prefix of the MANAGEMENT module
// range is removed and "\xff/conf/" is put in its place.
Key ExcludedLocalitiesRangeImpl::decode(const KeyRef& key) const {
	const KeyRef managementPrefix = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin;
	return key.removePrefix(managementPrefix).withPrefix(LiteralStringRef("\xff/conf/"));
}
// Inverse of decode(): removes the "\xff/conf/" prefix from a system key and puts the prefix of the
// MANAGEMENT module range in its place.
Key ExcludedLocalitiesRangeImpl::encode(const KeyRef& key) const {
	const KeyRef managementPrefix = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin;
	return key.removePrefix(LiteralStringRef("\xff/conf/")).withPrefix(managementPrefix);
}
// Commits the staged locality exclusions through excludeLocalityCommitActor with the failed option
// set to false.
Future<Optional<std::string>> ExcludedLocalitiesRangeImpl::commit(ReadYourWritesTransaction* ryw) {
	const bool failed = false;
	return excludeLocalityCommitActor(ryw, failed);
}
// Constructs a FailedLocalitiesRangeImpl; `kr` is forwarded unchanged to SpecialKeyRangeRWImpl.
FailedLocalitiesRangeImpl::FailedLocalitiesRangeImpl(KeyRangeRef kr) : SpecialKeyRangeRWImpl(kr) {}
// Enables RAW_ACCESS on the transaction and serves the read through rwModuleWithMappingGetRangeActor,
// with this object passed as the module. `limitsHint` is not consulted.
Future<RangeResult> FailedLocalitiesRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                                        KeyRangeRef kr,
                                                        GetRangeLimits limitsHint) const {
	ryw->setOption(FDBTransactionOptions::RAW_ACCESS);
	Future<RangeResult> mappedRead = rwModuleWithMappingGetRangeActor(ryw, this, kr);
	return mappedRead;
}
// Stages a set of `key` in the transaction's special-key-space write map. The supplied `value` is
// ignored: an empty value is always recorded.
void FailedLocalitiesRangeImpl::set(ReadYourWritesTransaction* ryw, const KeyRef& key, const ValueRef& value) {
	auto& writeMap = ryw->getSpecialKeySpaceWriteMap();
	writeMap.insert(key, std::make_pair(true, Optional<Value>(ValueRef())));
}
// Translates a special key into its system-keyspace counterpart: the prefix of the MANAGEMENT module
// range is removed and "\xff/conf/" is put in its place.
Key FailedLocalitiesRangeImpl::decode(const KeyRef& key) const {
	const KeyRef managementPrefix = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin;
	return key.removePrefix(managementPrefix).withPrefix(LiteralStringRef("\xff/conf/"));
}
// Inverse of decode(): removes the "\xff/conf/" prefix from a system key and puts the prefix of the
// MANAGEMENT module range in its place.
Key FailedLocalitiesRangeImpl::encode(const KeyRef& key) const {
	const KeyRef managementPrefix = SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin;
	return key.removePrefix(LiteralStringRef("\xff/conf/")).withPrefix(managementPrefix);
}
// Commits the staged locality exclusions through excludeLocalityCommitActor with the failed option
// set to true.
Future<Optional<std::string>> FailedLocalitiesRangeImpl::commit(ReadYourWritesTransaction* ryw) {
	const bool failed = true;
	return excludeLocalityCommitActor(ryw, failed);
}