405 lines
18 KiB
C++
405 lines
18 KiB
C++
/*
|
|
* SpecialKeySpace.actor.cpp
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2020 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "fdbclient/SpecialKeySpace.actor.h"
|
|
#include "flow/UnitTest.h"
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
|
|
|
std::unordered_map<SpecialKeySpace::MODULE, KeyRange> SpecialKeySpace::moduleToBoundary = {
|
|
{ SpecialKeySpace::MODULE::TRANSACTION,
|
|
KeyRangeRef(LiteralStringRef("\xff\xff/transaction/"), LiteralStringRef("\xff\xff/transaction0")) },
|
|
{ SpecialKeySpace::MODULE::WORKERINTERFACE,
|
|
KeyRangeRef(LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0")) },
|
|
{ SpecialKeySpace::MODULE::STATUSJSON, singleKeyRange(LiteralStringRef("\xff\xff/status/json")) },
|
|
{ SpecialKeySpace::MODULE::CONNECTIONSTRING, singleKeyRange(LiteralStringRef("\xff\xff/connection_string")) },
|
|
{ SpecialKeySpace::MODULE::CLUSTERFILEPATH, singleKeyRange(LiteralStringRef("\xff\xff/cluster_file_path")) },
|
|
{ SpecialKeySpace::MODULE::METRICS,
|
|
KeyRangeRef(LiteralStringRef("\xff\xff/metrics/"), LiteralStringRef("\xff\xff/metrics0")) }
|
|
};
|
|
|
|
// This function will move the given KeySelector as far as possible to the standard form:
|
|
// orEqual == false && offset == 1 (Standard form)
|
|
// If the corresponding key is not in the underlying key range, it will move over the range
|
|
// The cache object is used to cache the first read result from the rpc call during the key resolution,
|
|
// then when we need to do key resolution or result filtering,
|
|
// we, instead of rpc call, read from this cache object have consistent results
|
|
ACTOR Future<Void> moveKeySelectorOverRangeActor(const SpecialKeyRangeBaseImpl* skrImpl, ReadYourWritesTransaction* ryw,
|
|
KeySelector* ks, Optional<Standalone<RangeResultRef>>* cache) {
|
|
ASSERT(!ks->orEqual); // should be removed before calling
|
|
ASSERT(ks->offset != 1); // never being called if KeySelector is already normalized
|
|
|
|
state Key startKey(skrImpl->getKeyRange().begin);
|
|
state Key endKey(skrImpl->getKeyRange().end);
|
|
state Standalone<RangeResultRef> result;
|
|
|
|
if (ks->offset < 1) {
|
|
// less than the given key
|
|
if (skrImpl->getKeyRange().contains(ks->getKey())) endKey = ks->getKey();
|
|
} else {
|
|
// greater than the given key
|
|
if (skrImpl->getKeyRange().contains(ks->getKey())) startKey = ks->getKey();
|
|
}
|
|
ASSERT(startKey < endKey); // Note : startKey never equals endKey here
|
|
|
|
TraceEvent(SevDebug, "NormalizeKeySelector")
|
|
.detail("OriginalKey", ks->getKey())
|
|
.detail("OriginalOffset", ks->offset)
|
|
.detail("SpecialKeyRangeStart", skrImpl->getKeyRange().begin)
|
|
.detail("SpecialKeyRangeEnd", skrImpl->getKeyRange().end);
|
|
|
|
if (skrImpl->isAsync()) {
|
|
const SpecialKeyRangeAsyncImpl* ptr = dynamic_cast<const SpecialKeyRangeAsyncImpl*>(skrImpl);
|
|
Standalone<RangeResultRef> result_ = wait(ptr->getRange(ryw, KeyRangeRef(startKey, endKey), cache));
|
|
result = result_;
|
|
} else {
|
|
Standalone<RangeResultRef> result_ = wait(skrImpl->getRange(ryw, KeyRangeRef(startKey, endKey)));
|
|
result = result_;
|
|
}
|
|
|
|
if (result.size() == 0) {
|
|
TraceEvent(SevDebug, "ZeroElementsIntheRange").detail("Start", startKey).detail("End", endKey);
|
|
return Void();
|
|
}
|
|
// Note : KeySelector::setKey has byte limit according to the knobs, customize it if needed
|
|
if (ks->offset < 1) {
|
|
if (result.size() >= 1 - ks->offset) {
|
|
ks->setKey(KeyRef(ks->arena(), result[result.size() - (1 - ks->offset)].key));
|
|
ks->offset = 1;
|
|
} else {
|
|
ks->setKey(KeyRef(ks->arena(), result[0].key));
|
|
ks->offset += result.size();
|
|
}
|
|
} else {
|
|
if (result.size() >= ks->offset) {
|
|
ks->setKey(KeyRef(ks->arena(), result[ks->offset - 1].key));
|
|
ks->offset = 1;
|
|
} else {
|
|
ks->setKey(KeyRef(ks->arena(), keyAfter(result[result.size() - 1].key)));
|
|
ks->offset -= result.size();
|
|
}
|
|
}
|
|
TraceEvent(SevDebug, "NormalizeKeySelector")
|
|
.detail("NormalizedKey", ks->getKey())
|
|
.detail("NormalizedOffset", ks->offset)
|
|
.detail("SpecialKeyRangeStart", skrImpl->getKeyRange().begin)
|
|
.detail("SpecialKeyRangeEnd", skrImpl->getKeyRange().end);
|
|
return Void();
|
|
}
|
|
|
|
// This function will normalize the given KeySelector to a standard KeySelector:
|
|
// orEqual == false && offset == 1 (Standard form)
|
|
// If the corresponding key is outside the whole space, it will move to the begin or the end
|
|
// It does have overhead here since we query all keys twice in the worst case.
|
|
// However, moving the KeySelector while handling other parameters like limits makes the code much more complex and hard
|
|
// to maintain; Thus, separate each part to make the code easy to understand and more compact
|
|
// Boundary is the range of the legal key space, which, by default is the range of the module
|
|
// And (\xff\xff, \xff\xff\xff) if SPECIAL_KEY_SPACE_RELAXED is turned on
|
|
ACTOR Future<Void> normalizeKeySelectorActor(SpecialKeySpace* sks, ReadYourWritesTransaction* ryw, KeySelector* ks,
|
|
KeyRangeRef boundary, int* actualOffset,
|
|
Standalone<RangeResultRef>* result,
|
|
Optional<Standalone<RangeResultRef>>* cache) {
|
|
// If offset < 1, where we need to move left, iter points to the range containing at least one smaller key
|
|
// (It's a wasting of time to walk through the range whose begin key is same as ks->key)
|
|
// (rangeContainingKeyBefore itself handles the case where ks->key == Key())
|
|
// Otherwise, we only need to move right if offset > 1, iter points to the range containing the key
|
|
// Since boundary.end is always a key in the RangeMap, it is always safe to move right
|
|
state RangeMap<Key, SpecialKeyRangeBaseImpl*, KeyRangeRef>::Iterator iter =
|
|
ks->offset < 1 ? sks->getImpls().rangeContainingKeyBefore(ks->getKey())
|
|
: sks->getImpls().rangeContaining(ks->getKey());
|
|
while ((ks->offset < 1 && iter->begin() >= boundary.begin) || (ks->offset > 1 && iter->begin() < boundary.end)) {
|
|
if (iter->value() != nullptr) {
|
|
wait(moveKeySelectorOverRangeActor(iter->value(), ryw, ks, cache));
|
|
}
|
|
// Check if we can still move the iterator left
|
|
if (ks->offset < 1) {
|
|
if (iter == sks->getImpls().ranges().begin()) {
|
|
break;
|
|
} else {
|
|
--iter;
|
|
}
|
|
} else if (ks->offset > 1) {
|
|
// Always safe to move right
|
|
++iter;
|
|
}
|
|
}
|
|
*actualOffset = ks->offset;
|
|
|
|
if (!ks->isFirstGreaterOrEqual()) {
|
|
TraceEvent(SevDebug, "ReadToBoundary")
|
|
.detail("TerminateKey", ks->getKey())
|
|
.detail("TerminateOffset", ks->offset);
|
|
// If still not normalized after moving to the boundary,
|
|
// let key selector clamp up to the boundary
|
|
if (ks->offset < 1) {
|
|
result->readToBegin = true;
|
|
ks->setKey(boundary.begin);
|
|
}
|
|
else {
|
|
result->readThroughEnd = true;
|
|
ks->setKey(boundary.end);
|
|
}
|
|
ks->offset = 1;
|
|
}
|
|
return Void();
|
|
}
|
|
|
|
// Runs the aggregated range read while watching ryw->resetFuture().
// Returns the aggregated result when the read finishes; throws internal_error() if the reset future becomes ready
// while the read is still in flight.
ACTOR Future<Standalone<RangeResultRef>> SpecialKeySpace::checkRYWValid(SpecialKeySpace* sks,
                                                                        ReadYourWritesTransaction* ryw,
                                                                        KeySelector begin, KeySelector end,
                                                                        GetRangeLimits limits, bool reverse) {
	ASSERT(ryw);
	choose {
		// the read itself
		when(Standalone<RangeResultRef> aggregated =
		         wait(SpecialKeySpace::getRangeAggregationActor(sks, ryw, begin, end, limits, reverse))) {
			return aggregated;
		}
		// the transaction was reset underneath the read
		when(wait(ryw->resetFuture())) {
			throw internal_error();
		}
	}
}
|
|
|
|
// Serves a range read that may cover more than one special key range and aggregates all partial results.
// KeySelector resolution, GetRangeLimits and the reverse flag are all handled here.
// Throws special_keys_cross_module_read() / special_keys_no_module_found() when, in non-relaxed mode, the two
// selector keys are not inside one known module.
ACTOR Future<Standalone<RangeResultRef>> SpecialKeySpace::getRangeAggregationActor(SpecialKeySpace* sks,
                                                                                   ReadYourWritesTransaction* ryw,
                                                                                   KeySelector begin, KeySelector end,
                                                                                   GetRangeLimits limits,
                                                                                   bool reverse) {
	state Standalone<RangeResultRef> aggregated;
	state Standalone<RangeResultRef> chunk;
	state RangeMap<Key, SpecialKeyRangeBaseImpl*, KeyRangeRef>::Iterator implIt;
	state int resolvedBeginOffset;
	state int resolvedEndOffset;
	state KeyRangeRef boundary;
	// used to cache result from potential first read
	state Optional<Standalone<RangeResultRef>> cache;

	// Work out the legal key space of this read
	if (ryw->specialKeySpaceRelaxed()) {
		boundary = sks->range;
	} else {
		auto moduleIt = sks->getModules().rangeContaining(begin.getKey());
		const bool endInSameModule = moduleIt->begin() <= end.getKey() && end.getKey() <= moduleIt->end();
		if (!endInSameModule) {
			TraceEvent(SevInfo, "SpecialKeyCrossModuleRead")
			    .detail("Begin", begin.toString())
			    .detail("End", end.toString())
			    .detail("BoundaryBegin", moduleIt->begin())
			    .detail("BoundaryEnd", moduleIt->end());
			throw special_keys_cross_module_read();
		}
		if (moduleIt->value() == SpecialKeySpace::MODULE::UNKNOWN) {
			throw special_keys_no_module_found();
		}
		boundary = moduleIt->range();
	}

	// Bring both selectors to the standard form; clamping flags are recorded on the aggregated result
	wait(normalizeKeySelectorActor(sks, ryw, &begin, boundary, &resolvedBeginOffset, &aggregated, &cache));
	wait(normalizeKeySelectorActor(sks, ryw, &end, boundary, &resolvedEndOffset, &aggregated, &cache));

	// Handle all corner cases like what RYW does
	// inverted range: nothing to return
	if (resolvedBeginOffset >= resolvedEndOffset && begin.getKey() >= end.getKey()) {
		TEST(true);
		return RangeResultRef(false, false);
	}
	// the range touches begin or end of the boundary: return with the readToBegin / readThroughEnd flags only
	if (begin.getKey() == boundary.end || end.getKey() == boundary.begin) {
		TEST(true);
		return aggregated;
	}

	state RangeMap<Key, SpecialKeyRangeBaseImpl*, KeyRangeRef>::Ranges touched =
	    sks->impls.intersectingRanges(KeyRangeRef(begin.getKey(), end.getKey()));

	// TODO : find a way to write the two directions together to make the code compact
	// The issue here is that boost::iterator_range<> does not provide rbegin(), rend()
	if (reverse) {
		implIt = touched.end();
		while (implIt != touched.begin()) {
			--implIt;
			if (implIt->value() == nullptr) continue;

			// Clip the read to the part of this range that lies between the two selector keys
			KeyRangeRef implRange = implIt->range();
			KeyRef readFrom = implRange.contains(begin.getKey()) ? begin.getKey() : implRange.begin;
			KeyRef readTo = implRange.contains(end.getKey()) ? end.getKey() : implRange.end;

			// The cache is only handed to async implementations, and only once it has been filled
			Future<Standalone<RangeResultRef>> rangeRead;
			if (implIt->value()->isAsync() && cache.present()) {
				const SpecialKeyRangeAsyncImpl* asyncImpl =
				    dynamic_cast<const SpecialKeyRangeAsyncImpl*>(implIt->value());
				rangeRead = asyncImpl->getRange(ryw, KeyRangeRef(readFrom, readTo), &cache);
			} else {
				rangeRead = implIt->value()->getRange(ryw, KeyRangeRef(readFrom, readTo));
			}
			Standalone<RangeResultRef> fetched = wait(rangeRead);
			chunk = fetched;

			aggregated.arena().dependsOn(chunk.arena());
			// limits handler, walking this chunk from its last pair to its first
			for (int left = chunk.size(); left > 0; --left) {
				const KeyValueRef& kv = chunk[left - 1];
				aggregated.push_back(aggregated.arena(), kv);
				// Note : the pair that makes the total bytes exceed the limit is still returned. In other
				// words, the total size of the returned value (less the last entry) will be less than byteLimit
				limits.decrement(kv);
				if (limits.isReached()) {
					aggregated.more = true;
					aggregated.readToBegin = false;
					return aggregated;
				}
			}
		}
	} else {
		for (implIt = touched.begin(); implIt != touched.end(); ++implIt) {
			if (implIt->value() == nullptr) continue;

			// Clip the read to the part of this range that lies between the two selector keys
			KeyRangeRef implRange = implIt->range();
			KeyRef readFrom = implRange.contains(begin.getKey()) ? begin.getKey() : implRange.begin;
			KeyRef readTo = implRange.contains(end.getKey()) ? end.getKey() : implRange.end;

			// The cache is only handed to async implementations, and only once it has been filled
			Future<Standalone<RangeResultRef>> rangeRead;
			if (implIt->value()->isAsync() && cache.present()) {
				const SpecialKeyRangeAsyncImpl* asyncImpl =
				    dynamic_cast<const SpecialKeyRangeAsyncImpl*>(implIt->value());
				rangeRead = asyncImpl->getRange(ryw, KeyRangeRef(readFrom, readTo), &cache);
			} else {
				rangeRead = implIt->value()->getRange(ryw, KeyRangeRef(readFrom, readTo));
			}
			Standalone<RangeResultRef> fetched = wait(rangeRead);
			chunk = fetched;

			aggregated.arena().dependsOn(chunk.arena());
			// limits handler, walking this chunk from its first pair to its last
			for (int at = 0; at < chunk.size(); ++at) {
				const KeyValueRef& kv = chunk[at];
				aggregated.push_back(aggregated.arena(), kv);
				// Note : the pair that makes the total bytes exceed the limit is still returned. In other
				// words, the total size of the returned value (less the last entry) will be less than byteLimit
				limits.decrement(kv);
				if (limits.isReached()) {
					aggregated.more = true;
					aggregated.readThroughEnd = false;
					return aggregated;
				}
			}
		}
	}
	return aggregated;
}
|
|
|
|
// Entry point for range reads on the special key space.
// Validates the limits, strips orEqual from both selectors, answers the trivial cases (limit already reached,
// inverted range) with an empty result, and hands everything else to checkRYWValid().
// Returns range_limits_invalid() as the future's error when the limits are not valid.
Future<Standalone<RangeResultRef>> SpecialKeySpace::getRange(ReadYourWritesTransaction* ryw, KeySelector begin,
                                                             KeySelector end, GetRangeLimits limits, bool reverse) {
	// validate limits here
	if (!limits.isValid()) {
		return range_limits_invalid();
	}
	if (limits.isReached()) {
		TEST(true); // read limit 0
		return Standalone<RangeResultRef>();
	}

	// from here on both selectors have orEqual == false
	begin.removeOrEqual(begin.arena());
	end.removeOrEqual(end.arena());

	const bool offsetsInverted = begin.offset >= end.offset;
	if (offsetsInverted && begin.getKey() >= end.getKey()) {
		TEST(true); // range inverted
		return Standalone<RangeResultRef>();
	}

	return checkRYWValid(this, ryw, begin, end, limits, reverse);
}
|
|
|
|
// Point read on the special key space, implemented on top of getRange():
// the key is read as the range [firstGreaterOrEqual(key), firstGreaterOrEqual(keyAfter(key))).
// Returns the value of the single pair found, or an empty Optional when the range is empty.
ACTOR Future<Optional<Value>> SpecialKeySpace::getActor(SpecialKeySpace* sks, ReadYourWritesTransaction* ryw,
                                                        KeyRef key) {
	KeySelector fromKey(firstGreaterOrEqual(key));
	KeySelector pastKey(firstGreaterOrEqual(keyAfter(key)));
	Standalone<RangeResultRef> found =
	    wait(sks->getRange(ryw, fromKey, pastKey, GetRangeLimits(CLIENT_KNOBS->TOO_MANY), false));

	// the range holds one key at most
	ASSERT(found.size() <= 1);
	if (found.size() == 0) {
		return Optional<Value>();
	}
	return Optional<Value>(found[0].value);
}
|
|
|
|
// Reads a single special key; thin wrapper that starts getActor() on this key space.
Future<Optional<Value>> SpecialKeySpace::get(ReadYourWritesTransaction* ryw, const Key& key) {
	Future<Optional<Value>> pointRead = getActor(this, ryw, key);
	return pointRead;
}
|
|
|
|
// Registers kr with the base class as the key range served by this implementation.
ReadConflictRangeImpl::ReadConflictRangeImpl(KeyRangeRef kr)
  : SpecialKeyRangeBaseImpl(kr) {
}
|
|
|
|
// Waits for ryw->pendingReads() and then returns the transaction's read conflict ranges intersecting kr.
// kr is taken as an owning KeyRange because it is used after the wait.
ACTOR static Future<Standalone<RangeResultRef>> getReadConflictRangeImpl(ReadYourWritesTransaction* ryw, KeyRange kr) {
	wait(ryw->pendingReads());
	Standalone<RangeResultRef> conflicts = ryw->getReadConflictRangeIntersecting(kr);
	return conflicts;
}
|
|
|
|
// Range read over the read-conflict-range keys; the work is done by getReadConflictRangeImpl().
Future<Standalone<RangeResultRef>> ReadConflictRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                                                   KeyRangeRef kr) const {
	Future<Standalone<RangeResultRef>> conflicts = getReadConflictRangeImpl(ryw, kr);
	return conflicts;
}
|
|
|
|
// Registers kr with the base class as the key range served by this implementation.
WriteConflictRangeImpl::WriteConflictRangeImpl(KeyRangeRef kr)
  : SpecialKeyRangeBaseImpl(kr) {
}
|
|
|
|
// Range read over the write-conflict-range keys: returns whatever the transaction reports through
// getWriteConflictRangeIntersecting(kr). Unlike the read-conflict variant, nothing is awaited first.
Future<Standalone<RangeResultRef>> WriteConflictRangeImpl::getRange(ReadYourWritesTransaction* ryw,
                                                                    KeyRangeRef kr) const {
	return Future<Standalone<RangeResultRef>>(ryw->getWriteConflictRangeIntersecting(kr));
}
|
|
|
|
// Registers kr with the base class as the key range served by this implementation.
ConflictingKeysImpl::ConflictingKeysImpl(KeyRangeRef kr)
  : SpecialKeyRangeBaseImpl(kr) {
}
|
|
|
|
// Reports the boundaries of the transaction's conflicting-keys map that fall inside kr.
// The map (when the transaction carries one) is walked from the range containing kr.begin to the range containing
// kr.end; every reported range contributes one pair (range begin key, range value), deep-copied into the result.
// Only range begin keys lying inside [kr.begin, kr.end) are returned.
// Returns an empty result when the transaction has no conflicting-keys map.
Future<Standalone<RangeResultRef>> ConflictingKeysImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const {
	Standalone<RangeResultRef> result;
	if (ryw->getTransactionInfo().conflictingKeys) {
		auto krMapPtr = ryw->getTransactionInfo().conflictingKeys.get();
		auto beginIter = krMapPtr->rangeContaining(kr.begin);
		auto endIter = krMapPtr->rangeContaining(kr.end);

		// The first range normally starts before kr.begin, so its begin key is outside kr and gets skipped.
		// It must NOT be skipped when kr lies within a single map range (beginIter == endIter): advancing would
		// step past endIter and the loop below would never meet its end condition, running off the map.
		if (!kr.contains(beginIter->begin()) && beginIter != endIter) ++beginIter;

		for (auto it = beginIter; it != endIter; ++it) {
			result.push_back_deep(result.arena(), KeyValueRef(it->begin(), it->value()));
		}

		// The last range is reported only when its begin key is really inside kr. This drops both the case
		// endIter->begin() == kr.end (kr.end is exclusive) and the case where it lies before kr.begin.
		if (kr.contains(endIter->begin()))
			result.push_back_deep(result.arena(), KeyValueRef(endIter->begin(), endIter->value()));
	}
	return result;
}
|
|
|
|
// Reads data distribution metrics for kr and returns them as key-value pairs.
// kr is stripped of the ddStatsRange.begin prefix before the request is sent; the prefix is put back on every
// begin key of the answer. Each value is a JSON object (raw utf-8) that currently carries "shard_bytes".
// The request is retried after a jittered delay for as long as it fails with operation_failed; any other error is
// rethrown to the caller.
ACTOR Future<Standalone<RangeResultRef>> ddMetricsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
	loop {
		try {
			Standalone<VectorRef<DDMetricsRef>> shards =
			    wait(waitDataDistributionMetricsList(ryw->getDatabase(), kr.removePrefix(ddStatsRange.begin),
			                                         CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT));
			Standalone<RangeResultRef> encoded;
			for (int idx = 0; idx < shards.size(); ++idx) {
				const DDMetricsRef& shard = shards[idx];
				// each begin key is the previous end key, thus only the begin key is encoded in the result
				KeyRef prefixedBegin = shard.beginKey.withPrefix(ddStatsRange.begin, encoded.arena());
				// values are json strings encoded in utf-8, which makes adding more fields easy in the future
				json_spirit::mObject stats;
				stats["shard_bytes"] = shard.shardBytes;
				std::string statsJson =
				    json_spirit::write_string(json_spirit::mValue(stats), json_spirit::Output_options::raw_utf8);
				ValueRef statsValue(encoded.arena(), statsJson);
				encoded.push_back(encoded.arena(), KeyValueRef(prefixedBegin, statsValue));
			}
			return encoded;
		} catch (Error& e) {
			// keep a copy: the error has to outlive the wait below
			state Error err(e);
			if (err.code() != error_code_operation_failed) {
				throw err;
			}
			TraceEvent(SevWarnAlways, "DataDistributorNotPresent")
			    .detail("Operation", "DDMetricsReqestThroughSpecialKeys");
			// back off a little, then go around the loop again
			wait(delayJittered(FLOW_KNOBS->PREVENT_FAST_SPIN_DELAY));
		}
	}
}
|
|
|
|
// Registers kr with the async base class as the key range served by this implementation.
DDStatsRangeImpl::DDStatsRangeImpl(KeyRangeRef kr)
  : SpecialKeyRangeAsyncImpl(kr) {
}
|
|
|
|
// Range read over the data distribution stats keys; the work is done by ddMetricsGetRangeActor().
Future<Standalone<RangeResultRef>> DDStatsRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const {
	Future<Standalone<RangeResultRef>> metrics = ddMetricsGetRangeActor(ryw, kr);
	return metrics;
}
|